diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index ed0f411d780cf7..1c06634a87b9a1 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1096,9 +1096,13 @@ DEFINE_mInt64(workload_group_scan_task_wait_timeout_ms, "10000"); // Whether use schema dict in backend side instead of MetaService side(cloud mode) DEFINE_mBool(variant_use_cloud_schema_dict_cache, "true"); DEFINE_mInt64(variant_threshold_rows_to_estimate_sparse_column, "2048"); +DEFINE_mInt32(variant_max_json_key_length, "255"); DEFINE_mBool(variant_throw_exeception_on_invalid_json, "false"); DEFINE_mBool(enable_vertical_compact_variant_subcolumns, "true"); +DEFINE_Validator(variant_max_json_key_length, + [](const int config) -> bool { return config > 0 && config <= 65535; }); + // block file cache DEFINE_Bool(enable_file_cache, "false"); // format: [{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240}] diff --git a/be/src/common/config.h b/be/src/common/config.h index 194d41e51e118f..7b90ce5e8dd148 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1335,6 +1335,8 @@ DECLARE_mBool(variant_use_cloud_schema_dict_cache); // Threshold to estimate a column is sparsed // Notice: TEST ONLY DECLARE_mInt64(variant_threshold_rows_to_estimate_sparse_column); +// Max json key length in bytes when parsing json into variant subcolumns/jsonb. 
+DECLARE_mInt32(variant_max_json_key_length); // Treat invalid json format str as string, instead of throwing exception if false DECLARE_mBool(variant_throw_exeception_on_invalid_json); // Enable vertical compact subcolumns of variant column diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp index e4ad2bfe7c560d..eb4d6c5e2b5fee 100644 --- a/be/src/vec/json/json_parser.cpp +++ b/be/src/vec/json/json_parser.cpp @@ -93,9 +93,12 @@ void JSONDataParser::traverseObject(const JSONObject& object, ParseC ctx.values.reserve(ctx.values.size() + object.size()); for (auto it = object.begin(); it != object.end(); ++it) { const auto& [key, value] = *it; - if (key.size() >= std::numeric_limits::max()) { - throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, - "Key length exceeds maximum allowed size of 255 bytes."); + const size_t max_key_length = cast_set(config::variant_max_json_key_length); + if (key.size() > max_key_length) { + throw doris::Exception( + doris::ErrorCode::INVALID_ARGUMENT, + fmt::format("Key length exceeds maximum allowed size of {} bytes.", + max_key_length)); } ctx.builder.append(key, false); traverse(value, ctx); @@ -133,9 +136,12 @@ void JSONDataParser::traverseObjectAsJsonb(const JSONObject& object, writer.writeStartObject(); for (auto it = object.begin(); it != object.end(); ++it) { const auto& [key, value] = *it; - if (key.size() >= std::numeric_limits::max()) { - throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, - "Key length exceeds maximum allowed size of 255 bytes."); + const size_t max_key_length = cast_set(config::variant_max_json_key_length); + if (key.size() > max_key_length) { + throw doris::Exception( + doris::ErrorCode::INVALID_ARGUMENT, + fmt::format("Key length exceeds maximum allowed size of {} bytes.", + max_key_length)); } writer.writeKey(key.data(), cast_set(key.size())); traverseAsJsonb(value, writer); diff --git a/be/test/vec/jsonb/json_parser_test.cpp b/be/test/vec/jsonb/json_parser_test.cpp 
index b878cc165f9fc3..e4790f6786c16a 100644
--- a/be/test/vec/jsonb/json_parser_test.cpp
+++ b/be/test/vec/jsonb/json_parser_test.cpp
@@ -21,6 +21,7 @@
 
 #include
 
+#include "common/config.h"
 #include "vec/common/string_ref.h"
 
 using doris::vectorized::JSONDataParser;
@@ -430,4 +431,60 @@ TEST(JsonParserTest, ParseUInt64) {
     EXPECT_EQ(array_field_2.size(), 1);
     EXPECT_EQ(array_field_2[0].get_type(), doris::PrimitiveType::TYPE_LARGEINT);
     EXPECT_EQ(array_field_2[0].get(), 18446744073709551615ULL);
-}
\ No newline at end of file
+}
+
+// Checks that the parser's JSON key length limit follows
+// config::variant_max_json_key_length: a key longer than the limit makes
+// parse() throw, while a key exactly at the limit is accepted.
+TEST(JsonParserTest, KeyLengthLimitByConfig) {
+    // Sets the config value for the enclosing scope and writes the previous
+    // value back when destroyed.
+    struct MaxKeyLengthOverride {
+        explicit MaxKeyLengthOverride(int32_t limit)
+                : saved(doris::config::variant_max_json_key_length) {
+            doris::config::variant_max_json_key_length = limit;
+        }
+        ~MaxKeyLengthOverride() { doris::config::variant_max_json_key_length = saved; }
+        int32_t saved;
+    };
+
+    // Builds {"<key>": 1}.
+    const auto flat_doc = [](const std::string& key) { return "{\"" + key + "\": 1}"; };
+    // Builds {"a": [{"<key>": 1}]}, i.e. the key sits in an object inside an array.
+    const auto nested_doc = [](const std::string& key) {
+        return "{\"a\": [{\"" + key + "\": 1}]}";
+    };
+
+    JSONDataParser parser;
+    ParseConfig config;
+
+    {
+        // Limit 10 with an 11-byte key: both documents must be rejected.
+        MaxKeyLengthOverride limit_guard(10);
+        const std::string too_long(11, 'a');
+
+        const std::string flat = flat_doc(too_long);
+        EXPECT_ANY_THROW(parser.parse(flat.c_str(), flat.size(), config));
+
+        config.enable_flatten_nested = false;
+        const std::string nested = nested_doc(too_long);
+        EXPECT_ANY_THROW(parser.parse(nested.c_str(), nested.size(), config));
+    }
+
+    {
+        // Limit 255 with a key of exactly 255 bytes: both documents must parse.
+        MaxKeyLengthOverride limit_guard(255);
+        const std::string at_limit(255, 'b');
+
+        const std::string flat = flat_doc(at_limit);
+        auto parsed = parser.parse(flat.c_str(), flat.size(), config);
+        ASSERT_TRUE(parsed.has_value());
+
+        config.enable_flatten_nested = false;
+        const std::string nested = nested_doc(at_limit);
+        parsed = parser.parse(nested.c_str(), nested.size(), config);
+        ASSERT_TRUE(parsed.has_value());
+        ASSERT_EQ(parsed->values.size(), 1);
+        EXPECT_EQ(parsed->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);
+    }
+}