StoneDB/sql/json_binary.cc

1406 lines
43 KiB
C++

/* Copyright (c) 2015, 2021, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
#include "json_binary.h"
#include "json_dom.h"
#include "mysqld.h" // key_memory_JSON
#include "sql_class.h" // THD
#include "template_utils.h" // down_cast
#include <algorithm> // std::min
/*
Type identifiers of the binary JSON format. serialize_json_value()
stores one of these in the type byte of the value it writes, and
parse_value()/parse_scalar() dispatch on them when reading a value back.
*/
#define JSONB_TYPE_SMALL_OBJECT 0x0
#define JSONB_TYPE_LARGE_OBJECT 0x1
#define JSONB_TYPE_SMALL_ARRAY 0x2
#define JSONB_TYPE_LARGE_ARRAY 0x3
#define JSONB_TYPE_LITERAL 0x4
#define JSONB_TYPE_INT16 0x5
#define JSONB_TYPE_UINT16 0x6
#define JSONB_TYPE_INT32 0x7
#define JSONB_TYPE_UINT32 0x8
#define JSONB_TYPE_INT64 0x9
#define JSONB_TYPE_UINT64 0xA
#define JSONB_TYPE_DOUBLE 0xB
#define JSONB_TYPE_STRING 0xC
#define JSONB_TYPE_OPAQUE 0xF
/*
The one-byte payloads that distinguish the values of type
JSONB_TYPE_LITERAL (null, true and false).
*/
#define JSONB_NULL_LITERAL '\x00'
#define JSONB_TRUE_LITERAL '\x01'
#define JSONB_FALSE_LITERAL '\x02'
/*
The size of offset or size fields in the small and the large storage
format for JSON objects and JSON arrays.
*/
#define SMALL_OFFSET_SIZE 2
#define LARGE_OFFSET_SIZE 4
/*
The size of key entries for objects when using the small storage
format or the large storage format. In the small format it is 4
bytes (2 bytes for key length and 2 bytes for key offset). In the
large format it is 6 (2 bytes for length, 4 bytes for offset).
*/
#define KEY_ENTRY_SIZE_SMALL (2 + SMALL_OFFSET_SIZE)
#define KEY_ENTRY_SIZE_LARGE (2 + LARGE_OFFSET_SIZE)
/*
The size of value entries for objects or arrays. When using the
small storage format, the entry size is 3 (1 byte for type, 2 bytes
for offset). When using the large storage format, it is 5 (1 byte
for type, 4 bytes for offset).
*/
#define VALUE_ENTRY_SIZE_SMALL (1 + SMALL_OFFSET_SIZE)
#define VALUE_ENTRY_SIZE_LARGE (1 + LARGE_OFFSET_SIZE)
namespace json_binary
{
/// Outcome of an attempt to serialize a JSON value to the binary format.
enum enum_serialization_result
{
  /// The JSON value was serialized successfully.
  OK,
  /**
    The JSON value did not fit in the storage format that was used.
    When this is returned for the small storage format, the caller is
    expected to try again with the large storage format. When it is
    returned for the large storage format, my_error() has already been
    called.
  */
  VALUE_TOO_BIG,
  /**
    Serialization failed for some other reason. my_error() has been
    called with more specific information about the failure.
  */
  FAILURE
};
/*
Forward declaration. serialize_json_value() is defined further down, but
append_value() has to call it, and serialize_json_value() in turn reaches
append_value() through serialize_json_array() and serialize_json_object().
*/
static enum_serialization_result
serialize_json_value(const Json_dom *dom, size_t type_pos, String *dest,
size_t depth, bool small_parent);
bool serialize(const Json_dom *dom, String *dest)
{
// Reset the destination buffer.
dest->length(0);
dest->set_charset(&my_charset_bin);
// Reserve space (one byte) for the type identifier.
if (dest->append('\0'))
return true; /* purecov: inspected */
return serialize_json_value(dom, 0, dest, 0, false) != OK;
}
/**
  Append a 16-bit integer to the destination string using int2store().
  @return false on success, true if space could not be reserved
*/
static bool append_int16(String *dest, int16 value)
{
  const size_t width= 2;
  if (dest->reserve(width))
    return true; /* purecov: inspected */
  const size_t old_length= dest->length();
  char *write_pos= const_cast<char *>(dest->ptr()) + old_length;
  int2store(write_pos, value);
  dest->length(old_length + width);
  return false;
}
/**
  Append a 32-bit integer to the destination string using int4store().
  @return false on success, true if space could not be reserved
*/
static bool append_int32(String *dest, int32 value)
{
  const size_t width= 4;
  if (dest->reserve(width))
    return true; /* purecov: inspected */
  const size_t old_length= dest->length();
  char *write_pos= const_cast<char *>(dest->ptr()) + old_length;
  int4store(write_pos, value);
  dest->length(old_length + width);
  return false;
}
/**
  Append a 64-bit integer to the destination string using int8store().
  @return false on success, true if space could not be reserved
*/
static bool append_int64(String *dest, int64 value)
{
  const size_t width= 8;
  if (dest->reserve(width))
    return true; /* purecov: inspected */
  const size_t old_length= dest->length();
  char *write_pos= const_cast<char *>(dest->ptr()) + old_length;
  int8store(write_pos, value);
  dest->length(old_length + width);
  return false;
}
/**
  Write an offset or a size at the end of a String.
  @param dest            the String to append to
  @param offset_or_size  the number to write
  @param large           true: write four bytes (large storage format);
                         false: write two bytes (small storage format)
  @return false on success, true on failure
*/
static bool append_offset_or_size(String *dest, size_t offset_or_size,
                                  bool large)
{
  return large ?
    append_int32(dest, static_cast<int32>(offset_or_size)) :
    append_int16(dest, static_cast<int16>(offset_or_size));
}
/**
  Overwrite the bytes at a given position of a String with an offset or
  a size. The String must already have room for the value; this is only
  verified with an assert.
  @param dest            the String to write into
  @param pos             the position in the String to write at
  @param offset_or_size  the number to write
  @param large           true: write four bytes (large storage format);
                         false: write two bytes (small storage format)
*/
static void insert_offset_or_size(String *dest, size_t pos,
                                  size_t offset_or_size, bool large)
{
  char *target= const_cast<char*>(dest->ptr()) + pos;
  if (!large)
  {
    assert(pos + SMALL_OFFSET_SIZE <= dest->alloced_length());
    int2store(target, static_cast<uint16>(offset_or_size));
    return;
  }
  assert(pos + LARGE_OFFSET_SIZE <= dest->alloced_length());
  int4store(target, static_cast<uint32>(offset_or_size));
}
/**
  Append a length to a String in a variable-width encoding. Each byte
  carries seven bits of the length, least significant bits first. The
  high bit of a byte is set when more bytes follow.
  @param dest    the String to append to
  @param length  the length to encode
  @return false on success, true on error
*/
static bool append_variable_length(String *dest, size_t length)
{
  size_t remaining= length;
  for (;;)
  {
    // Take the seven lowest bits that have not been written yet.
    uchar byte= static_cast<uchar>(remaining & 0x7F);
    remaining>>= 7;
    // Flag the byte if there are still bits left to write.
    const bool more= (remaining != 0);
    if (more)
      byte|= 0x80;
    if (dest->append(byte))
      return true; /* purecov: inspected */
    if (!more)
      return false;  // The whole length has been written.
  }
}
/**
  Read a variable length written by append_variable_length().

  Each byte holds seven bits of the length, least significant bits
  first; a set high bit means that another byte follows.

  @param[in] data the buffer to read from
  @param[in] data_length the maximum number of bytes to read from data
  @param[out] length the length that was read
  @param[out] num the number of bytes needed to represent the length
  @return false on success, true on error (the encoding is not
  terminated within the available bytes, or the decoded length does
  not fit in 32 bits)
*/
static bool read_variable_length(const char *data, size_t data_length,
                                 size_t *length, size_t *num)
{
  // The largest supported length (same value as UINT_MAX32).
  const unsigned long long max_length= 0xFFFFFFFFULL;
  /*
    It takes five bytes to represent the largest supported length, so
    don't look any further.
  */
  const size_t max_bytes= std::min(data_length, static_cast<size_t>(5));
  /*
    Accumulate in an unsigned 64-bit variable. The fifth byte is shifted
    by 28 bits, which does not fit in an int, and doing the shift in a
    wide unsigned type keeps the bits above bit 31 so that the range
    check below can see them.
  */
  unsigned long long len= 0;
  for (size_t i= 0; i < max_bytes; i++)
  {
    // Get the next 7 bits of the length.
    const unsigned long long bits=
      static_cast<unsigned char>(data[i]) & 0x7f;
    len|= bits << (7 * i);
    if ((data[i] & 0x80) == 0)
    {
      // The length shouldn't exceed 32 bits.
      if (len > max_length)
        return true; /* purecov: inspected */
      // This was the last byte. Return successfully.
      *num= i + 1;
      *length= static_cast<size_t>(len);
      return false;
    }
  }
  // No more available bytes. Return true to signal error.
  return true; /* purecov: inspected */
}
/**
  Tell whether an offset or a size is too big for the binary JSON
  storage format in use.

  For the small storage format no error is raised, because the caller
  is expected to retry with the large storage format. For the large
  storage format, ER_JSON_VALUE_TOO_BIG is raised when the value does
  not fit.

  @param offset_or_size  the offset or size to check
  @param large           true if the large storage format is used (limit
                         UINT_MAX32); false for the small storage format
                         (limit UINT_MAX16)
  @return true if offset_or_size is too big for the format, false
  otherwise
*/
static bool is_too_big_for_json(size_t offset_or_size, bool large)
{
  // Anything that fits in 16 bits is fine in both formats.
  if (offset_or_size <= UINT_MAX16)
    return false;
  // Too big for the small format; the caller will retry, so stay silent.
  if (!large)
    return true;
  if (offset_or_size <= UINT_MAX32)
    return false;
  /* purecov: begin inspected */
  my_error(ER_JSON_VALUE_TOO_BIG, MYF(0));
  return true;
  /* purecov: end */
}
/**
Check if a value is small enough to be inlined in the value entry at the
beginning of an object or an array.
@param[in] value the JSON value
@param[in] large true if the large storage format is used
@param[out] inlined_val the numeric value to inline
@param[out] inlined_type the type of the inlined value
@return true if the value should be inlined, false otherwise
*/
static bool should_inline_value(const Json_dom *value, bool large,
int32 *inlined_val, uint8 *inlined_type)
{
switch (value->json_type())
{
case Json_dom::J_NULL:
*inlined_val= JSONB_NULL_LITERAL;
*inlined_type= JSONB_TYPE_LITERAL;
return true;
case Json_dom::J_BOOLEAN:
*inlined_val= (down_cast<const Json_boolean*>(value)->value()) ?
JSONB_TRUE_LITERAL : JSONB_FALSE_LITERAL;
*inlined_type= JSONB_TYPE_LITERAL;
return true;
case Json_dom::J_INT:
{
const Json_int *i= down_cast<const Json_int*>(value);
if (i->is_16bit() || (large && i->is_32bit()))
{
*inlined_val= static_cast<int32>(i->value());
*inlined_type= i->is_16bit() ? JSONB_TYPE_INT16 : JSONB_TYPE_INT32;
return true;
}
return false;
}
case Json_dom::J_UINT:
{
const Json_uint *i= down_cast<const Json_uint*>(value);
if (i->is_16bit() || (large && i->is_32bit()))
{
*inlined_val= static_cast<int32>(i->value());
*inlined_type= i->is_16bit() ? JSONB_TYPE_UINT16 : JSONB_TYPE_UINT32;
return true;
}
return false;
}
default:
return false;
}
}
/**
  Add one element of an array or object to the destination buffer and
  fill in its value entry in the header of the parent.

  A value that can be inlined is written straight into the value entry.
  Any other value is serialized at the end of the buffer, and the value
  entry gets its offset relative to the start of the parent.

  @param dest       the destination buffer
  @param value      the value to append
  @param start_pos  the position in the destination buffer where the
                    parent array or object starts
  @param entry_pos  the position in the destination buffer of the value
                    entry that belongs to the appended value
  @param large      true if the parent uses the large storage format,
                    false if it uses the small storage format
  @param depth      the current nesting level
  @return serialization status
*/
static enum_serialization_result
append_value(String *dest, const Json_dom *value, size_t start_pos,
             size_t entry_pos, bool large, size_t depth)
{
  if (depth >= JSON_DOCUMENT_MAX_DEPTH)
  {
    my_error(ER_JSON_DOCUMENT_TOO_DEEP, MYF(0));
    return FAILURE;
  }

  // The offset/inlined value follows the type byte of the entry.
  const size_t payload_pos= entry_pos + 1;

  uint8 inline_type;
  int32 inline_payload;
  if (!should_inline_value(value, large, &inline_payload, &inline_type))
  {
    const size_t relative_offset= dest->length() - start_pos;
    if (is_too_big_for_json(relative_offset, large))
      return VALUE_TOO_BIG;
    insert_offset_or_size(dest, payload_pos, relative_offset, large);
    // serialize_json_value() sets the type byte at entry_pos.
    return serialize_json_value(value, entry_pos, dest, depth, !large);
  }

  (*dest)[entry_pos]= inline_type;
  insert_offset_or_size(dest, payload_pos, inline_payload, large);
  return OK;
}
/**
  Serialize a JSON array at the end of the destination string.

  The array is written as: element count, size in bytes, one value
  entry per element, and then the values that could not be inlined.

  @param array  the JSON array to serialize
  @param dest   the destination string
  @param large  if true, the large storage format will be used
  @param depth  the current nesting level
  @return serialization status
*/
static enum_serialization_result
serialize_json_array(const Json_array *array, String *dest, bool large,
                     size_t depth)
{
  const size_t array_start= dest->length();
  const size_t element_count= array->size();
  if (is_too_big_for_json(element_count, large))
    return VALUE_TOO_BIG;

  // The header starts with the number of elements.
  if (append_offset_or_size(dest, element_count, large))
    return FAILURE; /* purecov: inspected */

  // Next comes the size in bytes. Not known yet, so write a placeholder.
  const size_t byte_size_pos= dest->length();
  if (append_offset_or_size(dest, 0, large))
    return FAILURE; /* purecov: inspected */

  // Zero-fill the area that will hold the value entries.
  const size_t entry_size=
    large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
  const size_t first_entry_pos= dest->length();
  if (dest->fill(first_entry_pos + element_count * entry_size, 0))
    return FAILURE; /* purecov: inspected */

  // Append the elements one by one, filling in their value entries.
  size_t entry_pos= first_entry_pos;
  for (uint32 idx= 0; idx < element_count; idx++, entry_pos+= entry_size)
  {
    const enum_serialization_result status=
      append_value(dest, (*array)[idx], array_start, entry_pos, large,
                   depth + 1);
    if (status != OK)
      return status;
  }

  // Now the size of the array in bytes is known; go back and store it.
  const size_t total_bytes= dest->length() - array_start;
  if (is_too_big_for_json(total_bytes, large))
    return VALUE_TOO_BIG; /* purecov: inspected */
  insert_offset_or_size(dest, byte_size_pos, total_bytes, large);
  return OK;
}
/**
  Serialize a JSON object at the end of the destination string.

  The object is written as: member count, size in bytes, one key entry
  per member (key offset followed by key length), one value entry per
  member, the keys, and finally the values that could not be inlined.
  All offsets are relative to the start of the object.

  @param object the JSON object to serialize
  @param dest the destination string
  @param large if true, the large storage format will be used
  @param depth the current nesting level
  @return serialization status
*/
static enum_serialization_result
serialize_json_object(const Json_object *object, String *dest, bool large,
                      size_t depth)
{
  const size_t start_pos= dest->length();
  const size_t size= object->cardinality();
  if (is_too_big_for_json(size, large))
    return VALUE_TOO_BIG; /* purecov: inspected */
  // First write the number of members in the object.
  if (append_offset_or_size(dest, size, large))
    return FAILURE; /* purecov: inspected */
  // Reserve space for the size of the object in bytes. To be filled in later.
  const size_t size_pos= dest->length();
  if (append_offset_or_size(dest, 0, large))
    return FAILURE; /* purecov: inspected */
  const size_t key_entry_size=
    large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
  const size_t value_entry_size=
    large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
  /*
    Calculate the offset of the first key relative to the start of the
    object. The first key comes right after the value entries.
  */
  size_t offset= dest->length() +
    size * (key_entry_size + value_entry_size) - start_pos;
#ifndef NDEBUG
  const std::string *prev_key= NULL;
#endif
  // Add the key entries.
  for (Json_object::const_iterator it= object->begin();
       it != object->end(); ++it)
  {
    const std::string *key= &it->first;
    size_t len= key->length();
#ifndef NDEBUG
    // Check that the DOM returns the keys in the correct order.
    if (prev_key)
    {
      assert(prev_key->length() <= len);
      if (len == prev_key->length())
        assert(memcmp(prev_key->data(), key->data(), len) < 0);
    }
    prev_key= key;
#endif
    // We only have two bytes for the key size. Check if the key is too big.
    if (len > UINT_MAX16)
    {
      my_error(ER_JSON_KEY_TOO_BIG, MYF(0));
      return FAILURE;
    }
    if (is_too_big_for_json(offset, large))
      return VALUE_TOO_BIG; /* purecov: inspected */
    if (append_offset_or_size(dest, offset, large) ||
        append_int16(dest, static_cast<int16>(len)))
      return FAILURE; /* purecov: inspected */
    offset+= len;
  }
  const size_t start_of_value_entries= dest->length();
  /*
    Reserve space for the value entries. Will be filled in later. The
    result must be checked: append_value() writes into this area by
    position, so it has to exist before we go on.
  */
  if (dest->fill(dest->length() + size * value_entry_size, 0))
    return FAILURE; /* purecov: inspected */
  // Add the actual keys.
  for (Json_object::const_iterator it= object->begin(); it != object->end();
       ++it)
  {
    if (dest->append(it->first.c_str(), it->first.length()))
      return FAILURE; /* purecov: inspected */
  }
  // Add the values, and update the value entries accordingly.
  size_t entry_pos= start_of_value_entries;
  for (Json_object::const_iterator it= object->begin(); it != object->end();
       ++it)
  {
    enum_serialization_result res= append_value(dest, it->second,
                                                start_pos, entry_pos, large,
                                                depth + 1);
    if (res != OK)
      return res;
    entry_pos+= value_entry_size;
  }
  // Finally, write the size of the object in bytes.
  size_t bytes= dest->length() - start_pos;
  if (is_too_big_for_json(bytes, large))
    return VALUE_TOO_BIG;
  insert_offset_or_size(dest, size_pos, bytes, large);
  return OK;
}
/**
  Serialize a JSON value at the end of the destination string, and
  store the type of the value in the type byte that belongs to it.

  For a top-level document the type byte is the byte right in front of
  the value. For a value nested in an array or object, the type byte is
  in the value entry in the header of the parent.

  @param dom           the JSON value to serialize
  @param type_pos      the position of the type byte to update
  @param dest          the destination string
  @param depth         the current nesting level
  @param small_parent  tells if @a dom is contained in an array or
                       object which is stored in the small storage format
  @return serialization status
*/
static enum_serialization_result
serialize_json_value(const Json_dom *dom, size_t type_pos, String *dest,
                     size_t depth, bool small_parent)
{
  const size_t value_start= dest->length();
  assert(type_pos < value_start);
  enum_serialization_result status;

  switch (dom->json_type())
  {
  case Json_dom::J_ARRAY:
    {
      const Json_array *arr= down_cast<const Json_array*>(dom);
      // Try the small storage format first.
      (*dest)[type_pos]= JSONB_TYPE_SMALL_ARRAY;
      status= serialize_json_array(arr, dest, false, depth);
      if (status != VALUE_TOO_BIG)
        break;
      /*
        The array did not fit in the small storage format. A parent in
        the small storage format has to grow too, so let it handle the
        retry. Otherwise, throw away what was written for this array and
        write it again in the large storage format.
      */
      if (small_parent)
        return VALUE_TOO_BIG;
      dest->length(value_start);
      (*dest)[type_pos]= JSONB_TYPE_LARGE_ARRAY;
      status= serialize_json_array(arr, dest, true, depth);
      break;
    }
  case Json_dom::J_OBJECT:
    {
      const Json_object *obj= down_cast<const Json_object*>(dom);
      // Try the small storage format first.
      (*dest)[type_pos]= JSONB_TYPE_SMALL_OBJECT;
      status= serialize_json_object(obj, dest, false, depth);
      if (status != VALUE_TOO_BIG)
        break;
      /*
        Same fallback as for arrays: either let a small parent retry,
        or rewrite this object in the large storage format.
      */
      if (small_parent)
        return VALUE_TOO_BIG;
      dest->length(value_start);
      (*dest)[type_pos]= JSONB_TYPE_LARGE_OBJECT;
      status= serialize_json_object(obj, dest, true, depth);
      break;
    }
  case Json_dom::J_STRING:
    {
      // A string is stored as a variable length followed by its bytes.
      const Json_string *str= down_cast<const Json_string*>(dom);
      const size_t str_len= str->size();
      if (append_variable_length(dest, str_len))
        return FAILURE; /* purecov: inspected */
      if (dest->append(str->value().c_str(), str_len))
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos]= JSONB_TYPE_STRING;
      status= OK;
      break;
    }
  case Json_dom::J_INT:
    {
      // Use the narrowest of the 16, 32 and 64 bit representations.
      const Json_int *num= down_cast<const Json_int*>(dom);
      const longlong val= num->value();
      bool append_failed;
      uint8 type_code;
      if (num->is_16bit())
      {
        append_failed= append_int16(dest, static_cast<int16>(val));
        type_code= JSONB_TYPE_INT16;
      }
      else if (num->is_32bit())
      {
        append_failed= append_int32(dest, static_cast<int32>(val));
        type_code= JSONB_TYPE_INT32;
      }
      else
      {
        append_failed= append_int64(dest, val);
        type_code= JSONB_TYPE_INT64;
      }
      if (append_failed)
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos]= type_code;
      status= OK;
      break;
    }
  case Json_dom::J_UINT:
    {
      // Use the narrowest of the 16, 32 and 64 bit representations.
      const Json_uint *num= down_cast<const Json_uint*>(dom);
      const ulonglong val= num->value();
      bool append_failed;
      uint8 type_code;
      if (num->is_16bit())
      {
        append_failed= append_int16(dest, static_cast<int16>(val));
        type_code= JSONB_TYPE_UINT16;
      }
      else if (num->is_32bit())
      {
        append_failed= append_int32(dest, static_cast<int32>(val));
        type_code= JSONB_TYPE_UINT32;
      }
      else
      {
        append_failed= append_int64(dest, val);
        type_code= JSONB_TYPE_UINT64;
      }
      if (append_failed)
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos]= type_code;
      status= OK;
      break;
    }
  case Json_dom::J_DOUBLE:
    {
      // Doubles take eight bytes, written with float8store().
      const Json_double *dbl= down_cast<const Json_double*>(dom);
      if (dest->reserve(8))
        return FAILURE; /* purecov: inspected */
      const size_t old_length= dest->length();
      char *write_pos= const_cast<char *>(dest->ptr()) + old_length;
      float8store(write_pos, dbl->value());
      dest->length(old_length + 8);
      (*dest)[type_pos]= JSONB_TYPE_DOUBLE;
      status= OK;
      break;
    }
  case Json_dom::J_NULL:
    {
      if (dest->append(JSONB_NULL_LITERAL))
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos]= JSONB_TYPE_LITERAL;
      status= OK;
      break;
    }
  case Json_dom::J_BOOLEAN:
    {
      char literal;
      if (down_cast<const Json_boolean*>(dom)->value())
        literal= JSONB_TRUE_LITERAL;
      else
        literal= JSONB_FALSE_LITERAL;
      if (dest->append(literal))
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos]= JSONB_TYPE_LITERAL;
      status= OK;
      break;
    }
  case Json_dom::J_OPAQUE:
    {
      // Field type byte, variable length, and then the raw bytes.
      const Json_opaque *opaque= down_cast<const Json_opaque*>(dom);
      if (dest->append(static_cast<char>(opaque->type())))
        return FAILURE; /* purecov: inspected */
      if (append_variable_length(dest, opaque->size()))
        return FAILURE; /* purecov: inspected */
      if (dest->append(opaque->value(), opaque->size()))
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos]= JSONB_TYPE_OPAQUE;
      status= OK;
      break;
    }
  case Json_dom::J_DECIMAL:
    {
      // DECIMAL is wrapped in an opaque value and serialized as such.
      const Json_decimal *dec= down_cast<const Json_decimal*>(dom);
      const int dec_size= dec->binary_size();
      char dec_buf[Json_decimal::MAX_BINARY_SIZE];
      if (dec->get_binary(dec_buf))
        return FAILURE; /* purecov: inspected */
      Json_opaque wrapped(MYSQL_TYPE_NEWDECIMAL, dec_buf, dec_size);
      status= serialize_json_value(&wrapped, type_pos, dest, depth,
                                   small_parent);
      break;
    }
  case Json_dom::J_DATETIME:
  case Json_dom::J_DATE:
  case Json_dom::J_TIME:
  case Json_dom::J_TIMESTAMP:
    {
      // Temporal values are packed and serialized as opaque values.
      const Json_datetime *temporal= down_cast<const Json_datetime*>(dom);
      char packed[Json_datetime::PACKED_SIZE];
      temporal->to_packed(packed);
      Json_opaque wrapped(temporal->field_type(), packed,
                          Json_datetime::PACKED_SIZE);
      status= serialize_json_value(&wrapped, type_pos, dest, depth,
                                   small_parent);
      break;
    }
  default:
    /* purecov: begin deadcode */
    DBUG_ABORT();
    my_error(ER_INTERNAL_ERROR, MYF(0), "JSON serialization failed");
    return FAILURE;
    /* purecov: end */
  }

  // Refuse to produce a document that exceeds max_allowed_packet.
  if (status != OK)
    return status;
  if (dest->length() > current_thd->variables.max_allowed_packet)
  {
    my_error(ER_WARN_ALLOWED_PACKET_OVERFLOWED, MYF(0),
             "json_binary::serialize",
             current_thd->variables.max_allowed_packet);
    return FAILURE;
  }
  return status;
}
// Creates a literal (null, true or false) or an error value.
Value::Value(enum_type t)
  : m_type(t),
    m_field_type(),
    m_data(),
    m_element_count(),
    m_length(),
    m_int_value(),
    m_double_value(),
    m_large()
{
  assert(t == LITERAL_NULL || t == LITERAL_TRUE || t == LITERAL_FALSE ||
         t == ERROR);
}
// Creates an INT or UINT value. The number is kept in m_int_value.
Value::Value(enum_type t, int64 val)
  : m_type(t),
    m_field_type(),
    m_data(),
    m_element_count(),
    m_length(),
    m_int_value(val),
    m_double_value(),
    m_large()
{
  assert(t == INT || t == UINT);
}
// Creates a DOUBLE value.
Value::Value(double d)
  : m_type(DOUBLE),
    m_field_type(),
    m_data(),
    m_element_count(),
    m_length(),
    m_int_value(),
    m_double_value(d),
    m_large()
{
}
// Creates a STRING value that refers to len bytes starting at data.
Value::Value(const char *data, size_t len)
  : m_type(STRING),
    m_field_type(),
    m_data(data),
    m_element_count(),
    m_length(len),
    m_int_value(),
    m_double_value(),
    m_large()
{
}
/*
  Creates an ARRAY or OBJECT value. data points to the start of the
  serialized array or object, bytes is its size in bytes, element_count
  is its number of elements or members, and large tells which storage
  format it uses.
*/
Value::Value(enum_type t, const char *data, size_t bytes,
             size_t element_count, bool large)
  : m_type(t),
    m_field_type(),
    m_data(data),
    m_element_count(element_count),
    m_length(bytes),
    m_int_value(),
    m_double_value(),
    m_large(large)
{
  assert(t == ARRAY || t == OBJECT);
}
// Creates an OPAQUE value of field type ft that refers to len bytes at data.
Value::Value(enum_field_types ft, const char *data, size_t len)
  : m_type(OPAQUE),
    m_field_type(ft),
    m_data(data),
    m_element_count(),
    m_length(len),
    m_int_value(),
    m_double_value(),
    m_large()
{
}
bool Value::is_valid() const
{
switch (m_type)
{
case ERROR:
return false;
case ARRAY:
// Check that all the array elements are valid.
for (size_t i= 0; i < element_count(); i++)
if (!element(i).is_valid())
return false; /* purecov: inspected */
return true;
case OBJECT:
{
/*
Check that all keys and values are valid, and that the keys come
in the correct order.
*/
const char *prev_key= NULL;
size_t prev_key_len= 0;
for (size_t i= 0; i < element_count(); i++)
{
Value k= key(i);
if (!k.is_valid() || !element(i).is_valid())
return false; /* purecov: inspected */
const char *curr_key= k.get_data();
size_t curr_key_len= k.get_data_length();
if (i > 0)
{
if (prev_key_len > curr_key_len)
return false; /* purecov: inspected */
if (prev_key_len == curr_key_len &&
(memcmp(prev_key, curr_key, curr_key_len) >= 0))
return false; /* purecov: inspected */
}
prev_key= curr_key;
prev_key_len= curr_key_len;
}
return true;
}
default:
// This is a valid scalar value.
return true;
}
}
/**
  Return a pointer to the first byte of the data of a STRING or OPAQUE
  value. Must only be called on values of those two types.
*/
const char *Value::get_data() const
{
  assert(m_type == STRING || m_type == OPAQUE);
  const char *const start= m_data;
  return start;
}
/**
  Return the number of bytes of data in a STRING or OPAQUE value. Must
  only be called on values of those two types.
*/
size_t Value::get_data_length() const
{
  assert(m_type == STRING || m_type == OPAQUE);
  const size_t byte_count= m_length;
  return byte_count;
}
/**
  Return the number held by an INT value. Must only be called on INT
  values.
*/
int64 Value::get_int64() const
{
  assert(m_type == INT);
  const int64 number= m_int_value;
  return number;
}
/**
  Return the number held by a UINT value. The number is kept in the
  same member as for INT values and is converted to unsigned here. Must
  only be called on UINT values.
*/
uint64 Value::get_uint64() const
{
  assert(m_type == UINT);
  const uint64 number= static_cast<uint64>(m_int_value);
  return number;
}
/**
  Return the number held by a DOUBLE value. Must only be called on
  DOUBLE values.
*/
double Value::get_double() const
{
  assert(m_type == DOUBLE);
  const double number= m_double_value;
  return number;
}
/**
  Return how many elements an array has, or how many members an object
  has. Must only be called on ARRAY and OBJECT values.
*/
size_t Value::element_count() const
{
  assert(m_type == ARRAY || m_type == OBJECT);
  const size_t count= m_element_count;
  return count;
}
/**
  Return the MySQL field type of an OPAQUE value. It tells how the
  bytes in the data portion of the opaque value are to be interpreted.
  Must only be called on OPAQUE values.
*/
enum_field_types Value::field_type() const
{
  assert(m_type == OPAQUE);
  const enum_field_types ft= m_field_type;
  return ft;
}
/**
  Build the Value that is returned whenever parsing fails. Its type is
  Value::ERROR.
*/
static Value err()
{
  const Value::enum_type error_type= Value::ERROR;
  return Value(error_type);
}
/**
  Parse a scalar value in the binary JSON format.
  @param type  the binary type of the scalar
  @param data  pointer to the first byte of the serialized scalar
  @param len   the maximum number of bytes to read from data
  @return the parsed scalar, or an error value if the type is not a
  scalar type or the scalar does not fit in the available bytes
*/
static Value parse_scalar(uint8 type, const char *data, size_t len)
{
  switch (type)
  {
  case JSONB_TYPE_LITERAL:
    {
      if (len < 1)
        return err(); /* purecov: inspected */
      // A single byte tells which literal this is.
      const uint8 literal= static_cast<uint8>(*data);
      if (literal == JSONB_NULL_LITERAL)
        return Value(Value::LITERAL_NULL);
      if (literal == JSONB_TRUE_LITERAL)
        return Value(Value::LITERAL_TRUE);
      if (literal == JSONB_FALSE_LITERAL)
        return Value(Value::LITERAL_FALSE);
      return err(); /* purecov: inspected */
    }
  case JSONB_TYPE_INT16:
    if (len >= 2)
      return Value(Value::INT, sint2korr(data));
    return err(); /* purecov: inspected */
  case JSONB_TYPE_INT32:
    if (len >= 4)
      return Value(Value::INT, sint4korr(data));
    return err(); /* purecov: inspected */
  case JSONB_TYPE_INT64:
    if (len >= 8)
      return Value(Value::INT, sint8korr(data));
    return err(); /* purecov: inspected */
  case JSONB_TYPE_UINT16:
    if (len >= 2)
      return Value(Value::UINT, uint2korr(data));
    return err(); /* purecov: inspected */
  case JSONB_TYPE_UINT32:
    if (len >= 4)
      return Value(Value::UINT, uint4korr(data));
    return err(); /* purecov: inspected */
  case JSONB_TYPE_UINT64:
    if (len >= 8)
      return Value(Value::UINT, uint8korr(data));
    return err(); /* purecov: inspected */
  case JSONB_TYPE_DOUBLE:
    {
      if (len < 8)
        return err(); /* purecov: inspected */
      double number;
      float8get(&number, data);
      return Value(number);
    }
  case JSONB_TYPE_STRING:
    {
      // A variable length comes first, then the bytes of the string.
      size_t str_len;
      size_t prefix_len;
      if (read_variable_length(data, len, &str_len, &prefix_len) ||
          len < prefix_len + str_len)
        return err(); /* purecov: inspected */
      return Value(data + prefix_len, str_len);
    }
  case JSONB_TYPE_OPAQUE:
    {
      // The first byte is a uint8 that maps to an enum_field_types.
      if (len < 1)
        return err(); /* purecov: inspected */
      const uint8 type_byte= static_cast<uint8>(*data);
      const enum_field_types field_type=
        static_cast<enum_field_types>(type_byte);
      // A variable length comes next, then the bytes of the value.
      const char *after_type= data + 1;
      size_t val_len;
      size_t prefix_len;
      if (read_variable_length(after_type, len - 1, &val_len, &prefix_len) ||
          len < 1 + prefix_len + val_len)
        return err(); /* purecov: inspected */
      return Value(field_type, after_type + prefix_len, val_len);
    }
  default:
    break;
  }
  // The type does not identify a scalar.
  return err();
}
/**
  Read an offset or a size from a buffer.
  @param data   the buffer to read from
  @param large  true: read a four byte unsigned integer (large storage
                format); false: read a two byte unsigned integer (small
                storage format)
  @return the offset or size that was read
*/
static size_t read_offset_or_size(const char *data, bool large)
{
  if (large)
    return uint4korr(data);
  return uint2korr(data);
}
/**
  Parse the header of a JSON array or object and create a Value that
  gives access to it.
  @param t      type (either ARRAY or OBJECT)
  @param data   pointer to the start of the array or object
  @param len    the maximum number of bytes to read from data
  @param large  true if the array or object is stored in the large
                storage format, false if it is stored in the small
                storage format
  @return a Value for the array or object, or an error value if the
  sizes found in the header are inconsistent with the available bytes
*/
static Value parse_array_or_object(Value::enum_type t, const char *data,
                                   size_t len, bool large)
{
  assert(t == Value::ARRAY || t == Value::OBJECT);

  const size_t field_size= large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
  const size_t key_entry_size=
    large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
  const size_t value_entry_size=
    large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;

  // There must be room for the element count and the size in bytes.
  const size_t fixed_part= 2 * field_size;
  if (len < fixed_part)
    return err();

  const size_t count= read_offset_or_size(data, large);
  const size_t total_bytes= read_offset_or_size(data + field_size, large);

  // The value cannot be bigger than the buffer it is stored in.
  if (total_bytes > len)
    return err();

  /*
    The header consists of the two fields read above, the key entries
    (objects only) and the value entries. It has to fit within the value.
  */
  size_t header_bytes= fixed_part;
  if (t == Value::OBJECT)
    header_bytes+= count * key_entry_size;
  header_bytes+= count * value_entry_size;
  if (header_bytes > total_bytes)
    return err(); /* purecov: inspected */

  return Value(t, data, total_bytes, count, large);
}
/**
  Parse a value of any type, located anywhere in a JSON document.
  @param type  the binary type of the value to parse
  @param data  pointer to the first byte of the serialized value
  @param len   the maximum number of bytes to read from data
  @return an object that allows access to the value
*/
static Value parse_value(uint8 type, const char *data, size_t len)
{
  switch (type)
  {
  case JSONB_TYPE_SMALL_OBJECT:
  case JSONB_TYPE_LARGE_OBJECT:
    return parse_array_or_object(Value::OBJECT, data, len,
                                 type == JSONB_TYPE_LARGE_OBJECT);
  case JSONB_TYPE_SMALL_ARRAY:
  case JSONB_TYPE_LARGE_ARRAY:
    return parse_array_or_object(Value::ARRAY, data, len,
                                 type == JSONB_TYPE_LARGE_ARRAY);
  default:
    break;
  }
  // Not an array or object, so treat it as a scalar.
  return parse_scalar(type, data, len);
}
/*
  Parse a complete binary JSON document of len bytes. Returns an error
  value if the document is empty.
*/
Value parse_binary(const char *data, size_t len)
{
  // The document begins with one byte that holds the type of the value.
  if (len == 0)
    return err(); /* purecov: inspected */
  const char *payload= data + 1;
  const size_t payload_len= len - 1;
  return parse_value(data[0], payload, payload_len);
}
/**
  Get the element at the specified position of a JSON array or a JSON
  object. When called on a JSON object, it returns the value
  associated with the key returned by key(pos).

  An inlined scalar is read directly from the value entry. For any
  other value, the entry holds an offset (relative to the start of this
  array or object) to where the value is stored.

  @param pos the index of the element
  @return a value representing the specified element, or a value where
  type() returns ERROR if pos does not point to an element or the
  offset stored in the value entry is not valid
*/
Value Value::element(size_t pos) const
{
  assert(m_type == ARRAY || m_type == OBJECT);
  if (pos >= m_element_count)
    return err();
  /*
    Value entries come after the two length fields if it's an array, or
    after the two length fields and all the key entries if it's an object.
  */
  size_t first_entry_offset=
    2 * (m_large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE);
  if (type() == OBJECT)
    first_entry_offset+=
      m_element_count * (m_large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL);
  const size_t entry_size=
    m_large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
  const size_t entry_offset= first_entry_offset + entry_size * pos;
  uint8 type= m_data[entry_offset];
  /*
    Check if this is an inlined scalar value. If so, return it.
    The scalar will be inlined just after the byte that identifies the
    type, so it's found on entry_offset + 1.
  */
  if (type == JSONB_TYPE_INT16 || type == JSONB_TYPE_UINT16 ||
      type == JSONB_TYPE_LITERAL ||
      (m_large && (type == JSONB_TYPE_INT32 || type == JSONB_TYPE_UINT32)))
    return parse_scalar(type, m_data + entry_offset + 1, entry_size - 1);
  /*
    Otherwise, it's a non-inlined value, and the offset to where the value
    is stored, can be found right after the type byte in the entry.
  */
  size_t value_offset= read_offset_or_size(m_data + entry_offset + 1, m_large);
  /*
    The value must be stored inside this array or object, and after the
    value entry that refers to it. An offset that points back into the
    header can only come from a corrupt document, and following it could
    make a container contain itself, so that walking the document (for
    example in is_valid()) would never terminate.
  */
  if (m_length < value_offset || value_offset < entry_offset + entry_size)
    return err(); /* purecov: inspected */
  return parse_value(type, m_data + value_offset, m_length - value_offset);
}
/**
  Get the key of the member stored at the specified position in a JSON
  object.

  The key entry at the given position supplies the offset and the length
  of the key. Both are checked against the layout of the object before a
  value referring to the key bytes is returned.

  @param pos  the index of the member
  @return the key of the specified member, or a value where type()
  returns ERROR if pos does not point to a member
*/
Value Value::key(size_t pos) const
{
  assert(m_type == OBJECT);

  if (pos >= m_element_count)
    return err();

  // Pick the field and entry sizes that match the storage format.
  size_t offset_size;
  size_t key_entry_size;
  size_t value_entry_size;
  if (m_large)
  {
    offset_size= LARGE_OFFSET_SIZE;
    key_entry_size= KEY_ENTRY_SIZE_LARGE;
    value_entry_size= VALUE_ENTRY_SIZE_LARGE;
  }
  else
  {
    offset_size= SMALL_OFFSET_SIZE;
    key_entry_size= KEY_ENTRY_SIZE_SMALL;
    value_entry_size= VALUE_ENTRY_SIZE_SMALL;
  }

  // Key entries follow the two length fields, each of size offset_size.
  const size_t entry_offset= 2 * offset_size + key_entry_size * pos;
  const char *entry= m_data + entry_offset;

  // A key entry holds the offset of the key followed by a two-byte length.
  const size_t key_offset= read_offset_or_size(entry, m_large);
  const size_t key_length= uint2korr(entry + offset_size);

  /*
    The remaining key entries and all the value entries lie between this
    key entry and the first position where a key may be stored.
  */
  const size_t min_key_offset= entry_offset +
    (m_element_count - pos) * key_entry_size +
    m_element_count * value_entry_size;

  const bool starts_too_early= key_offset < min_key_offset;
  const bool ends_too_late= m_length < key_offset + key_length;
  if (starts_too_early || ends_too_late)
    return err();                               /* purecov: inspected */

  return Value(m_data + key_offset, key_length);
}
/**
  Get the value associated with the specified key in a JSON object.

  The key entries are searched with a binary search. Entries are compared
  on key length first, and on key contents only when the lengths are equal.
  Before the contents of a stored key are compared, the key's offset is
  checked against the length of the object so that a corrupt key entry
  cannot cause a read outside of the object's buffer.

  @param[in] key  pointer to the key
  @param[in] len  length of the key
  @return the value associated with the key, if there is one. otherwise,
  returns ERROR. ERROR is also returned if a key entry visited during the
  search refers to bytes outside of the object
*/
Value Value::lookup(const char *key, size_t len) const
{
  assert(m_type == OBJECT);

  const size_t offset_size=
    (m_large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE);

  const size_t entry_size=
    (m_large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL);

  // The first key entry is located right after the two length fields.
  const size_t first_entry_offset= 2 * offset_size;

  size_t lo= 0U;                // lower bound for binary search (inclusive)
  size_t hi= m_element_count;   // upper bound for binary search (exclusive)

  while (lo < hi)
  {
    // Find the entry in the middle of the search interval.
    const size_t idx= lo + (hi - lo) / 2;
    const size_t entry_offset= first_entry_offset + idx * entry_size;

    // Keys are ordered on length, so check length first.
    const size_t key_len= uint2korr(m_data + entry_offset + offset_size);
    if (len > key_len)
      lo= idx + 1;
    else if (len < key_len)
      hi= idx;
    else
    {
      // The keys had the same length, so compare their contents.
      const size_t key_offset=
        read_offset_or_size(m_data + entry_offset, m_large);

      /*
        The stored key must lie entirely within this object. The offset
        comes straight from the document, so check it before reading.
      */
      if (key_offset > m_length || len > m_length - key_offset)
        return err();

      const int cmp= memcmp(key, m_data + key_offset, len);
      if (cmp > 0)
        lo= idx + 1;
      else if (cmp < 0)
        hi= idx;
      else
        return element(idx);
    }
  }

  return err();
}
/**
  Copy the binary representation of this value into a buffer,
  replacing the contents of the receiving buffer.

  Arrays and objects are written as a type byte followed by m_length bytes
  taken from m_data. Strings and opaque values are written as their header
  followed by m_length bytes taken from m_data. Numbers and literals are
  written by building a temporary DOM node and passing it to serialize().

  @param buf  the receiving buffer
  @return false on success, true otherwise
*/
bool Value::raw_binary(String *buf) const
{
  // Start from an empty buffer with the binary character set.
  buf->length(0);
  buf->set_charset(&my_charset_bin);

  switch (m_type)
  {
  case OBJECT:
  case ARRAY:
    {
      // The type byte depends on both the kind of value and the format.
      char type_byte;
      if (m_type == OBJECT)
        type_byte= m_large ? JSONB_TYPE_LARGE_OBJECT : JSONB_TYPE_SMALL_OBJECT;
      else
        type_byte= m_large ? JSONB_TYPE_LARGE_ARRAY : JSONB_TYPE_SMALL_ARRAY;

      if (buf->append(type_byte))
        return true;
      return buf->append(m_data, m_length);
    }
  case STRING:
    // Type byte, variable-length length field, then the string bytes.
    if (buf->append(JSONB_TYPE_STRING))
      return true;
    if (append_variable_length(buf, m_length))
      return true;
    return buf->append(m_data, m_length);
  case INT:
    {
      Json_int int_dom(get_int64());
      return serialize(&int_dom, buf) != OK;
    }
  case UINT:
    {
      Json_uint uint_dom(get_uint64());
      return serialize(&uint_dom, buf) != OK;
    }
  case DOUBLE:
    {
      Json_double double_dom(get_double());
      return serialize(&double_dom, buf) != OK;
    }
  case LITERAL_NULL:
    {
      Json_null null_dom;
      return serialize(&null_dom, buf) != OK;
    }
  case LITERAL_TRUE:
  case LITERAL_FALSE:
    {
      Json_boolean bool_dom(m_type == LITERAL_TRUE);
      return serialize(&bool_dom, buf) != OK;
    }
  case OPAQUE:
    // Type byte, field type, variable-length length field, then the bytes.
    if (buf->append(JSONB_TYPE_OPAQUE))
      return true;
    if (buf->append(field_type()))
      return true;
    if (append_variable_length(buf, m_length))
      return true;
    return buf->append(m_data, m_length);
  case ERROR:
    break;                                      /* purecov: inspected */
  }

  // Only reached for ERROR, which has no binary representation to copy.
  /* purecov: begin deadcode */
  DBUG_ABORT();
  return true;
  /* purecov: end */
}
} // end namespace json_binary