[clangd] Add parsing and value inspection to JSONExpr.
Summary:
This will replace the places where we're using YAMLParser to parse JSON now:
- the new marshalling code (T::parse()) should handle fewer cases and require
fewer explicit casts
- we'll early-reject invalid JSON that YAMLParser accepts
- we'll be able to fix protocol-parsing bugs caused by the fact that YAML can
only parse forward
I plan to do the conversion as soon as this lands, but I don't want it in one
patch as the protocol.cpp changes are conflict-prone.
Reviewers: ioeric
Subscribers: ilya-biryukov, cfe-commits
Differential Revision: https://reviews.llvm.org/D40182
llvm-svn: 318774
This commit is contained in:
parent
eb89b1d46f
commit
adbaebc242
|
|
@ -22,10 +22,10 @@ void Expr::copyFrom(const Expr &M) {
|
||||||
create<std::string>(M.as<std::string>());
|
create<std::string>(M.as<std::string>());
|
||||||
break;
|
break;
|
||||||
case T_Object:
|
case T_Object:
|
||||||
create<Object>(M.as<Object>());
|
create<ObjectExpr>(M.as<ObjectExpr>());
|
||||||
break;
|
break;
|
||||||
case T_Array:
|
case T_Array:
|
||||||
create<Array>(M.as<Array>());
|
create<ArrayExpr>(M.as<ArrayExpr>());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -46,11 +46,11 @@ void Expr::moveFrom(const Expr &&M) {
|
||||||
M.Type = T_Null;
|
M.Type = T_Null;
|
||||||
break;
|
break;
|
||||||
case T_Object:
|
case T_Object:
|
||||||
create<Object>(std::move(M.as<Object>()));
|
create<ObjectExpr>(std::move(M.as<ObjectExpr>()));
|
||||||
M.Type = T_Null;
|
M.Type = T_Null;
|
||||||
break;
|
break;
|
||||||
case T_Array:
|
case T_Array:
|
||||||
create<Array>(std::move(M.as<Array>()));
|
create<ArrayExpr>(std::move(M.as<ArrayExpr>()));
|
||||||
M.Type = T_Null;
|
M.Type = T_Null;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -69,14 +69,318 @@ void Expr::destroy() {
|
||||||
as<std::string>().~basic_string();
|
as<std::string>().~basic_string();
|
||||||
break;
|
break;
|
||||||
case T_Object:
|
case T_Object:
|
||||||
as<Object>().~Object();
|
as<ObjectExpr>().~ObjectExpr();
|
||||||
break;
|
break;
|
||||||
case T_Array:
|
case T_Array:
|
||||||
as<Array>().~Array();
|
as<ArrayExpr>().~ArrayExpr();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Simple recursive-descent JSON parser.
|
||||||
|
class Parser {
|
||||||
|
public:
|
||||||
|
Parser(StringRef JSON)
|
||||||
|
: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
|
||||||
|
|
||||||
|
bool parseExpr(Expr &Out);
|
||||||
|
|
||||||
|
bool assertEnd() {
|
||||||
|
eatWhitespace();
|
||||||
|
if (P == End)
|
||||||
|
return true;
|
||||||
|
return parseError("Text after end of document");
|
||||||
|
}
|
||||||
|
|
||||||
|
Error takeError() {
|
||||||
|
assert(Error);
|
||||||
|
return std::move(*Error);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void eatWhitespace() {
|
||||||
|
while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
|
||||||
|
++P;
|
||||||
|
}
|
||||||
|
|
||||||
|
// On invalid syntax, parseX() functions return false and and set Error.
|
||||||
|
bool parseNumber(char First, double &Out);
|
||||||
|
bool parseString(std::string &Out);
|
||||||
|
bool parseUnicode(std::string &Out);
|
||||||
|
bool parseError(const char *Msg); // always returns false
|
||||||
|
|
||||||
|
char next() { return P == End ? 0 : *P++; }
|
||||||
|
char peek() { return P == End ? 0 : *P; }
|
||||||
|
static bool isNumber(char C) {
|
||||||
|
return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
|
||||||
|
C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
|
||||||
|
C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
|
||||||
|
}
|
||||||
|
static void encodeUtf8(uint32_t Rune, std::string &Out);
|
||||||
|
|
||||||
|
Optional<Error> Error;
|
||||||
|
const char *Start, *P, *End;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Parses a single JSON value starting at the cursor and stores it in Out.
// Leading whitespace is skipped; text following the value is not consumed.
// Returns true on success. On invalid syntax an error is recorded through
// parseError() and false is returned; Out may then hold a partial value.
bool Parser::parseExpr(Expr &Out) {
  eatWhitespace();
  if (P == End)
    return parseError("Unexpected EOF");

  const char Lead = next();
  switch (Lead) {
  // null/true/false are identified by their first character; the remaining
  // characters are then verified one at a time.
  case 'n':
    Out = nullptr;
    if (next() == 'u' && next() == 'l' && next() == 'l')
      return true;
    return parseError("Invalid bareword");
  case 't':
    Out = true;
    if (next() == 'r' && next() == 'u' && next() == 'e')
      return true;
    return parseError("Invalid bareword");
  case 'f':
    Out = false;
    if (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e')
      return true;
    return parseError("Invalid bareword");

  case '"': {
    std::string Text;
    if (!parseString(Text))
      return false;
    Out = std::move(Text);
    return true;
  }

  case '[': {
    Out = json::ary{};
    json::ary &Elements = *Out.array();
    eatWhitespace();
    if (peek() == ']') {
      ++P;
      return true;
    }
    while (true) {
      // Append a placeholder and parse the element directly into it.
      Elements.emplace_back(nullptr);
      if (!parseExpr(Elements.back()))
        return false;
      eatWhitespace();
      const char Sep = next();
      if (Sep == ']')
        return true;
      if (Sep != ',')
        return parseError("Expected , or ] after array element");
      eatWhitespace();
    }
  }

  case '{': {
    Out = json::obj{};
    json::obj &Props = *Out.object();
    eatWhitespace();
    if (peek() == '}') {
      ++P;
      return true;
    }
    while (true) {
      if (next() != '"')
        return parseError("Expected object key");
      std::string Key;
      if (!parseString(Key))
        return false;
      eatWhitespace();
      if (next() != ':')
        return parseError("Expected : after object key");
      eatWhitespace();
      // operator[] creates the slot, and the value is parsed in place.
      if (!parseExpr(Props[std::move(Key)]))
        return false;
      eatWhitespace();
      const char Sep = next();
      if (Sep == '}')
        return true;
      if (Sep != ',')
        return parseError("Expected , or } after object property");
      eatWhitespace();
    }
  }

  default: {
    if (!isNumber(Lead))
      return parseError("Expected JSON value");
    double Value;
    if (!parseNumber(Lead, Value))
      return false;
    Out = Value;
    return true;
  }
  }
}
|
||||||
|
|
||||||
|
bool Parser::parseNumber(char First, double &Out) {
|
||||||
|
SmallString<24> S;
|
||||||
|
S.push_back(First);
|
||||||
|
while (isNumber(peek()))
|
||||||
|
S.push_back(next());
|
||||||
|
char *End;
|
||||||
|
Out = std::strtod(S.c_str(), &End);
|
||||||
|
return End == S.end() || parseError("Invalid number");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Parser::parseString(std::string &Out) {
|
||||||
|
// leading quote was already consumed.
|
||||||
|
for (char C = next(); C != '"'; C = next()) {
|
||||||
|
if (LLVM_UNLIKELY(P == End))
|
||||||
|
return parseError("Unterminated string");
|
||||||
|
if (LLVM_UNLIKELY((C & 0x1f) == C))
|
||||||
|
return parseError("Control character in string");
|
||||||
|
if (LLVM_LIKELY(C != '\\')) {
|
||||||
|
Out.push_back(C);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Handle escape sequence.
|
||||||
|
switch (C = next()) {
|
||||||
|
case '"':
|
||||||
|
case '\\':
|
||||||
|
case '/':
|
||||||
|
Out.push_back(C);
|
||||||
|
break;
|
||||||
|
case 'b':
|
||||||
|
Out.push_back('\b');
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
Out.push_back('\f');
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
Out.push_back('\n');
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
Out.push_back('\r');
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
Out.push_back('\t');
|
||||||
|
break;
|
||||||
|
case 'u':
|
||||||
|
if (!parseUnicode(Out))
|
||||||
|
return false;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return parseError("Invalid escape sequence");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Parser::encodeUtf8(uint32_t Rune, std::string &Out) {
|
||||||
|
if (Rune <= 0x7F) {
|
||||||
|
Out.push_back(Rune & 0x7F);
|
||||||
|
} else if (Rune <= 0x7FF) {
|
||||||
|
uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6);
|
||||||
|
uint8_t SecondByte = 0x80 | (Rune & 0x3F);
|
||||||
|
Out.push_back(FirstByte);
|
||||||
|
Out.push_back(SecondByte);
|
||||||
|
} else if (Rune <= 0xFFFF) {
|
||||||
|
uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12);
|
||||||
|
uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6);
|
||||||
|
uint8_t ThirdByte = 0x80 | (Rune & 0x3F);
|
||||||
|
Out.push_back(FirstByte);
|
||||||
|
Out.push_back(SecondByte);
|
||||||
|
Out.push_back(ThirdByte);
|
||||||
|
} else if (Rune <= 0x10FFFF) {
|
||||||
|
uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18);
|
||||||
|
uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12);
|
||||||
|
uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6);
|
||||||
|
uint8_t FourthByte = 0x80 | (Rune & 0x3F);
|
||||||
|
Out.push_back(FirstByte);
|
||||||
|
Out.push_back(SecondByte);
|
||||||
|
Out.push_back(ThirdByte);
|
||||||
|
Out.push_back(FourthByte);
|
||||||
|
} else {
|
||||||
|
llvm_unreachable("Invalid codepoint");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse a \uNNNN escape sequence, the \u have already been consumed.
|
||||||
|
// May parse multiple escapes in the presence of surrogate pairs.
|
||||||
|
bool Parser::parseUnicode(std::string &Out) {
|
||||||
|
// Note that invalid unicode is not a JSON error. It gets replaced by U+FFFD.
|
||||||
|
auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
|
||||||
|
auto Parse4Hex = [this](uint16_t &Out) {
|
||||||
|
Out = 0;
|
||||||
|
char Bytes[] = {next(), next(), next(), next()};
|
||||||
|
for (unsigned char C : Bytes) {
|
||||||
|
if (!std::isxdigit(C))
|
||||||
|
return parseError("Invalid \\u escape sequence");
|
||||||
|
Out <<= 4;
|
||||||
|
Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
uint16_t First;
|
||||||
|
if (!Parse4Hex(First))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// We loop to allow proper surrogate-pair error handling.
|
||||||
|
while (true) {
|
||||||
|
if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) { // BMP.
|
||||||
|
encodeUtf8(First, Out);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (First >= 0xDC00) {
|
||||||
|
Invalid(); // Lone trailing surrogate.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have a leading surrogate, and need a trailing one.
|
||||||
|
// Don't advance P: a lone surrogate is valid JSON (but invalid unicode)
|
||||||
|
if (P + 2 > End || *P != '\\' || *(P + 1) != 'u') {
|
||||||
|
Invalid(); // Lone leading not followed by \u...
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
P += 2;
|
||||||
|
uint16_t Second;
|
||||||
|
if (!Parse4Hex(Second))
|
||||||
|
return false;
|
||||||
|
if (Second < 0xDC00 && Second >= 0xE000) {
|
||||||
|
Invalid(); // Leading surrogate not followed by trailing.
|
||||||
|
First = Second; // Second escape still needs to be processed.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Valid surrogate pair.
|
||||||
|
encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records a ParseError carrying Msg and the current cursor position, then
// returns false so that callers can write `return parseError(...)`.
// The position is the 1-based line number, the cursor's distance from the
// start of that line, and its distance from the start of the input.
bool Parser::parseError(const char *Msg) {
  int LineNo = 1;
  const char *LineBegin = Start;
  const char *Scan = Start;
  while (Scan < P) {
    const bool IsNewline = (*Scan == 0x0A);
    ++Scan;
    if (IsNewline) {
      ++LineNo;
      LineBegin = Scan;
    }
  }
  Error.emplace(
      llvm::make_unique<ParseError>(Msg, LineNo, P - LineBegin, P - Start));
  return false;
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Parses JSON as a complete document: exactly one value, optionally
// surrounded by whitespace. Returns the value, or the parser's recorded error
// if the text is malformed or has trailing content.
Expected<Expr> parse(StringRef JSON) {
  Parser Reader(JSON);
  json::Expr Root = nullptr;
  const bool Ok = Reader.parseExpr(Root) && Reader.assertEnd();
  if (!Ok)
    return Reader.takeError();
  return std::move(Root);
}
|
||||||
|
char ParseError::ID = 0;
|
||||||
|
|
||||||
} // namespace json
|
} // namespace json
|
||||||
} // namespace clangd
|
} // namespace clangd
|
||||||
} // namespace clang
|
} // namespace clang
|
||||||
|
|
@ -144,7 +448,7 @@ void clang::clangd::json::Expr::print(raw_ostream &OS,
|
||||||
bool Comma = false;
|
bool Comma = false;
|
||||||
OS << '{';
|
OS << '{';
|
||||||
I(Indent);
|
I(Indent);
|
||||||
for (const auto &P : as<Expr::Object>()) {
|
for (const auto &P : as<Expr::ObjectExpr>()) {
|
||||||
if (Comma)
|
if (Comma)
|
||||||
OS << ',';
|
OS << ',';
|
||||||
Comma = true;
|
Comma = true;
|
||||||
|
|
@ -164,7 +468,7 @@ void clang::clangd::json::Expr::print(raw_ostream &OS,
|
||||||
bool Comma = false;
|
bool Comma = false;
|
||||||
OS << '[';
|
OS << '[';
|
||||||
I(Indent);
|
I(Indent);
|
||||||
for (const auto &E : as<Expr::Array>()) {
|
for (const auto &E : as<Expr::ArrayExpr>()) {
|
||||||
if (Comma)
|
if (Comma)
|
||||||
OS << ',';
|
OS << ',';
|
||||||
Comma = true;
|
Comma = true;
|
||||||
|
|
@ -187,6 +491,25 @@ llvm::raw_ostream &operator<<(raw_ostream &OS, const Expr &E) {
|
||||||
E.print(OS, [](IndenterAction A) { /*ignore*/ });
|
E.print(OS, [](IndenterAction A) { /*ignore*/ });
|
||||||
return OS;
|
return OS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Structural equality: two Exprs are equal when they have the same kind and
// their payloads compare equal. Arrays and objects are compared through their
// own operator==.
bool operator==(const Expr &L, const Expr &R) {
  if (L.kind() != R.kind())
    return false;
  switch (L.kind()) {
  case Expr::Null:
    return L.null() == R.null();
  case Expr::Boolean:
    return L.boolean() == R.boolean();
  case Expr::Number:
    // Must use the numeric accessor: boolean() is empty for numbers, so
    // comparing through it would report every pair of numbers as equal.
    return L.number() == R.number();
  case Expr::String:
    return L.string() == R.string();
  case Expr::Array:
    return *L.array() == *R.array();
  case Expr::Object:
    return *L.object() == *R.object();
  }
  // Every Kind enumerator returns above.
  llvm_unreachable("Unknown expression kind");
}
|
||||||
} // namespace json
|
} // namespace json
|
||||||
} // namespace clangd
|
} // namespace clangd
|
||||||
} // namespace clang
|
} // namespace clang
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
//===--- JSONExpr.h - composable JSON expressions ---------------*- C++ -*-===//
|
//===--- JSONExpr.h - JSON expressions, parsing and serialization - C++ -*-===//
|
||||||
//
|
//
|
||||||
// The LLVM Compiler Infrastructure
|
// The LLVM Compiler Infrastructure
|
||||||
//
|
//
|
||||||
|
|
@ -7,6 +7,8 @@
|
||||||
//
|
//
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// FIXME: rename to JSON.h now that the scope is wider?
|
||||||
|
|
||||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_JSON_H
|
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_JSON_H
|
||||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_JSON_H
|
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_JSON_H
|
||||||
|
|
||||||
|
|
@ -14,6 +16,7 @@
|
||||||
|
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/Support/Error.h"
|
||||||
#include "llvm/Support/FormatVariadic.h"
|
#include "llvm/Support/FormatVariadic.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
|
|
@ -21,10 +24,12 @@ namespace clang {
|
||||||
namespace clangd {
|
namespace clangd {
|
||||||
namespace json {
|
namespace json {
|
||||||
|
|
||||||
// An Expr is an opaque temporary JSON structure used to compose documents.
|
// An Expr is a JSON value of unknown type.
|
||||||
// They can be copied, but should generally be moved.
|
// They can be copied, but should generally be moved.
|
||||||
//
|
//
|
||||||
// You can implicitly construct literals from:
|
// === Composing expressions ===
|
||||||
|
//
|
||||||
|
// You can implicitly construct Exprs from:
|
||||||
// - strings: std::string, SmallString, formatv, StringRef, char*
|
// - strings: std::string, SmallString, formatv, StringRef, char*
|
||||||
// (char*, and StringRef are references, not copies!)
|
// (char*, and StringRef are references, not copies!)
|
||||||
// - numbers
|
// - numbers
|
||||||
|
|
@ -39,25 +44,62 @@ namespace json {
|
||||||
// These can be list-initialized, or used to build up collections in a loop.
|
// These can be list-initialized, or used to build up collections in a loop.
|
||||||
// json::ary(Collection) converts all items in a collection to Exprs.
|
// json::ary(Collection) converts all items in a collection to Exprs.
|
||||||
//
|
//
|
||||||
|
// === Inspecting expressions ===
|
||||||
|
//
|
||||||
|
// Each Expr is one of the JSON kinds:
|
||||||
|
// null (nullptr_t)
|
||||||
|
// boolean (bool)
|
||||||
|
// number (double)
|
||||||
|
// string (StringRef)
|
||||||
|
// array (json::ary)
|
||||||
|
// object (json::obj)
|
||||||
|
//
|
||||||
|
// The kind can be queried directly, or implicitly via the typed accessors:
|
||||||
|
// if (Optional<StringRef> S = E.string())
|
||||||
|
// assert(E.kind() == Expr::String);
|
||||||
|
//
|
||||||
|
// Array and Object also have typed indexing accessors for easy traversal:
|
||||||
|
// Expected<Expr> E = parse(R"( {"options": {"font": "sans-serif"}} )");
|
||||||
|
// if (json::obj* O = E->object())
|
||||||
|
// if (json::obj* Opts = O->object("options"))
|
||||||
|
// if (Optional<StringRef> Font = Opts->string("font"))
|
||||||
|
// assert(Opts->at("font").kind() == Expr::String);
|
||||||
|
//
|
||||||
|
// === Serialization ===
|
||||||
|
//
|
||||||
// Exprs can be serialized to JSON:
|
// Exprs can be serialized to JSON:
|
||||||
// 1) raw_ostream << Expr // Basic formatting.
|
// 1) raw_ostream << Expr // Basic formatting.
|
||||||
// 2) raw_ostream << formatv("{0}", Expr) // Basic formatting.
|
// 2) raw_ostream << formatv("{0}", Expr) // Basic formatting.
|
||||||
// 3) raw_ostream << formatv("{0:2}", Expr) // Pretty-print with indent 2.
|
// 3) raw_ostream << formatv("{0:2}", Expr) // Pretty-print with indent 2.
|
||||||
|
//
|
||||||
|
// And parsed:
|
||||||
|
// Expected<Expr> E = json::parse("[1, 2, null]");
|
||||||
|
// assert(E && E->kind() == Expr::Array);
|
||||||
class Expr {
|
class Expr {
|
||||||
public:
|
public:
|
||||||
class Object;
|
enum Kind {
|
||||||
|
Null,
|
||||||
|
Boolean,
|
||||||
|
Number,
|
||||||
|
String,
|
||||||
|
Array,
|
||||||
|
Object,
|
||||||
|
};
|
||||||
|
class ObjectExpr;
|
||||||
class ObjectKey;
|
class ObjectKey;
|
||||||
class Array;
|
class ArrayExpr;
|
||||||
|
|
||||||
// It would be nice to have Expr() be null. But that would make {} null too...
|
// It would be nice to have Expr() be null. But that would make {} null too...
|
||||||
Expr(const Expr &M) { copyFrom(M); }
|
Expr(const Expr &M) { copyFrom(M); }
|
||||||
Expr(Expr &&M) { moveFrom(std::move(M)); }
|
Expr(Expr &&M) { moveFrom(std::move(M)); }
|
||||||
// "cheating" move-constructor for moving from initializer_list.
|
// "cheating" move-constructor for moving from initializer_list.
|
||||||
Expr(const Expr &&M) { moveFrom(std::move(M)); }
|
Expr(const Expr &&M) { moveFrom(std::move(M)); }
|
||||||
Expr(std::initializer_list<Expr> Elements) : Expr(Array(Elements)) {}
|
Expr(std::initializer_list<Expr> Elements) : Expr(ArrayExpr(Elements)) {}
|
||||||
Expr(Array &&Elements) : Type(T_Array) { create<Array>(std::move(Elements)); }
|
Expr(ArrayExpr &&Elements) : Type(T_Array) {
|
||||||
Expr(Object &&Properties) : Type(T_Object) {
|
create<ArrayExpr>(std::move(Elements));
|
||||||
create<Object>(std::move(Properties));
|
}
|
||||||
|
Expr(ObjectExpr &&Properties) : Type(T_Object) {
|
||||||
|
create<ObjectExpr>(std::move(Properties));
|
||||||
}
|
}
|
||||||
// Strings: types with value semantics.
|
// Strings: types with value semantics.
|
||||||
Expr(std::string &&V) : Type(T_String) { create<std::string>(std::move(V)); }
|
Expr(std::string &&V) : Type(T_String) { create<std::string>(std::move(V)); }
|
||||||
|
|
@ -104,6 +146,60 @@ public:
|
||||||
}
|
}
|
||||||
~Expr() { destroy(); }
|
~Expr() { destroy(); }
|
||||||
|
|
||||||
|
// Reports which JSON kind this value holds. The two internal string
// representations (T_String and T_StringRef) are both reported as String.
Kind kind() const {
  switch (Type) {
  case T_Array:
    return Array;
  case T_Object:
    return Object;
  case T_StringRef:
  case T_String:
    return String;
  case T_Number:
    return Number;
  case T_Boolean:
    return Boolean;
  case T_Null:
    return Null;
  }
}
|
||||||
|
|
||||||
|
// Typed accessors return None/nullptr if the Expr is not of this type.
|
||||||
|
llvm::Optional<std::nullptr_t> null() const {
|
||||||
|
if (LLVM_LIKELY(Type == T_Null))
|
||||||
|
return nullptr;
|
||||||
|
return llvm::None;
|
||||||
|
}
|
||||||
|
// Returns the stored value if this Expr is a boolean, None otherwise.
// The check must be against T_Boolean: testing T_Null would read the bool
// storage of a null value and report None for every real boolean.
llvm::Optional<bool> boolean() const {
  if (LLVM_LIKELY(Type == T_Boolean))
    return as<bool>();
  return llvm::None;
}
|
||||||
|
// Returns the stored double if this Expr is a number, None otherwise.
llvm::Optional<double> number() const {
  if (!LLVM_LIKELY(Type == T_Number))
    return llvm::None;
  return as<double>();
}
|
||||||
|
// Returns a view of the text if this Expr is a string, None otherwise.
// Both the owned (std::string) and the referenced (StringRef) forms are
// handled.
llvm::Optional<llvm::StringRef> string() const {
  switch (Type) {
  case T_String:
    return llvm::StringRef(as<std::string>());
  case T_StringRef:
    return as<llvm::StringRef>();
  default:
    return llvm::None;
  }
}
|
||||||
|
// Returns a pointer to the contained object, or nullptr if this Expr is not
// an object. Const and mutable overloads.
const ObjectExpr *object() const {
  if (LLVM_LIKELY(Type == T_Object))
    return &as<ObjectExpr>();
  return nullptr;
}
ObjectExpr *object() {
  if (LLVM_LIKELY(Type == T_Object))
    return &as<ObjectExpr>();
  return nullptr;
}
|
||||||
|
// Returns a pointer to the contained array, or nullptr if this Expr is not
// an array. Const and mutable overloads.
const ArrayExpr *array() const {
  if (LLVM_LIKELY(Type == T_Array))
    return &as<ArrayExpr>();
  return nullptr;
}
ArrayExpr *array() {
  if (LLVM_LIKELY(Type == T_Array))
    return &as<ArrayExpr>();
  return nullptr;
}
|
||||||
|
|
||||||
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Expr &);
|
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Expr &);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
@ -137,10 +233,8 @@ private:
|
||||||
mutable ExprType Type;
|
mutable ExprType Type;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// ObjectKey is a used to capture keys in Expr::Objects. It's like Expr but:
|
// ObjectKey is used to capture keys in Expr::ObjectExpr. Like Expr but:
|
||||||
// - only strings are allowed
|
// - only strings are allowed
|
||||||
// - it's copyable (for std::map)
|
|
||||||
// - we're slightly more eager to copy, to allow efficient key compares
|
|
||||||
// - it's optimized for the string literal case (Owned == nullptr)
|
// - it's optimized for the string literal case (Owned == nullptr)
|
||||||
class ObjectKey {
|
class ObjectKey {
|
||||||
public:
|
public:
|
||||||
|
|
@ -183,12 +277,12 @@ public:
|
||||||
llvm::StringRef Data;
|
llvm::StringRef Data;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Object : public std::map<ObjectKey, Expr> {
|
class ObjectExpr : public std::map<ObjectKey, Expr> {
|
||||||
public:
|
public:
|
||||||
explicit Object() {}
|
explicit ObjectExpr() {}
|
||||||
// Use a custom struct for list-init, because pair forces extra copies.
|
// Use a custom struct for list-init, because pair forces extra copies.
|
||||||
struct KV;
|
struct KV;
|
||||||
explicit Object(std::initializer_list<KV> Properties);
|
explicit ObjectExpr(std::initializer_list<KV> Properties);
|
||||||
|
|
||||||
// Allow [] as if Expr was default-constructible as null.
|
// Allow [] as if Expr was default-constructible as null.
|
||||||
Expr &operator[](const ObjectKey &K) {
|
Expr &operator[](const ObjectKey &K) {
|
||||||
|
|
@ -199,15 +293,15 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class Array : public std::vector<Expr> {
|
class ArrayExpr : public std::vector<Expr> {
|
||||||
public:
|
public:
|
||||||
explicit Array() {}
|
explicit ArrayExpr() {}
|
||||||
explicit Array(std::initializer_list<Expr> Elements) {
|
explicit ArrayExpr(std::initializer_list<Expr> Elements) {
|
||||||
reserve(Elements.size());
|
reserve(Elements.size());
|
||||||
for (const Expr &V : Elements)
|
for (const Expr &V : Elements)
|
||||||
emplace_back(std::move(V));
|
emplace_back(std::move(V));
|
||||||
};
|
};
|
||||||
template <typename Collection> explicit Array(const Collection &C) {
|
template <typename Collection> explicit ArrayExpr(const Collection &C) {
|
||||||
for (const auto &V : C)
|
for (const auto &V : C)
|
||||||
emplace_back(V);
|
emplace_back(V);
|
||||||
}
|
}
|
||||||
|
|
@ -215,23 +309,50 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
mutable llvm::AlignedCharArrayUnion<bool, double, llvm::StringRef,
|
mutable llvm::AlignedCharArrayUnion<bool, double, llvm::StringRef,
|
||||||
std::string, Array, Object>
|
std::string, ArrayExpr, ObjectExpr>
|
||||||
Union;
|
Union;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Expr::Object::KV {
|
bool operator==(const Expr &, const Expr &);
|
||||||
|
// Inequality for Exprs, defined as the negation of operator==.
inline bool operator!=(const Expr &L, const Expr &R) { return !(L == R); }
|
||||||
|
// Object keys are equal when their string contents are equal.
inline bool operator==(const Expr::ObjectKey &L, const Expr::ObjectKey &R) {
  const llvm::StringRef Lhs(L), Rhs(R);
  return Lhs == Rhs;
}
|
||||||
|
// Inequality for object keys, defined as the negation of operator==.
inline bool operator!=(const Expr::ObjectKey &L, const Expr::ObjectKey &R) {
  if (L == R)
    return false;
  return true;
}
|
||||||
|
|
||||||
|
struct Expr::ObjectExpr::KV {
|
||||||
ObjectKey K;
|
ObjectKey K;
|
||||||
Expr V;
|
Expr V;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline Expr::Object::Object(std::initializer_list<KV> Properties) {
|
inline Expr::ObjectExpr::ObjectExpr(std::initializer_list<KV> Properties) {
|
||||||
for (const auto &P : Properties)
|
for (const auto &P : Properties)
|
||||||
emplace(std::move(P.K), std::move(P.V));
|
emplace(std::move(P.K), std::move(P.V));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Give Expr::{Object,Array} more convenient names for literal use.
|
// Give Expr::{ObjectExpr,ArrayExpr} more convenient names for literal use.
|
||||||
using obj = Expr::Object;
|
using obj = Expr::ObjectExpr;
|
||||||
using ary = Expr::Array;
|
using ary = Expr::ArrayExpr;
|
||||||
|
|
||||||
|
llvm::Expected<Expr> parse(llvm::StringRef JSON);
|
||||||
|
|
||||||
|
// Error reported when JSON text cannot be parsed. It carries a message and
// the position (line, column, byte offset) at which the problem was detected.
// The message is kept as a raw pointer and is not copied.
class ParseError : public llvm::ErrorInfo<ParseError> {
public:
  static char ID;

  ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
      : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}

  // Writes the error as "[line:column, byte=offset]: message".
  void log(llvm::raw_ostream &OS) const override {
    OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
  }

  // Parse errors have no std::error_code equivalent.
  std::error_code convertToErrorCode() const override {
    return llvm::inconvertibleErrorCode();
  }

private:
  const char *Msg;
  unsigned Line, Column, Offset;
};
|
||||||
|
|
||||||
} // namespace json
|
} // namespace json
|
||||||
} // namespace clangd
|
} // namespace clangd
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,9 @@
|
||||||
namespace clang {
|
namespace clang {
|
||||||
namespace clangd {
|
namespace clangd {
|
||||||
namespace json {
|
namespace json {
|
||||||
|
void PrintTo(const Expr &E, std::ostream *OS) {
|
||||||
|
llvm::raw_os_ostream(*OS) << llvm::formatv("{0:2}", E);
|
||||||
|
}
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
std::string s(const Expr &E) { return llvm::formatv("{0}", E).str(); }
|
std::string s(const Expr &E) { return llvm::formatv("{0}", E).str(); }
|
||||||
|
|
@ -108,6 +111,77 @@ TEST(JSONExprTests, PrettyPrinting) {
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(JSONTest, Parse) {
  // Parses S and checks that the result matches Expected, both structurally
  // and in serialized form. A parse failure is reported as a test failure.
  auto Compare = [](llvm::StringRef S, Expr Expected) {
    auto E = parse(S);
    if (!E) {
      handleAllErrors(E.takeError(), [S](const llvm::ErrorInfoBase &Info) {
        FAIL() << "Failed to parse JSON >>> " << S << " <<<: " << Info.message();
      });
      return;
    }
    // Compare both string forms and with operator==, in case we have bugs.
    EXPECT_EQ(*E, Expected);
    EXPECT_EQ(sp(*E), sp(Expected));
  };

  // Barewords.
  Compare(R"(true)", true);
  Compare(R"(false)", false);
  Compare(R"(null)", nullptr);

  // Numbers, including overflow to infinity.
  Compare(R"(42)", 42);
  Compare(R"(2.5)", 2.5);
  Compare(R"(2e50)", 2e50);
  Compare(R"(1.2e3456789)", 1.0 / 0.0);

  // Strings and escapes.
  Compare(R"("foo")", "foo");
  Compare(R"("\"\\\b\f\n\r\t")", "\"\\\b\f\n\r\t");
  Compare(R"("\u0000")", llvm::StringRef("\0", 1));
  Compare("\"\x7f\"", "\x7f");
  Compare(R"("\ud801\udc37")", "\U00010437"); // UTF16 surrogate pair escape.
  Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", "\u20ac\U0001d11e"); // UTF8
  Compare(R"("\ud801")", "\ufffd"); // Invalid codepoint.

  // Objects and arrays.
  Compare(R"({"":0,"":0})", obj{{"", 0}});
  Compare(R"({"obj":{},"arr":[]})", obj{{"obj", obj{}}, {"arr", {}}});
  Compare(R"({"\n":{"\u0000":[[[[]]]]}})",
          obj{{"\n", obj{
                         {llvm::StringRef("\0", 1), {{{{}}}}},
                     }}});
  Compare("\r[\n\t] ", {});
}
|
||||||
|
|
||||||
|
TEST(JSONTest, ParseErrors) {
  // Asserts that parsing S fails and that the error text contains Msg.
  auto ExpectErr = [](llvm::StringRef Msg, llvm::StringRef S) {
    if (auto E = parse(S)) {
      FAIL() << "Parsed JSON >>> " << S << " <<< but wanted error: " << Msg;
    } else {
      handleAllErrors(E.takeError(), [S, Msg](const llvm::ErrorInfoBase &E) {
        EXPECT_THAT(E.message(), testing::HasSubstr(Msg)) << S;
      });
    }
  };
  ExpectErr("Unexpected EOF", "");
  ExpectErr("Unexpected EOF", "[");
  ExpectErr("Text after end of document", "[][]");
  ExpectErr("Invalid bareword", "fuzzy");
  ExpectErr("Expected , or ]", "[2?]");
  ExpectErr("Expected object key", "{a:2}");
  ExpectErr("Expected : after object key", R"({"a",2})");
  ExpectErr("Expected , or } after object property", R"({"a":2 "b":3})");
  ExpectErr("Expected JSON value", R"([&%!])");
  ExpectErr("Invalid number", "1e1.0");
  ExpectErr("Unterminated string", R"("abc\"def)");
  ExpectErr("Control character in string", "\"abc\ndef\"");
  ExpectErr("Invalid escape sequence", R"("\030")");
  ExpectErr("Invalid \\u escape sequence", R"("\usuck")");
  // The expected position depends on the exact bytes of the literal below:
  // each inner line is indented by two spaces, which puts the cursor at line
  // 3, column 3, byte 19 once the first character of the bad key has been
  // consumed. Do not re-indent it.
  ExpectErr("[3:3, byte=19]", R"({
  "valid": 1,
  invalid: 2
})");
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace json
|
} // namespace json
|
||||||
} // namespace clangd
|
} // namespace clangd
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue