@@ -1735,7 +1735,7 @@ OBJS-libcommon = diagnostic-spec.o diagnostic.o diagnostic-color.o \
diagnostic-show-locus.o \
edit-context.o \
pretty-print.o intl.o \
- json.o \
+ json.o json-parsing.o \
sbitmap.o \
vec.o input.o hash-table.o ggc-none.o memory-block.o \
selftest.o selftest-diagnostic.o sort.o
new file mode 100644
@@ -0,0 +1,2391 @@
+/* JSON parsing
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "json-parsing.h"
+#include "pretty-print.h"
+#include "math.h"
+#include "selftest.h"
+
+using namespace json;
+
+/* Declarations relating to parsing JSON, all within an
+ anonymous namespace. */
+
+namespace {
+
+/* A typedef representing a single unicode character. */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token. */
+
+enum token_id
+{
+ TOK_ERROR,
+
+ TOK_EOF,
+
+ /* Punctuation. */
+ TOK_OPEN_SQUARE,
+ TOK_OPEN_CURLY,
+ TOK_CLOSE_SQUARE,
+ TOK_CLOSE_CURLY,
+ TOK_COLON,
+ TOK_COMMA,
+
+ /* Literal names. */
+ TOK_TRUE,
+ TOK_FALSE,
+ TOK_NULL,
+
+ TOK_STRING,
+ TOK_FLOAT_NUMBER,
+ TOK_INTEGER_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id. */
+
+static const char *token_id_name[] = {
+ "error",
+ "EOF",
+ "'['",
+ "'{'",
+ "']'",
+ "'}'",
+ "':'",
+ "','",
+ "'true'",
+ "'false'",
+ "'null'",
+ "string",
+ "number",
+ "number"
+};
+
+/* Tokens within the JSON lexer. */
+
+struct token
+{
+ /* The kind of token. */
+ enum token_id id;
+
+ /* The location of this token within the unicode
+ character stream. */
+ location_map::range range;
+
+ union
+ {
+ /* Value for TOK_ERROR and TOK_STRING. */
+ char *string;
+
+ /* Value for TOK_FLOAT_NUMBER. */
+ double float_number;
+
+ /* Value for TOK_INTEGER_NUMBER. */
+ long integer_number;
+ } u;
+};
+
+/* A class for lexing JSON. */
+
+class lexer
+{
+ public:
+ lexer (bool support_comments);
+ ~lexer ();
+ bool add_utf8 (size_t length, const char *utf8_buf, error **err_out);
+
+ const token *peek ();
+ void consume ();
+
+ private:
+ bool get_char (unichar &out_char, location_map::point *out_point);
+ void unget_char ();
+ location_map::point get_next_point () const;
+ static void dump_token (FILE *outf, const token *tok);
+ void lex_token (token *out);
+ void lex_string (token *out);
+ void lex_number (token *out, unichar first_char);
+ bool rest_of_literal (token *out, const char *suffix);
+ error *make_error (const char *msg);
+ bool consume_single_line_comment (token *out);
+ bool consume_multiline_comment (token *out);
+
+ private:
+ auto_vec<unichar> m_buffer;
+ int m_next_char_idx;
+ int m_next_char_line;
+ int m_next_char_column;
+ int m_prev_line_final_column; /* for handling unget_char after a '\n'. */
+
+ static const int MAX_TOKENS = 1;
+ token m_next_tokens[MAX_TOKENS];
+ int m_num_next_tokens;
+
+ bool m_support_comments;
+};
+
+/* A class for parsing JSON. */
+
+class parser
+{
+ public:
+ parser (error **err_out, location_map *out_loc_map,
+ bool support_comments);
+ ~parser ();
+ bool add_utf8 (size_t length, const char *utf8_buf, error **err_out);
+ value *parse_value (int depth);
+ object *parse_object (int depth);
+ array *parse_array (int depth);
+
+ bool seen_error_p () const { return *m_err_out; }
+ void require_eof ();
+
+ private:
+ location_map::point get_next_token_start ();
+ location_map::point get_next_token_end ();
+ void require (enum token_id tok_id);
+ enum token_id require_one_of (enum token_id tok_id_a, enum token_id tok_id_b);
+ void error_at (const location_map::range &r,
+ const char *fmt, ...) ATTRIBUTE_PRINTF_3;
+ void maybe_record_range (json::value *jv, const location_map::range &r);
+ void maybe_record_range (json::value *jv,
+ const location_map::point &start,
+ const location_map::point &end);
+
+ private:
+ lexer m_lexer;
+ error **m_err_out;
+ location_map *m_loc_map;
+};
+
+} // anonymous namespace for parsing implementation
+
+/* Parser implementation. */
+
+/* lexer's ctor. */
+
+lexer::lexer (bool support_comments)
+: m_buffer (), m_next_char_idx (0),
+ m_next_char_line (1), m_next_char_column (0),
+ m_prev_line_final_column (-1),
+ m_num_next_tokens (0),
+ m_support_comments (support_comments)
+{
+}
+
+/* lexer's dtor. */
+
+lexer::~lexer ()
+{
+ while (m_num_next_tokens > 0)
+ consume ();
+}
+
+/* Peek the next token. */
+
+const token *
+lexer::peek ()
+{
+ if (m_num_next_tokens == 0)
+ {
+ lex_token (&m_next_tokens[0]);
+ m_num_next_tokens++;
+ }
+ return &m_next_tokens[0];
+}
+
+/* Consume the next token. */
+
+void
+lexer::consume ()
+{
+ if (m_num_next_tokens == 0)
+ peek ();
+
+ gcc_assert (m_num_next_tokens > 0);
+ gcc_assert (m_num_next_tokens <= MAX_TOKENS);
+
+ if (0)
+ {
+ fprintf (stderr, "consuming token: ");
+ dump_token (stderr, &m_next_tokens[0]);
+ fprintf (stderr, "\n");
+ }
+
+ if (m_next_tokens[0].id == TOK_ERROR
+ || m_next_tokens[0].id == TOK_STRING)
+ free (m_next_tokens[0].u.string);
+
+ m_num_next_tokens--;
+ memmove (&m_next_tokens[0], &m_next_tokens[1],
+ sizeof (token) * m_num_next_tokens);
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
+ buffer. */
+
+bool
+lexer::add_utf8 (size_t length, const char *utf8_buf, error **err_out)
+{
+ /* Adapted from charset.c:one_utf8_to_cppchar. */
+ static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+ static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+ const uchar *inbuf = (const unsigned char *) (utf8_buf);
+ const uchar **inbufp = &inbuf;
+ size_t *inbytesleftp = &length;
+
+ while (length > 0)
+ {
+ unichar c;
+ const uchar *inbuf = *inbufp;
+ size_t nbytes, i;
+
+ c = *inbuf;
+ if (c < 0x80)
+ {
+ m_buffer.safe_push (c);
+ *inbytesleftp -= 1;
+ *inbufp += 1;
+ continue;
+ }
+
+ /* The number of leading 1-bits in the first byte indicates how many
+ bytes follow. */
+ for (nbytes = 2; nbytes < 7; nbytes++)
+ if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
+ goto found;
+ *err_out = make_error ("ill-formed UTF-8 sequence");
+ return false;
+ found:
+
+ if (*inbytesleftp < nbytes)
+ {
+ *err_out = make_error ("ill-formed UTF-8 sequence");
+ return false;
+ }
+
+ c = (c & masks[nbytes-1]);
+ inbuf++;
+ for (i = 1; i < nbytes; i++)
+ {
+ unichar n = *inbuf++;
+ if ((n & 0xC0) != 0x80)
+ {
+ *err_out = make_error ("ill-formed UTF-8 sequence");
+ return false;
+ }
+ c = ((c << 6) + (n & 0x3F));
+ }
+
+ /* Make sure the shortest possible encoding was used. */
+ if (( c <= 0x7F && nbytes > 1)
+ || (c <= 0x7FF && nbytes > 2)
+ || (c <= 0xFFFF && nbytes > 3)
+ || (c <= 0x1FFFFF && nbytes > 4)
+ || (c <= 0x3FFFFFF && nbytes > 5))
+ {
+ *err_out = make_error ("ill-formed UTF-8:"
+ " shortest possible encoding not used");
+ return false;
+ }
+
+ /* Make sure the character is valid. */
+ if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+ {
+ *err_out = make_error ("ill-formed UTF-8: invalid character");
+ return false;
+ }
+
+ m_buffer.safe_push (c);
+ *inbufp = inbuf;
+ *inbytesleftp -= nbytes;
+ }
+ return true;
+}
+
+/* Attempt to get the next unicode character from this lexer's buffer.
+ If successful, write it to OUT_CHAR, and its location to *OUT_POINT,
+ and return true.
+ Otherwise, return false. */
+
+bool
+lexer::get_char (unichar &out_char, location_map::point *out_point)
+{
+ if (m_next_char_idx >= (int)m_buffer.length ())
+ return false;
+
+ if (out_point)
+ *out_point = get_next_point ();
+ out_char = m_buffer[m_next_char_idx++];
+
+ if (out_char == '\n')
+ {
+ m_next_char_line++;
+ m_prev_line_final_column = m_next_char_column;
+ m_next_char_column = 0;
+ }
+ else
+ m_next_char_column++;
+
+ return true;
+}
+
+/* Undo the last successful get_char. */
+
+void
+lexer::unget_char ()
+{
+ --m_next_char_idx;
+ if (m_next_char_column > 0)
+ --m_next_char_column;
+ else
+ {
+ m_next_char_line--;
+ m_next_char_column = m_prev_line_final_column;
+ /* We don't support more than one unget_char in a row. */
+ gcc_assert (m_prev_line_final_column != -1);
+ m_prev_line_final_column = -1;
+ }
+}
+
+/* Get the location of the next char. */
+
+location_map::point
+lexer::get_next_point () const
+{
+ location_map::point result;
+ result.m_unichar_idx = m_next_char_idx;
+ result.m_line = m_next_char_line;
+ result.m_column = m_next_char_column;
+ return result;
+}
+
+/* Print a textual representation of TOK to OUTF.
+ This is intended for debugging the lexer and parser,
+ rather than for user-facing output. */
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+ switch (tok->id)
+ {
+ case TOK_ERROR:
+ fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
+ break;
+
+ case TOK_EOF:
+ fprintf (outf, "TOK_EOF");
+ break;
+
+ case TOK_OPEN_SQUARE:
+ fprintf (outf, "TOK_OPEN_SQUARE");
+ break;
+
+ case TOK_OPEN_CURLY:
+ fprintf (outf, "TOK_OPEN_CURLY");
+ break;
+
+ case TOK_CLOSE_SQUARE:
+ fprintf (outf, "TOK_CLOSE_SQUARE");
+ break;
+
+ case TOK_CLOSE_CURLY:
+ fprintf (outf, "TOK_CLOSE_CURLY");
+ break;
+
+ case TOK_COLON:
+ fprintf (outf, "TOK_COLON");
+ break;
+
+ case TOK_COMMA:
+ fprintf (outf, "TOK_COMMA");
+ break;
+
+ case TOK_TRUE:
+ fprintf (outf, "TOK_TRUE");
+ break;
+
+ case TOK_FALSE:
+ fprintf (outf, "TOK_FALSE");
+ break;
+
+ case TOK_NULL:
+ fprintf (outf, "TOK_NULL");
+ break;
+
+ case TOK_STRING:
+ fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+ break;
+
+ case TOK_FLOAT_NUMBER:
+ fprintf (outf, "TOK_FLOAT_NUMBER (%f)", tok->u.float_number);
+ break;
+
+ case TOK_INTEGER_NUMBER:
+ fprintf (outf, "TOK_INTEGER_NUMBER (%ld)", tok->u.integer_number);
+ break;
+
+ default:
+ gcc_unreachable ();
+ break;
+ }
+}
+
+/* Treat "//" as a comment to the end of the line.
+
+ This isn't compliant with the JSON spec,
+ but is very handy for writing DejaGnu tests.
+
+ Return true if EOF and populate *OUT, false otherwise. */
+
+bool
+lexer::consume_single_line_comment (token *out)
+{
+ while (1)
+ {
+ unichar next_char;
+ if (!get_char (next_char, NULL))
+ {
+ out->id = TOK_EOF;
+ location_map::point p = get_next_point ();
+ out->range.m_start = p;
+ out->range.m_end = p;
+ return true;
+ }
+ if (next_char == '\n')
+ return false;
+ }
+}
+
+/* Treat '/' '*' as a multiline comment until the next closing '*' '/'.
+
+ This isn't compliant with the JSON spec,
+ but is very handy for writing DejaGnu tests.
+
+ Return true if EOF and populate *OUT, false otherwise. */
+
+bool
+lexer::consume_multiline_comment (token *out)
+{
+ while (1)
+ {
+ unichar next_char;
+ if (!get_char (next_char, NULL))
+ {
+ /* EOF before the comment was closed: emit a proper error token
+ (rather than ICE-ing), so that u.string is valid when freed. */
+ out->id = TOK_ERROR;
+ out->u.string = xstrdup ("EOF within comment");
+ location_map::point p = get_next_point ();
+ out->range.m_start = p;
+ out->range.m_end = p;
+ return true;
+ }
+ if (next_char != '*')
+ continue;
+ if (!get_char (next_char, NULL))
+ {
+ /* EOF between '*' and a potential closing '/'. */
+ out->id = TOK_ERROR;
+ out->u.string = xstrdup ("EOF within comment");
+ location_map::point p = get_next_point ();
+ out->range.m_start = p;
+ out->range.m_end = p;
+ return true;
+ }
+ if (next_char == '/')
+ return false;
+ }
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+ On errors, TOK_ERROR (or TOK_EOF) is written to OUT. */
+
+void
+lexer::lex_token (token *out)
+{
+ /* Skip to next non-whitespace char. */
+ unichar next_char;
+ location_map::point start_point;
+ while (1)
+ {
+ if (!get_char (next_char, &start_point))
+ {
+ out->id = TOK_EOF;
+ location_map::point p = get_next_point ();
+ out->range.m_start = p;
+ out->range.m_end = p;
+ return;
+ }
+ if (m_support_comments)
+ if (next_char == '/')
+ {
+ location_map::point point;
+ unichar next_next_char;
+ if (get_char (next_next_char, &point))
+ {
+ switch (next_next_char)
+ {
+ case '/':
+ if (consume_single_line_comment (out))
+ return;
+ continue;
+ case '*':
+ if (consume_multiline_comment (out))
+ return;
+ continue;
+ default:
+ /* A stray single '/'. Break out of loop, so that we
+ handle it below as an unexpected character. */
+ goto non_whitespace;
+ }
+ }
+ }
+ if (next_char != ' '
+ && next_char != '\t'
+ && next_char != '\n'
+ && next_char != '\r')
+ break;
+ }
+
+ non_whitespace:
+
+ out->range.m_start = start_point;
+ out->range.m_end = start_point;
+
+ switch (next_char)
+ {
+ case '[':
+ out->id = TOK_OPEN_SQUARE;
+ break;
+
+ case '{':
+ out->id = TOK_OPEN_CURLY;
+ break;
+
+ case ']':
+ out->id = TOK_CLOSE_SQUARE;
+ break;
+
+ case '}':
+ out->id = TOK_CLOSE_CURLY;
+ break;
+
+ case ':':
+ out->id = TOK_COLON;
+ break;
+
+ case ',':
+ out->id = TOK_COMMA;
+ break;
+
+ case '"':
+ lex_string (out);
+ break;
+
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ lex_number (out, next_char);
+ break;
+
+ case 't':
+ /* Handle literal "true". */
+ if (rest_of_literal (out, "rue"))
+ {
+ out->id = TOK_TRUE;
+ break;
+ }
+ else
+ goto err;
+
+ case 'f':
+ /* Handle literal "false". */
+ if (rest_of_literal (out, "alse"))
+ {
+ out->id = TOK_FALSE;
+ break;
+ }
+ else
+ goto err;
+
+ case 'n':
+ /* Handle literal "null". */
+ if (rest_of_literal (out, "ull"))
+ {
+ out->id = TOK_NULL;
+ break;
+ }
+ else
+ goto err;
+
+ err:
+ default:
+ out->id = TOK_ERROR;
+ out->u.string = xasprintf ("unexpected character: '%c'", next_char);
+ break;
+ }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+ to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR)
+ if an error occurred.
+ (ECMA-404 section 9; RFC 7159 section 7). */
+
+void
+lexer::lex_string (token *out)
+{
+ auto_vec<unichar> content;
+ bool still_going = true;
+ while (still_going)
+ {
+ unichar uc;
+ if (!get_char (uc, &out->range.m_end))
+ {
+ out->id = TOK_ERROR;
+ out->range.m_end = get_next_point ();
+ out->u.string = xstrdup ("EOF within string");
+ return;
+ }
+ switch (uc)
+ {
+ case '"':
+ still_going = false;
+ break;
+ case '\\':
+ {
+ unichar next_char;
+ if (!get_char (next_char, &out->range.m_end))
+ {
+ out->id = TOK_ERROR;
+ out->range.m_end = get_next_point ();
+ out->u.string = xstrdup ("EOF within string");
+ return;
+ }
+ switch (next_char)
+ {
+ case '"':
+ case '\\':
+ case '/':
+ content.safe_push (next_char);
+ break;
+
+ case 'b':
+ content.safe_push ('\b');
+ break;
+
+ case 'f':
+ content.safe_push ('\f');
+ break;
+
+ case 'n':
+ content.safe_push ('\n');
+ break;
+
+ case 'r':
+ content.safe_push ('\r');
+ break;
+
+ case 't':
+ content.safe_push ('\t');
+ break;
+
+ case 'u':
+ {
+ unichar result = 0;
+ for (int i = 0; i < 4; i++)
+ {
+ unichar hexdigit;
+ if (!get_char (hexdigit, &out->range.m_end))
+ {
+ out->id = TOK_ERROR;
+ out->range.m_end = get_next_point ();
+ out->u.string = xstrdup ("EOF within string");
+ return;
+ }
+ result <<= 4;
+ if (hexdigit >= '0' && hexdigit <= '9')
+ result += hexdigit - '0';
+ else if (hexdigit >= 'a' && hexdigit <= 'f')
+ result += (hexdigit - 'a') + 10;
+ else if (hexdigit >= 'A' && hexdigit <= 'F')
+ result += (hexdigit - 'A') + 10;
+ else
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = out->range.m_end;
+ out->u.string = xstrdup ("bogus hex char");
+ return;
+ }
+ }
+ content.safe_push (result);
+ }
+ break;
+
+ default:
+ out->id = TOK_ERROR;
+ out->u.string = xstrdup ("unrecognized escape char");
+ return;
+ }
+ }
+ break;
+
+ default:
+ /* Reject unescaped control characters U+0000 through U+001F
+ (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1). */
+ if (uc <= 0x1f)
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = out->range.m_end;
+ out->u.string = xstrdup ("unescaped control char");
+ return;
+ }
+
+ /* Otherwise, add regular unicode code point. */
+ content.safe_push (uc);
+ break;
+ }
+ }
+
+ out->id = TOK_STRING;
+
+ auto_vec<char> utf8_buf;
+ // Adapted from libcpp/charset.c:one_cppchar_to_utf8
+ for (unsigned i = 0; i < content.length (); i++)
+ {
+ static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+ static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
+ size_t nbytes;
+ uchar buf[6], *p = &buf[6];
+ unichar c = content[i];
+
+ nbytes = 1;
+ if (c < 0x80)
+ *--p = c;
+ else
+ {
+ do
+ {
+ *--p = ((c & 0x3F) | 0x80);
+ c >>= 6;
+ nbytes++;
+ }
+ while (c >= 0x3F || (c & limits[nbytes-1]));
+ *--p = (c | masks[nbytes-1]);
+ }
+
+ while (p < &buf[6])
+ utf8_buf.safe_push (*p++);
+ }
+
+ out->u.string = XNEWVEC (char, utf8_buf.length () + 1);
+ for (unsigned i = 0; i < utf8_buf.length (); i++)
+ out->u.string[i] = utf8_buf[i];
+ out->u.string[utf8_buf.length ()] = '\0';
+}
+
+/* Having consumed FIRST_CHAR, an initial digit or '-' character from
+ the lexer's buffer attempt to lex the rest of a JSON number, writing
+ the result to OUT (or TOK_ERROR) if an error occurred.
+ (ECMA-404 section 8; RFC 7159 section 6). */
+
+void
+lexer::lex_number (token *out, unichar first_char)
+{
+ bool negate = false;
+ double value = 0.0;
+ if (first_char == '-')
+ {
+ negate = true;
+ if (!get_char (first_char, &out->range.m_end))
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = out->range.m_end;
+ out->u.string = xstrdup ("expected digit");
+ return;
+ }
+ }
+
+ if (first_char == '0')
+ value = 0.0;
+ else if (!ISDIGIT (first_char))
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = out->range.m_end;
+ out->u.string = xstrdup ("expected digit");
+ return;
+ }
+ else
+ {
+ /* Got a nonzero digit; expect zero or more digits. */
+ value = first_char - '0';
+ while (1)
+ {
+ unichar uc;
+ location_map::point point;
+ if (!get_char (uc, &point))
+ break;
+ if (ISDIGIT (uc))
+ {
+ value *= 10;
+ value += uc -'0';
+ out->range.m_end = point;
+ continue;
+ }
+ else
+ {
+ unget_char ();
+ break;
+ }
+ }
+ }
+
+ /* Optional '.', followed by one or more decimals. */
+ unichar next_char;
+ location_map::point point;
+ if (get_char (next_char, &point))
+ {
+ if (next_char == '.')
+ {
+ /* Parse decimal digits. */
+ bool had_digit = false;
+ double digit_factor = 0.1;
+ while (get_char (next_char, &point))
+ {
+ if (!ISDIGIT (next_char))
+ {
+ unget_char ();
+ break;
+ }
+ value += (next_char - '0') * digit_factor;
+ digit_factor *= 0.1;
+ had_digit = true;
+ out->range.m_end = point;
+ }
+ if (!had_digit)
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = point;
+ out->range.m_end = point;
+ out->u.string = xstrdup ("expected digit");
+ return;
+ }
+ }
+ else
+ unget_char ();
+ }
+
+ /* Parse 'e' and 'E'. */
+ unichar exponent_char;
+ if (get_char (exponent_char, &point))
+ {
+ if (exponent_char == 'e' || exponent_char == 'E')
+ {
+ /* Optional +/-. */
+ unichar sign_char;
+ int exponent = 0;
+ bool negate_exponent = false;
+ bool had_exponent_digit = false;
+ if (!get_char (sign_char, &point))
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = point;
+ out->range.m_end = point;
+ out->u.string = xstrdup ("EOF within exponent");
+ return;
+ }
+ if (sign_char == '-')
+ negate_exponent = true;
+ else if (sign_char == '+')
+ ;
+ else if (ISDIGIT (sign_char))
+ {
+ exponent = sign_char - '0';
+ had_exponent_digit = true;
+ }
+ else
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = point;
+ out->range.m_end = point;
+ out->u.string
+ = xstrdup ("expected '-','+' or digit within exponent");
+ return;
+ }
+ out->range.m_end = point;
+
+ /* One or more digits (we might have seen the digit above,
+ though). */
+ while (1)
+ {
+ unichar uc;
+ location_map::point point;
+ if (!get_char (uc, &point))
+ break;
+ if (ISDIGIT (uc))
+ {
+ exponent *= 10;
+ exponent += uc -'0';
+ had_exponent_digit = true;
+ out->range.m_end = point;
+ continue;
+ }
+ else
+ {
+ unget_char ();
+ break;
+ }
+ }
+ if (!had_exponent_digit)
+ {
+ out->id = TOK_ERROR;
+ out->range.m_start = point;
+ out->range.m_end = point;
+ out->u.string = xstrdup ("expected digit within exponent");
+ return;
+ }
+ if (negate_exponent)
+ exponent = -exponent;
+ value = value * pow (10, exponent);
+ }
+ else
+ unget_char ();
+ }
+
+ if (negate)
+ value = -value;
+
+ if (value == (long)value)
+ {
+ out->id = TOK_INTEGER_NUMBER;
+ out->u.integer_number = value;
+ }
+ else
+ {
+ out->id = TOK_FLOAT_NUMBER;
+ out->u.float_number = value;
+ }
+}
+
+/* Determine if the next characters to be lexed match SUFFIX.
+ SUFFIX must be pure ASCII and not contain newlines.
+ If so, consume the characters and return true.
+ Otherwise, return false. */
+
+bool
+lexer::rest_of_literal (token *out, const char *suffix)
+{
+ int suffix_idx = 0;
+ int buf_idx = m_next_char_idx;
+ while (1)
+ {
+ if (suffix[suffix_idx] == '\0')
+ {
+ m_next_char_idx += suffix_idx;
+ m_next_char_column += suffix_idx;
+ out->range.m_end.m_unichar_idx += suffix_idx;
+ out->range.m_end.m_column += suffix_idx;
+ return true;
+ }
+ if (buf_idx >= (int)m_buffer.length ())
+ return false;
+ /* This assumes that suffix is ASCII. */
+ if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
+ return false;
+ buf_idx++;
+ suffix_idx++;
+ }
+}
+
+/* Create a new error instance for MSG, using the location of the next
+ character for the location of the error. */
+
+error *
+lexer::make_error (const char *msg)
+{
+ location_map::point p;
+ p.m_unichar_idx = m_next_char_idx;
+ p.m_line = m_next_char_line;
+ p.m_column = m_next_char_column;
+ location_map::range r;
+ r.m_start = p;
+ r.m_end = p;
+ return new error (r, xstrdup (msg));
+}
+
+/* parser's ctor. */
+
+parser::parser (error **err_out, location_map *out_loc_map,
+ bool support_comments)
+: m_lexer (support_comments), m_err_out (err_out), m_loc_map (out_loc_map)
+{
+ gcc_assert (err_out);
+ gcc_assert (*err_out == NULL);
+ *err_out = NULL;
+}
+
+/* parser's dtor. */
+
+parser::~parser ()
+{
+ if (m_loc_map)
+ m_loc_map->on_finished_parsing ();
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+ lexer's buffer. */
+
+bool
+parser::add_utf8 (size_t length, const char *utf8_buf, error **err_out)
+{
+ return m_lexer.add_utf8 (length, utf8_buf, err_out);
+}
+
+/* Parse a JSON value (object, array, number, string, or literal).
+ (ECMA-404 section 5; RFC 7159 section 3). */
+
+value *
+parser::parse_value (int depth)
+{
+ const token *tok = m_lexer.peek ();
+
+ /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
+ states: "An implementation may set limits on the maximum depth
+ of nesting.".
+
+ Ideally we'd avoid this limit (e.g. by rewriting parse_value,
+ parse_object, and parse_array into a single function with a vec of
+ state). */
+ const int MAX_DEPTH = 100;
+ if (depth >= MAX_DEPTH)
+ {
+ error_at (tok->range, "maximum nesting depth exceeded: %i",
+ MAX_DEPTH);
+ return NULL;
+ }
+
+ switch (tok->id)
+ {
+ case TOK_OPEN_CURLY:
+ return parse_object (depth);
+
+ case TOK_STRING:
+ {
+ string *result = new string (tok->u.string);
+ m_lexer.consume ();
+ maybe_record_range (result, tok->range);
+ return result;
+ }
+
+ case TOK_OPEN_SQUARE:
+ return parse_array (depth);
+
+ case TOK_FLOAT_NUMBER:
+ {
+ float_number *result = new float_number (tok->u.float_number);
+ m_lexer.consume ();
+ maybe_record_range (result, tok->range);
+ return result;
+ }
+
+ case TOK_INTEGER_NUMBER:
+ {
+ integer_number *result = new integer_number (tok->u.integer_number);
+ m_lexer.consume ();
+ maybe_record_range (result, tok->range);
+ return result;
+ }
+
+ case TOK_TRUE:
+ {
+ literal *result = new literal (JSON_TRUE);
+ m_lexer.consume ();
+ maybe_record_range (result, tok->range);
+ return result;
+ }
+
+ case TOK_FALSE:
+ {
+ literal *result = new literal (JSON_FALSE);
+ m_lexer.consume ();
+ maybe_record_range (result, tok->range);
+ return result;
+ }
+
+ case TOK_NULL:
+ {
+ literal *result = new literal (JSON_NULL);
+ m_lexer.consume ();
+ maybe_record_range (result, tok->range);
+ return result;
+ }
+
+ case TOK_ERROR:
+ error_at (tok->range, "invalid JSON token: %s", tok->u.string);
+ return NULL;
+
+ default:
+ error_at (tok->range, "expected a JSON value but got %s",
+ token_id_name[tok->id]);
+ return NULL;
+ }
+}
+
+/* Parse a JSON object.
+ (ECMA-404 section 6; RFC 7159 section 4). */
+
+object *
+parser::parse_object (int depth)
+{
+ location_map::point start = get_next_token_start ();
+
+ require (TOK_OPEN_CURLY);
+
+ object *result = new object ();
+
+ const token *tok = m_lexer.peek ();
+ if (tok->id == TOK_CLOSE_CURLY)
+ {
+ location_map::point end = get_next_token_end ();
+ maybe_record_range (result, start, end);
+ require (TOK_CLOSE_CURLY);
+ return result;
+ }
+ if (tok->id != TOK_STRING)
+ {
+ error_at (tok->range,
+ "expected string for object key after '{'; got %s",
+ token_id_name[tok->id]);
+ return result;
+ }
+ while (!seen_error_p ())
+ {
+ tok = m_lexer.peek ();
+ if (tok->id != TOK_STRING)
+ {
+ error_at (tok->range,
+ "expected string for object key after ','; got %s",
+ token_id_name[tok->id]);
+ return result;
+ }
+ char *key = xstrdup (tok->u.string);
+ m_lexer.consume ();
+
+ require (TOK_COLON);
+
+ value *v = parse_value (depth + 1);
+ if (!v)
+ {
+ free (key);
+ return result;
+ }
+ /* We don't enforce uniqueness for keys. */
+ result->set (key, v);
+ free (key);
+
+ location_map::point end = get_next_token_end ();
+ if (require_one_of (TOK_COMMA, TOK_CLOSE_CURLY) == TOK_COMMA)
+ continue;
+ else
+ {
+ /* TOK_CLOSE_CURLY. */
+ maybe_record_range (result, start, end);
+ break;
+ }
+ }
+ return result;
+}
+
+/* Parse a JSON array.
+ (ECMA-404 section 7; RFC 7159 section 5). */
+
+array *
+parser::parse_array (int depth)
+{
+ location_map::point start = get_next_token_start ();
+ require (TOK_OPEN_SQUARE);
+
+ array *result = new array ();
+
+ const token *tok = m_lexer.peek ();
+ if (tok->id == TOK_CLOSE_SQUARE)
+ {
+ location_map::point end = get_next_token_end ();
+ maybe_record_range (result, start, end);
+ m_lexer.consume ();
+ return result;
+ }
+
+ while (!seen_error_p ())
+ {
+ value *v = parse_value (depth + 1);
+ if (!v)
+ return result;
+
+ result->append (v);
+
+ location_map::point end = get_next_token_end ();
+ if (require_one_of (TOK_COMMA, TOK_CLOSE_SQUARE) == TOK_COMMA)
+ continue;
+ else
+ {
+ /* TOK_CLOSE_SQUARE. */
+ maybe_record_range (result, start, end);
+ break;
+ }
+ }
+
+ return result;
+}
+
+/* Get the start point of the next token. */
+
+location_map::point
+parser::get_next_token_start ()
+{
+ const token *tok = m_lexer.peek ();
+ return tok->range.m_start;
+}
+
+/* Get the end point of the next token. */
+
+location_map::point
+parser::get_next_token_end ()
+{
+ const token *tok = m_lexer.peek ();
+ return tok->range.m_end;
+}
+
+/* Require an EOF, or fail if there is surplus input. */
+
+void
+parser::require_eof ()
+{
+ require (TOK_EOF);
+}
+
+/* Consume the next token, issuing an error if it is not of kind TOK_ID. */
+
+void
+parser::require (enum token_id tok_id)
+{
+ const token *tok = m_lexer.peek ();
+ if (tok->id != tok_id)
+ {
+ if (tok->id == TOK_ERROR)
+ error_at (tok->range, "expected %s; got bad token: %s",
+ token_id_name[tok_id], tok->u.string);
+ else
+ error_at (tok->range, "expected %s; got %s", token_id_name[tok_id],
+ token_id_name[tok->id]);
+ }
+ m_lexer.consume ();
+}
+
+/* Consume the next token, issuing an error if it is not of
+ kind TOK_ID_A or TOK_ID_B.
+ Return which kind it was. */
+
+enum token_id
+parser::require_one_of (enum token_id tok_id_a, enum token_id tok_id_b)
+{
+ const token *tok = m_lexer.peek ();
+ if ((tok->id != tok_id_a)
+ && (tok->id != tok_id_b))
+ {
+ if (tok->id == TOK_ERROR)
+ error_at (tok->range, "expected %s or %s; got bad token: %s",
+ token_id_name[tok_id_a], token_id_name[tok_id_b],
+ tok->u.string);
+ else
+ error_at (tok->range, "expected %s or %s; got %s",
+ token_id_name[tok_id_a], token_id_name[tok_id_b],
+ token_id_name[tok->id]);
+ }
+ enum token_id result = tok->id;
+ m_lexer.consume ();
+ return result;
+}
+
+/* Generate a parsing error.  If this is the first error that has occurred on
+ the parser, store it within the parser's *m_err_out.
+ Otherwise do nothing. */
+
+void
+parser::error_at (const location_map::range &r, const char *fmt, ...)
+{
+ if (m_err_out == NULL)
+ return;
+ /* Only record the first error. */
+ if (*m_err_out)
+ return;
+
+ va_list ap;
+ va_start (ap, fmt);
+ char *formatted_msg = xvasprintf (fmt, ap);
+ va_end (ap);
+
+ *m_err_out = new error (r, formatted_msg);
+}
+
+/* Record that JV has range R within the input file. */
+
+void
+parser::maybe_record_range (json::value *jv, const location_map::range &r)
+{
+ if (m_loc_map)
+ m_loc_map->record_range_for_value (jv, r);
+}
+
+/* Record that JV has range START to END within the input file. */
+
+void
+parser::maybe_record_range (json::value *jv,
+ const location_map::point &start,
+ const location_map::point &end)
+{
+ if (m_loc_map)
+ {
+ location_map::range r;
+ r.m_start = start;
+ r.m_end = end;
+ m_loc_map->record_range_for_value (jv, r);
+ }
+}
+
+/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
+ of the given LENGTH.
+ If ALLOW_COMMENTS is true, then allow C and C++ style-comments in the
+ buffer, as an extension to JSON, otherwise forbid them.
+ If successful, return a non-NULL json::value *.
+ if there was a problem, return NULL and write an error
+ message to err_out, which must be deleted by the caller.
+ If OUT_LOC_MAP is non-NULL, notify *OUT_LOC_MAP about
+ source locations of nodes seen during parsing. */
+
+value *
+json::parse_utf8_string (size_t length,
+ const char *utf8_buf,
+ bool allow_comments,
+ error **err_out,
+ location_map *out_loc_map)
+{
+ gcc_assert (err_out);
+ gcc_assert (*err_out == NULL);
+
+ parser p (err_out, out_loc_map, allow_comments);
+ if (!p.add_utf8 (length, utf8_buf, err_out))
+ return NULL;
+ value *result = p.parse_value (0);
+ if (!p.seen_error_p ())
+ p.require_eof ();
+ if (p.seen_error_p ())
+ {
+ gcc_assert (*err_out);
+ delete result;
+ return NULL;
+ }
+ return result;
+}
+
+/* Attempt to parse the nil-terminated UTF-8 encoded buffer at
+ UTF8_BUF.
+ If ALLOW_COMMENTS is true, then allow C and C++ style-comments in the
+ buffer, as an extension to JSON, otherwise forbid them.
+ If successful, return a non-NULL json::value *.
+ if there was a problem, return NULL and write an error
+ message to err_out, which must be deleted by the caller.
+ If OUT_LOC_MAP is non-NULL, notify *OUT_LOC_MAP about
+ source locations of nodes seen during parsing. */
+
+value *
+json::parse_utf8_string (const char *utf8,
+ bool allow_comments,
+ error **err_out,
+ location_map *out_loc_map)
+{
+ return parse_utf8_string (strlen (utf8), utf8, allow_comments,
+ err_out, out_loc_map);
+}
+
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests. */
+
+/* Implementation detail of ASSERT_RANGE_EQ. */
+
+static void
+assert_point_eq (const location &loc,
+ const location_map::point &actual_point,
+ size_t exp_unichar_idx, int exp_line, int exp_column)
+{
+ ASSERT_EQ_AT (loc, actual_point.m_unichar_idx, exp_unichar_idx);
+ ASSERT_EQ_AT (loc, actual_point.m_line, exp_line);
+ ASSERT_EQ_AT (loc, actual_point.m_column, exp_column);
+}
+
+/* Implementation detail of ASSERT_RANGE_EQ. */
+
+static void
+assert_range_eq (const location &loc,
+ const location_map::range &actual_range,
+ /* Expected location. */
+ size_t start_unichar_idx, int start_line, int start_column,
+ size_t end_unichar_idx, int end_line, int end_column)
+{
+ assert_point_eq (loc, actual_range.m_start,
+ start_unichar_idx, start_line, start_column);
+ assert_point_eq (loc, actual_range.m_end,
+ end_unichar_idx, end_line, end_column);
+}
+
+/* Assert that ACTUAL_RANGE starts at
+ (START_UNICHAR_IDX, START_LINE, START_COLUMN)
+ and ends at (END_UNICHAR_IDX, END_LINE, END_COLUMN).
+ A macro wrapper around assert_range_eq, so that failures are
+ reported at the caller's location via SELFTEST_LOCATION. */
+
+#define ASSERT_RANGE_EQ(ACTUAL_RANGE, \
+ START_UNICHAR_IDX, START_LINE, START_COLUMN, \
+ END_UNICHAR_IDX, END_LINE, END_COLUMN) \
+ assert_range_eq ((SELFTEST_LOCATION), (ACTUAL_RANGE), \
+ (START_UNICHAR_IDX), (START_LINE), (START_COLUMN), \
+ (END_UNICHAR_IDX), (END_LINE), (END_COLUMN))
+
+/* Implementation detail of ASSERT_ERR_EQ.
+ Assert that ACTUAL_ERR is a non-NULL json::error *, that its range
+ matches the expected start and end points, and that its message is
+ EXPECTED_MSG, reporting any failure at LOC. */
+
+static void
+assert_err_eq (const location &loc,
+ json::error *actual_err,
+ /* Expected location. */
+ size_t start_unichar_idx, int start_line, int start_column,
+ size_t end_unichar_idx, int end_line, int end_column,
+ const char *expected_msg)
+{
+ ASSERT_TRUE_AT (loc, actual_err);
+ /* Reuse assert_range_eq rather than duplicating its six per-field
+    assertions. */
+ assert_range_eq (loc, actual_err->get_range (),
+ start_unichar_idx, start_line, start_column,
+ end_unichar_idx, end_line, end_column);
+ ASSERT_STREQ_AT (loc, actual_err->get_msg (), expected_msg);
+}
+
+/* Assert that ACTUAL_ERR is a non-NULL json::error *,
+ with message EXPECTED_MSG, and that its location starts
+ at (START_UNICHAR_IDX, START_LINE, START_COLUMN)
+ and ends at (END_UNICHAR_IDX, END_LINE, END_COLUMN).
+ A macro wrapper around assert_err_eq, so that failures are
+ reported at the caller's location via SELFTEST_LOCATION. */
+
+#define ASSERT_ERR_EQ(ACTUAL_ERR, \
+ START_UNICHAR_IDX, START_LINE, START_COLUMN, \
+ END_UNICHAR_IDX, END_LINE, END_COLUMN, \
+ EXPECTED_MSG) \
+ assert_err_eq ((SELFTEST_LOCATION), (ACTUAL_ERR), \
+ (START_UNICHAR_IDX), (START_LINE), (START_COLUMN), \
+ (END_UNICHAR_IDX), (END_LINE), (END_COLUMN), \
+ (EXPECTED_MSG))
+
+/* Verify that the JSON lexer works as expected.
+ Expected token positions are (unichar index, line, column) triples;
+ the unichar indexes are absolute within the whole buffer, so each
+ line's offset includes the newlines of the preceding lines. */
+
+static void
+test_lexer ()
+{
+ error *err = NULL;
+ lexer l (false);
+ const char *str
+ /* 0 1 2 3 4 . */
+ /* 01234567890123456789012345678901234567890123456789. */
+ = (" 1066 -1 \n"
+ " -273.15 1e6\n"
+ " [ ] null true false { } \"foo\" \n");
+ l.add_utf8 (strlen (str), str, &err);
+ ASSERT_EQ (err, NULL);
+
+ /* Line 1. */
+ {
+ const size_t line_offset = 0;
+
+ /* Expect token: "1066" in columns 4-7. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 1066);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 4, 1, 4,
+ line_offset + 7, 1, 7);
+ l.consume ();
+ }
+ /* Expect token: "-1" in columns 11-12. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, -1);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 11, 1, 11,
+ line_offset + 12, 1, 12);
+ l.consume ();
+ }
+ }
+
+ /* Line 2. */
+ {
+ const size_t line_offset = 16;
+
+ /* Expect token: "-273.15" in columns 4-10. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_FLOAT_NUMBER);
+ ASSERT_EQ (int(tok->u.float_number), int(-273.15));
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 4, 2, 4,
+ line_offset + 10, 2, 10);
+ l.consume ();
+ }
+ /* Expect token: "1e6" in columns 12-14.
+    Note: integral-valued, so lexed as an integer token. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 1000000);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 12, 2, 12,
+ line_offset + 14, 2, 14);
+ l.consume ();
+ }
+ }
+
+ /* Line 3. */
+ {
+ const size_t line_offset = 32;
+
+ /* Expect token: "[". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_OPEN_SQUARE);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 2, 3, 2,
+ line_offset + 2, 3, 2);
+ l.consume ();
+ }
+ /* Expect token: "]". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_CLOSE_SQUARE);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 6, 3, 6,
+ line_offset + 6, 3, 6);
+ l.consume ();
+ }
+ /* Expect token: "null". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_NULL);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 8, 3, 8,
+ line_offset + 11, 3, 11);
+ l.consume ();
+ }
+ /* Expect token: "true". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_TRUE);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 15, 3, 15,
+ line_offset + 18, 3, 18);
+ l.consume ();
+ }
+ /* Expect token: "false". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_FALSE);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 21, 3, 21,
+ line_offset + 25, 3, 25);
+ l.consume ();
+ }
+ /* Expect token: "{". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_OPEN_CURLY);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 28, 3, 28,
+ line_offset + 28, 3, 28);
+ l.consume ();
+ }
+ /* Expect token: "}". */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_CLOSE_CURLY);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 31, 3, 31,
+ line_offset + 31, 3, 31);
+ l.consume ();
+ }
+ /* Expect token: "\"foo\"".
+    Note: the range covers the quotes as well as the payload. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_STRING);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 34, 3, 34,
+ line_offset + 38, 3, 38);
+ l.consume ();
+ }
+ }
+}
+
+/* Verify that the JSON lexer complains about single-line comments
+ when comments are disabled: the leading '/' of the comment should
+ be lexed as an error token. */
+
+static void
+test_lexing_unsupported_single_line_comment ()
+{
+ error *err = NULL;
+ lexer l (false);
+ const char *str
+ /* 0 1 2 3 4 . */
+ /* 01234567890123456789012345678901234567890123456789. */
+ = (" 1066 // Hello world\n");
+ l.add_utf8 (strlen (str), str, &err);
+ ASSERT_EQ (err, NULL);
+
+ /* Line 1. */
+ {
+ const size_t line_offset = 0;
+ const int line_1 = 1;
+
+ /* Expect token: "1066" in columns 4-7. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 1066);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 4, line_1, 4,
+ line_offset + 7, line_1, 7);
+ l.consume ();
+ }
+
+ /* Expect error at the '/' starting the comment. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_ERROR);
+ ASSERT_STREQ (tok->u.string, "unexpected character: '/'");
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 11, line_1, 11,
+ line_offset + 11, line_1, 11);
+ l.consume ();
+ }
+ }
+}
+
+/* Verify that the JSON lexer complains about multiline comments
+ when comments are disabled: the leading '/' of the comment should
+ be lexed as an error token. */
+
+static void
+test_lexing_unsupported_multiline_comment ()
+{
+ error *err = NULL;
+ lexer l (false);
+ const char *str
+ /* 0 1 2 3 4 . */
+ /* 01234567890123456789012345678901234567890123456789. */
+ = (" 1066 /* Hello world\n"
+ " continuation of comment\n"
+ " end of comment */ 42\n");
+ l.add_utf8 (strlen (str), str, &err);
+ ASSERT_EQ (err, NULL);
+
+ /* Line 1. */
+ {
+ const size_t line_offset = 0;
+ const int line_1 = 1;
+
+ /* Expect token: "1066" in line 1, columns 4-7. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 1066);
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 4, line_1, 4,
+ line_offset + 7, line_1, 7);
+ l.consume ();
+ }
+
+ /* Expect error at the '/' starting the comment. */
+ {
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_ERROR);
+ ASSERT_STREQ (tok->u.string, "unexpected character: '/'");
+ ASSERT_RANGE_EQ (tok->range,
+ line_offset + 11, line_1, 11,
+ line_offset + 11, line_1, 11);
+ l.consume ();
+ }
+ }
+}
+
+/* Verify that the JSON lexer handles single-line comments
+ when comments are enabled: the comments should be skipped
+ like whitespace, leaving only the number tokens and EOF. */
+
+static void
+test_lexing_supported_single_line_comment ()
+{
+ error *err = NULL;
+ lexer l (true);
+ const char *str
+ /* 0 1 2 3 4 . */
+ /* 01234567890123456789012345678901234567890123456789. */
+ = (" 1066 // Hello world\n"
+ " 42 // etc\n");
+ l.add_utf8 (strlen (str), str, &err);
+ ASSERT_EQ (err, NULL);
+
+ const size_t line_1_offset = 0;
+ const size_t line_2_offset = 26;
+ const size_t line_3_offset = line_2_offset + 17;
+
+ /* Expect token: "1066" in line 1, columns 4-7. */
+ {
+ const int line_1 = 1;
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 1066);
+ ASSERT_RANGE_EQ (tok->range,
+ line_1_offset + 4, line_1, 4,
+ line_1_offset + 7, line_1, 7);
+ l.consume ();
+ }
+
+ /* Expect token: "42" in line 2, columns 5-6. */
+ {
+ const int line_2 = 2;
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 42);
+ ASSERT_RANGE_EQ (tok->range,
+ line_2_offset + 5, line_2, 5,
+ line_2_offset + 6, line_2, 6);
+ l.consume ();
+ }
+
+ /* Expect EOF at the start of line 3. */
+ {
+ const int line_3 = 3;
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_EOF);
+ ASSERT_RANGE_EQ (tok->range,
+ line_3_offset + 0, line_3, 0,
+ line_3_offset + 0, line_3, 0);
+ l.consume ();
+ }
+}
+
+/* Verify that the JSON lexer handles multiline comments
+ when comments are enabled: the comment spanning lines 1-3 should
+ be skipped like whitespace, leaving only the number tokens and EOF. */
+
+static void
+test_lexing_supported_multiline_comment ()
+{
+ error *err = NULL;
+ lexer l (true);
+ const char *str
+ /* 0 1 2 3 4 . */
+ /* 01234567890123456789012345678901234567890123456789. */
+ = (" 1066 /* Hello world\n"
+ " continuation of comment\n"
+ " end of comment */ 42\n");
+ l.add_utf8 (strlen (str), str, &err);
+ ASSERT_EQ (err, NULL);
+
+ const size_t line_1_offset = 0;
+ const size_t line_2_offset = 26;
+ const size_t line_3_offset = line_2_offset + 25;
+ const size_t line_4_offset = line_3_offset + 23;
+
+ /* Expect token: "1066" in line 1, columns 4-7. */
+ {
+ const int line_1 = 1;
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 1066);
+ ASSERT_RANGE_EQ (tok->range,
+ line_1_offset + 4, line_1, 4,
+ line_1_offset + 7, line_1, 7);
+ l.consume ();
+ }
+
+ /* Expect token: "42" in line 3, columns 20-21. */
+ {
+ const int line_3 = 3;
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
+ ASSERT_EQ (tok->u.integer_number, 42);
+ ASSERT_RANGE_EQ (tok->range,
+ line_3_offset + 20, line_3, 20,
+ line_3_offset + 21, line_3, 21);
+ l.consume ();
+ }
+
+ /* Expect EOF at the start of line 4. */
+ {
+ const int line_4 = 4;
+ const token *tok = l.peek ();
+ ASSERT_EQ (tok->id, TOK_EOF);
+ ASSERT_RANGE_EQ (tok->range,
+ line_4_offset + 0, line_4, 0,
+ line_4_offset + 0, line_4, 0);
+ l.consume ();
+ }
+}
+
+/* Concrete implementation of location_map for use in
+ JSON parsing selftests: records the range reported for each
+ json::value in a hash_map keyed by pointer, so that the tests
+ can query the ranges after parsing. */
+
+class test_location_map : public location_map
+{
+public:
+ /* location_map vfunc: record the range reported for JV. */
+ void record_range_for_value (json::value *jv, const range &r) final override
+ {
+ m_map.put (jv, r);
+ }
+
+ /* Get the recorded range for JV, or NULL if none was recorded. */
+ range *get_range_for_value (json::value *jv)
+ {
+ return m_map.get (jv);
+ }
+
+private:
+ hash_map<json::value *, range> m_map;
+};
+
+/* Verify that parse_utf8_string works as expected on string values,
+ including escaped quotes, non-ASCII UTF-8, and \u-escapes.
+ Note that the recorded ranges are measured in unichars, not bytes. */
+
+static void
+test_parse_string ()
+{
+ const int line_1 = 1;
+ test_location_map loc_map;
+ error *err = NULL;
+ json::value *jv = parse_utf8_string ("\"foo\"", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_STRING);
+ ASSERT_STREQ (as_a <json::string *> (jv)->get_string (), "foo");
+ assert_print_eq (*jv, "\"foo\"");
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 4, line_1, 4);
+ delete jv;
+
+ const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
+ jv = parse_utf8_string (contains_quotes, false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_STRING);
+ ASSERT_STREQ (as_a <json::string *> (jv)->get_string (),
+ "before \"quoted\" after");
+ assert_print_eq (*jv, contains_quotes);
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 24, line_1, 24);
+ delete jv;
+
+ /* Test of non-ASCII input. This string is the Japanese word "mojibake",
+ written as C octal-escaped UTF-8. */
+ const char *mojibake = (/* Opening quote. */
+ "\""
+ /* U+6587 CJK UNIFIED IDEOGRAPH-6587
+ UTF-8: 0xE6 0x96 0x87
+ C octal escaped UTF-8: \346\226\207. */
+ "\346\226\207"
+ /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
+ UTF-8: 0xE5 0xAD 0x97
+ C octal escaped UTF-8: \345\255\227. */
+ "\345\255\227"
+ /* U+5316 CJK UNIFIED IDEOGRAPH-5316
+ UTF-8: 0xE5 0x8C 0x96
+ C octal escaped UTF-8: \345\214\226. */
+ "\345\214\226"
+ /* U+3051 HIRAGANA LETTER KE
+ UTF-8: 0xE3 0x81 0x91
+ C octal escaped UTF-8: \343\201\221. */
+ "\343\201\221"
+ /* Closing quote. */
+ "\"");
+ jv = parse_utf8_string (mojibake, false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_STRING);
+ /* Result of get_string should be UTF-8 encoded, without quotes. */
+ ASSERT_STREQ (as_a <json::string *> (jv)->get_string (),
+ "\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221");
+ /* Result of dump should be UTF-8 encoded, with quotes. */
+ assert_print_eq (*jv, mojibake);
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ /* The closing quote is at unichar index 5: each 3-byte CJK character
+ counts as a single unichar. */
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 5, line_1, 5);
+ delete jv;
+
+ /* Test of \u-escaped unicode. This is "mojibake" again, as above. */
+ const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
+ jv = parse_utf8_string (escaped_unicode, false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_STRING);
+ /* Result of get_string should be UTF-8 encoded, without quotes. */
+ ASSERT_STREQ (as_a <json::string *> (jv)->get_string (),
+ "\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221");
+ /* Result of dump should be UTF-8 encoded, with quotes. */
+ assert_print_eq (*jv, mojibake);
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ /* Here the range is in terms of the input: each \uXXXX escape
+ occupies 6 input unichars. */
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 25, line_1, 25);
+ delete jv;
+}
+
+/* Verify that we can parse various kinds of JSON numbers.
+ Integral-valued inputs (including those scaled up by a positive
+ exponent, such as "42e2") should parse as JSON_INTEGER; decimals and
+ negative exponents as JSON_FLOAT. */
+
+static void
+test_parse_number ()
+{
+ const int line_1 = 1;
+ test_location_map loc_map;
+ json::value *jv;
+
+ error *err = NULL;
+ jv = parse_utf8_string ("42", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
+ ASSERT_EQ (as_a <json::integer_number *> (jv)->get (), 42);
+ assert_print_eq (*jv, "42");
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 1, line_1, 1);
+ delete jv;
+
+ /* Negative number. */
+ jv = parse_utf8_string ("-17", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
+ ASSERT_EQ (as_a <json::integer_number *> (jv)->get (), -17);
+ assert_print_eq (*jv, "-17");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 2, line_1, 2);
+ delete jv;
+
+ /* Decimal. */
+ jv = parse_utf8_string ("3.141", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ /* Use (actual, expected) argument order and the checked as_a cast,
+ for consistency with the rest of the file. */
+ ASSERT_EQ (jv->get_kind (), JSON_FLOAT);
+ ASSERT_EQ (as_a <json::float_number *> (jv)->get (), 3.141);
+ assert_print_eq (*jv, "3.141");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 4, line_1, 4);
+ delete jv;
+
+ /* Exponents. */
+ jv = parse_utf8_string ("3.141e+0", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_FLOAT);
+ ASSERT_EQ (as_a <json::float_number *> (jv)->get (), 3.141);
+ assert_print_eq (*jv, "3.141");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 7, line_1, 7);
+ delete jv;
+
+ jv = parse_utf8_string ("42e2", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
+ ASSERT_EQ (as_a <json::integer_number *> (jv)->get (), 4200);
+ assert_print_eq (*jv, "4200");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 3, line_1, 3);
+ delete jv;
+
+ jv = parse_utf8_string ("42e-1", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_FLOAT);
+ ASSERT_EQ (as_a <json::float_number *> (jv)->get (), 4.2);
+ assert_print_eq (*jv, "4.2");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 4, line_1, 4);
+ delete jv;
+}
+
+/* Verify that JSON array parsing works: element values, element
+ ranges, and the range of the array as a whole. */
+
+static void
+test_parse_array ()
+{
+ const int line_1 = 1;
+ test_location_map loc_map;
+ json::value *jv;
+
+ error *err = NULL;
+ jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", false,
+ &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_ARRAY);
+ json::array *arr = as_a <json::array *> (jv);
+ ASSERT_EQ (arr->length (), 10);
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 29, line_1, 29);
+ for (int i = 0; i < 10; i++)
+ {
+ json::value *element = arr->get (i);
+ ASSERT_EQ (element->get_kind (), JSON_INTEGER);
+ ASSERT_EQ (as_a <json::integer_number *> (element)->get (), i);
+ range = loc_map.get_range_for_value (element);
+ ASSERT_TRUE (range);
+ /* Each element is a single digit followed by ", ", so element I
+ is at unichar 1 + (I * 3). */
+ const int offset = 1 + (i * 3);
+ ASSERT_RANGE_EQ (*range,
+ offset, line_1, offset,
+ offset, line_1, offset);
+ }
+ assert_print_eq (*jv, "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]");
+
+ delete jv;
+}
+
+/* Verify that JSON object parsing works: field lookup by key, nested
+ arrays, and the recorded ranges of the object, fields, and
+ array elements. */
+
+static void
+test_parse_object ()
+{
+ const int line_1 = 1;
+ test_location_map loc_map;
+ error *err = NULL;
+ json::value *jv
+ /* 0 1 2 3 . */
+ /* 01 2345 678 9012 345 6789 0123456789012. */
+ = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, null]}",
+ false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_OBJECT);
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 32, line_1, 32);
+ /* Use the checked as_a cast, rather than static_cast, for consistency
+ with the rest of the file. */
+ json::object *jo = as_a <json::object *> (jv);
+
+ json::value *foo_value = jo->get ("foo");
+ ASSERT_NE (foo_value, NULL);
+ ASSERT_EQ (foo_value->get_kind (), JSON_STRING);
+ ASSERT_STREQ (as_a <json::string *> (foo_value)->get_string (), "bar");
+ range = loc_map.get_range_for_value (foo_value);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 8, line_1, 8,
+ 12, line_1, 12);
+
+ json::value *baz_value = jo->get ("baz");
+ ASSERT_NE (baz_value, NULL);
+ ASSERT_EQ (baz_value->get_kind (), JSON_ARRAY);
+ range = loc_map.get_range_for_value (baz_value);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 22, line_1, 22,
+ 31, line_1, 31);
+
+ json::array *baz_array = as_a <json::array *> (baz_value);
+ ASSERT_EQ (baz_array->length (), 2);
+
+ json::value *element0 = baz_array->get (0);
+ ASSERT_EQ (as_a <json::integer_number *> (element0)->get (), 42);
+ range = loc_map.get_range_for_value (element0);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 23, line_1, 23,
+ 24, line_1, 24);
+
+ json::value *element1 = baz_array->get (1);
+ ASSERT_EQ (element1->get_kind (), JSON_NULL);
+ range = loc_map.get_range_for_value (element1);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 27, line_1, 27,
+ 30, line_1, 30);
+
+ delete jv;
+}
+
+/* Verify that the JSON literals "true", "false" and "null" are parsed
+ correctly, with their ranges recorded. */
+
+static void
+test_parse_literals ()
+{
+ const int line_1 = 1;
+ test_location_map loc_map;
+ json::value *jv;
+ error *err = NULL;
+ jv = parse_utf8_string ("true", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_TRUE);
+ assert_print_eq (*jv, "true");
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 3, line_1, 3);
+ delete jv;
+
+ jv = parse_utf8_string ("false", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_FALSE);
+ assert_print_eq (*jv, "false");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 4, line_1, 4);
+ delete jv;
+
+ jv = parse_utf8_string ("null", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_NULL);
+ assert_print_eq (*jv, "null");
+ range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 3, line_1, 3);
+ delete jv;
+}
+
+/* Verify that we can parse a simple JSON-RPC request spanning
+ two lines, and that the whole-document range is recorded. */
+
+static void
+test_parse_jsonrpc ()
+{
+ test_location_map loc_map;
+ error *err = NULL;
+ const char *request
+ /* 0 1 2 3 4. */
+ /* 01 23456789 012 3456 789 0123456 789 012345678 90. */
+ = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\",\n"
+ /* 0 1 2 3 4. */
+ /* 0 1234567 8901234567890 1234 56789012345678 90. */
+ " \"params\": [42, 23], \"id\": 1}");
+ const int line_1 = 1;
+ const int line_2 = 2;
+ /* Line 1 is 41 unichars, including its newline. */
+ const size_t line_2_offset = 41;
+ json::value *jv = parse_utf8_string (request, false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ line_2_offset + 28, line_2, 28);
+ delete jv;
+}
+
+/* Verify that we can parse an empty JSON object "{}". */
+
+static void
+test_parse_empty_object ()
+{
+ const int line_1 = 1;
+ test_location_map loc_map;
+ error *err = NULL;
+ json::value *jv = parse_utf8_string ("{}", false, &err, &loc_map);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_OBJECT);
+ assert_print_eq (*jv, "{}");
+ location_map::range *range = loc_map.get_range_for_value (jv);
+ ASSERT_TRUE (range);
+ ASSERT_RANGE_EQ (*range,
+ 0, line_1, 0,
+ 1, line_1, 1);
+ delete jv;
+}
+
+/* Verify that comment-parsing can be enabled or disabled:
+ the same input should fail with comments disabled, and parse to
+ the integer 42 with comments enabled. */
+
+static void
+test_parsing_comments ()
+{
+ const char *str = ("// foo\n"
+ "/*...\n"
+ "...*/ 42 // bar\n"
+ "/* etc */\n");
+
+ /* Parsing with comment support disabled. */
+ {
+ error *err = NULL;
+ json::value *jv = parse_utf8_string (str, false, &err, NULL);
+ ASSERT_NE (err, NULL);
+ ASSERT_STREQ (err->get_msg (),
+ "invalid JSON token: unexpected character: '/'");
+ ASSERT_EQ (jv, NULL);
+ /* The error must be deleted by the caller (previously leaked
+ here). */
+ delete err;
+ }
+
+ /* Parsing with comment support enabled. */
+ {
+ error *err = NULL;
+ json::value *jv = parse_utf8_string (str, true, &err, NULL);
+ ASSERT_EQ (err, NULL);
+ ASSERT_NE (jv, NULL);
+ ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
+ /* Use the checked as_a cast, rather than a C-style cast, for
+ consistency with the rest of the file. */
+ ASSERT_EQ (as_a <json::integer_number *> (jv)->get (), 42);
+ delete jv;
+ }
+}
+
+/* Verify that the JSON parser gracefully handles an empty input
+ string, reporting "expected a JSON value but got EOF" rather than
+ returning a value. */
+
+static void
+test_error_empty_string ()
+{
+ const int line_1 = 1;
+ error *err = NULL;
+ json::value *jv = parse_utf8_string ("", false, &err, NULL);
+ ASSERT_ERR_EQ (err,
+ 0, line_1, 0,
+ 0, line_1, 0,
+ "expected a JSON value but got EOF");
+ ASSERT_EQ (jv, NULL);
+ delete err;
+}
+
+/* Verify that JSON parsing gracefully handles an invalid token,
+ reporting the location of the unexpected character. */
+
+static void
+test_error_bad_token ()
+{
+ const int line_1 = 1;
+ error *err = NULL;
+ json::value *jv = parse_utf8_string (" not valid ", false, &err, NULL);
+ ASSERT_ERR_EQ (err,
+ 2, line_1, 2,
+ 2, line_1, 2,
+ "invalid JSON token: unexpected character: 'n'");
+ ASSERT_EQ (jv, NULL);
+ delete err;
+}
+
+/* Verify that JSON parsing gracefully handles a missing comma
+ within an object; the error range should cover the unexpected
+ string token "bar". */
+
+static void
+test_error_object_with_missing_comma ()
+{
+ const int line_1 = 1;
+ error *err = NULL;
+ /* 0 1 2. */
+ /* 01 2345 6789012 3456 7890. */
+ const char *json = "{\"foo\" : 42 \"bar\"";
+ json::value *jv = parse_utf8_string (json, false, &err, NULL);
+ ASSERT_ERR_EQ (err,
+ 12, line_1, 12,
+ 16, line_1, 16,
+ "expected ',' or '}'; got string");
+ ASSERT_EQ (jv, NULL);
+ delete err;
+}
+
+/* Verify that JSON parsing gracefully handles a missing comma
+ within an array; the error range should cover the unexpected
+ number token "42". */
+
+static void
+test_error_array_with_missing_comma ()
+{
+ const int line_1 = 1;
+ error *err = NULL;
+ /* 01234567. */
+ const char *json = "[0, 1 42]";
+ json::value *jv = parse_utf8_string (json, false, &err, NULL);
+ ASSERT_ERR_EQ (err,
+ 6, line_1, 6,
+ 7, line_1, 7,
+ "expected ',' or ']'; got number");
+ ASSERT_EQ (jv, NULL);
+ delete err;
+}
+
+/* Run all of the selftests within this file.
+ Called from selftest::run_tests (see selftest-run-tests.cc). */
+
+void
+json_parser_cc_tests ()
+{
+ test_lexer ();
+ test_lexing_unsupported_single_line_comment ();
+ test_lexing_unsupported_multiline_comment ();
+ test_lexing_supported_single_line_comment ();
+ test_lexing_supported_multiline_comment ();
+ test_parse_string ();
+ test_parse_number ();
+ test_parse_array ();
+ test_parse_object ();
+ test_parse_literals ();
+ test_parse_jsonrpc ();
+ test_parse_empty_object ();
+ test_parsing_comments ();
+ test_error_empty_string ();
+ test_error_bad_token ();
+ test_error_object_with_missing_comma ();
+ test_error_array_with_missing_comma ();
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
new file mode 100644
@@ -0,0 +1,94 @@
+/* JSON parsing
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_JSON_PARSING_H
+#define GCC_JSON_PARSING_H
+
+#include "json.h"
+
+namespace json
+{
+
+/* Declarations for parsing JSON to a json::value * tree. */
+
+/* Abstract base class for recording what the locations of JSON values
+ were as they were parsed. */
+
+class location_map
+{
+public:
+ /* A point within the JSON input file. */
+ struct point
+ {
+ size_t m_unichar_idx; /* zero-based. */
+ int m_line; /* one-based. */
+ int m_column; /* zero-based unichar count. */
+ };
+
+ /* A range of points within the JSON input file.
+ Both endpoints are part of the range. */
+ struct range
+ {
+ point m_start;
+ point m_end;
+ };
+
+ virtual ~location_map () {}
+ /* Hook called for each JSON value as its range becomes known. */
+ virtual void record_range_for_value (json::value *jv, const range &r) = 0;
+ /* Optional hook called once parsing has completed. */
+ virtual void on_finished_parsing () {}
+};
+
+/* Class for recording an error within a JSON file. */
+
+class error
+{
+public:
+ error (const location_map::range &r, char *msg)
+ : m_range (r), m_msg (msg)
+ {
+ }
+ ~error ()
+ {
+ free (m_msg);
+ }
+
+ const location_map::range &get_range () const { return m_range; }
+ const char *get_msg () const { return m_msg; }
+
+private:
+ location_map::range m_range;
+ char *m_msg;
+};
+
+/* Functions for parsing JSON buffers. */
+
+/* Parse LENGTH bytes of UTF-8 from UTF8_BUF. */
+extern value *parse_utf8_string (size_t length,
+ const char *utf8_buf,
+ bool allow_comments,
+ error **err_out,
+ location_map *out_loc_map);
+/* As above, but for a nul-terminated string. */
+extern value *parse_utf8_string (const char *utf8,
+ bool allow_comments,
+ error **err_out,
+ location_map *out_loc_map);
+
+} // namespace json
+
+#endif /* GCC_JSON_PARSING_H */
@@ -264,7 +264,7 @@ namespace selftest {
/* Verify that JV->print () prints EXPECTED_JSON. */
-static void
+void
assert_print_eq (const json::value &jv, const char *expected_json)
{
pretty_printer pp;
@@ -27,8 +27,8 @@ along with GCC; see the file COPYING3. If not see
and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
and https://tools.ietf.org/html/rfc7159
- Supports creating a DOM-like tree of json::value *, and then dumping
- json::value * to text. */
+ Supports parsing text into a DOM-like tree of json::value *, directly
+ creating such trees, and dumping json::value * to text. */
namespace json
{
@@ -114,6 +114,11 @@ class array : public value
void append (value *v);
+ value **begin () { return m_elements.begin (); }
+ value **end () { return m_elements.end (); }
+ size_t length () const { return m_elements.length (); }
+ value *get (size_t idx) const { return m_elements[idx]; }
+
private:
auto_vec<value *> m_elements;
};
@@ -188,4 +193,54 @@ class literal : public value
} // namespace json
+/* Specializations of is_a_helper, discriminating on value::get_kind,
+ to support the use of checked as_a <T> casts on json::value *. */
+
+template <>
+template <>
+inline bool
+is_a_helper <json::object *>::test (json::value *jv)
+{
+ return jv->get_kind () == json::JSON_OBJECT;
+}
+
+template <>
+template <>
+inline bool
+is_a_helper <json::array *>::test (json::value *jv)
+{
+ return jv->get_kind () == json::JSON_ARRAY;
+}
+
+template <>
+template <>
+inline bool
+is_a_helper <json::float_number *>::test (json::value *jv)
+{
+ return jv->get_kind () == json::JSON_FLOAT;
+}
+
+template <>
+template <>
+inline bool
+is_a_helper <json::integer_number *>::test (json::value *jv)
+{
+ return jv->get_kind () == json::JSON_INTEGER;
+}
+
+template <>
+template <>
+inline bool
+is_a_helper <json::string *>::test (json::value *jv)
+{
+ return jv->get_kind () == json::JSON_STRING;
+}
+
+#if CHECKING_P
+
+namespace selftest {
+
+extern void assert_print_eq (const json::value &jv, const char *expected_json);
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
+
#endif /* GCC_JSON_H */
@@ -74,6 +74,7 @@ selftest::run_tests ()
opt_suggestions_cc_tests ();
opts_cc_tests ();
json_cc_tests ();
+ json_parser_cc_tests ();
cgraph_cc_tests ();
optinfo_emit_json_cc_tests ();
ordered_hash_map_tests_cc_tests ();
@@ -237,6 +237,7 @@ extern void hash_map_tests_cc_tests ();
extern void hash_set_tests_cc_tests ();
extern void input_cc_tests ();
extern void json_cc_tests ();
+extern void json_parser_cc_tests ();
extern void optinfo_emit_json_cc_tests ();
extern void opts_cc_tests ();
extern void ordered_hash_map_tests_cc_tests ();