2
* Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
26
* This is not really json in the state it is now.
28
* - Double quotes around the key in an object are not enforced.
29
* i.e. you can write: { foo : "bar" } instead of { "foo" : "bar" }.
30
* - Comments are allowed.
31
* - The last element in an object or array can have an ending comma.
34
#include "precompiled.hpp"
35
#include "utilities/json.hpp"
36
#include "utilities/ostream.hpp"
39
// Local stand-in for GNU strchrnul(): returns a pointer to the first
// occurrence of c in s, or to the terminating NUL of s when c does not occur.
// s must be a non-null, NUL-terminated string. Never returns nullptr.
static const char* strchrnul_(const char* s, int c) {
  const char* found = strchr(s, c);
  // strchr() yields nullptr on a miss; map that to the end of the string so
  // callers can always dereference the result.
  return found != nullptr ? found : s + strlen(s);
}
44
// Sets up a parser over the C string `text` and parses it: start, pos and
// mark all begin at `text`, nesting level is 0, line is 1 and column is 0.
// `st` is the stream that error() prints to. The result of
// parse_json_value() is stored in _valid.
JSON::JSON(const char* text, bool silent, outputStream* st)
45
: _st(st), start(text), pos(text), mark(text),
46
level(0), line(1), column(0), silent(silent), _valid(true)
51
// A null input is treated as a caller bug: asserted here, and reported
// through error() as an INTERNAL_ERROR by the check below.
assert(start != nullptr, "Need something to parse");
52
if (start == nullptr) {
54
error(INTERNAL_ERROR, "JSON parser was called with a string that was null.");
56
// _valid (initialised to true above) becomes the result of the parse.
_valid = parse_json_value();
64
// Parses the next JSON value.
// Two modes are visible below: at the top level only an object or an array
// is accepted (EOS or any other start is a syntax error, as is anything
// following the first top-level object/array); at level > 0 the value is
// dispatched to the object, array, string, number or true/false/null symbol
// parser, and that parser's result is returned.
bool JSON::parse_json_value() {
72
// Must start with object or array
77
if (parse_json_object() == false) {
83
error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
91
if (parse_json_array() == false) {
97
error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
105
error(SYNTAX_ERROR, "EOS was encountered before any json declarations");
109
error(SYNTAX_ERROR, "Json must start with an object or an array.");
112
} else { // level > 0
115
// Nested values: hand over to the parser for the detected value kind.
return parse_json_object();
118
return parse_json_array();
121
return parse_json_string();
124
// A leading digit starts a number.
case '1': case '2': case '3':
125
case '4': case '5': case '6':
126
case '7': case '8': case '9':
127
return parse_json_number();
130
// The three bare-word literals share one symbol parser.
return parse_json_symbol("true", JSON_TRUE);
133
return parse_json_symbol("false", JSON_FALSE);
136
return parse_json_symbol("null", JSON_NULL);
139
error(SYNTAX_ERROR, "EOS was encountered when expecting a json value.");
143
error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?).");
149
// Should only be called when we actually have the start of an object
150
// Otherwise it is an internal error
//
// Parses an object: '{', then `key : value` pairs separated by ',', then '}'.
// An empty object and a trailing comma before '}' are both accepted.
// Emits JSON_OBJECT_BEGIN and JSON_OBJECT_END through callback(); `level` is
// incremented for the contents of the object and restored at the end.
151
bool JSON::parse_json_object() {
152
// prev_pos is declared under NOT_PRODUCT and is only used by the
// "parsing stalled" asserts below.
NOT_PRODUCT(const char* prev_pos);
156
// Check that we are not called in error
157
if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) {
161
// Post-increment: the begin event carries the enclosing level, and
// everything inside the object is parsed one level deeper.
if (!callback(JSON_OBJECT_BEGIN, nullptr, level++)) {
169
error(SYNTAX_ERROR, "EOS when expecting an object key or object end");
173
} else if (c == '}') {
174
// We got here from either empty object "{}" or ending comma "{a:1,}"
179
NOT_PRODUCT(prev_pos = pos);
180
if (parse_json_key() == false) {
183
// A successful key parse must have consumed input.
assert(pos > prev_pos, "parsing stalled");
187
if (expect_any(":", "object key-value separator") <= 0) {
193
NOT_PRODUCT(prev_pos = pos);
194
if (parse_json_value() == false) {
197
// A successful value parse must have consumed input.
assert(pos > prev_pos, "parsing stalled");
201
// After a value: ',' continues with another pair, '}' ends the object.
if (expect_any(",}", "value separator or object end") <= 0) {
209
// NOTE(review): the message says "array parsing" although this is the
// object parser - looks like a copy/paste slip; confirm before changing
// the string.
assert(c == '}', "array parsing ended without object end token ('}')");
210
// Pre-decrement so the end event reports the same level as the begin event.
return callback(JSON_OBJECT_END, nullptr, --level);
213
// Should only be called when we actually have the start of an array
214
// Otherwise it is an internal error
//
// Parses an array: '[', then values separated by ',', then ']'.
// An empty array and a trailing comma before ']' are both accepted.
// Emits JSON_ARRAY_BEGIN and JSON_ARRAY_END through callback(); `level` is
// incremented for the contents of the array and restored at the end.
215
bool JSON::parse_json_array() {
216
// prev_pos is declared under NOT_PRODUCT and is only used by the
// "parsing stalled" assert below.
NOT_PRODUCT(const char* prev_pos);
220
// Check that we are not called in error
221
if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) {
225
// Post-increment: the begin event carries the enclosing level, and the
// elements are parsed one level deeper.
if (!callback(JSON_ARRAY_BEGIN, nullptr, level++)) {
233
error(SYNTAX_ERROR, "EOS when expecting a json value or array end");
237
} else if (c == ']') {
238
// We got here from either empty array "[]" or ending comma "[1,]"
244
NOT_PRODUCT(prev_pos = pos);
245
if (parse_json_value() == false) {
248
// A successful element parse must have consumed input.
assert(pos > prev_pos, "parsing stalled");
252
// After an element: ',' continues with another value, ']' ends the array.
if (expect_any(",]", "value separator or array end") <= 0) {
260
assert(c == ']', "array parsing ended without array end token (']')");
261
// Pre-decrement so the end event reports the same level as the begin event.
return callback(JSON_ARRAY_END, nullptr, --level);
264
// Parses a double-quoted string at the current position and reports it
// through callback() as either JSON_KEY or JSON_STRING.
// `key` presumably selects between the two - the selecting branch is not
// shown here; confirm.
// The string ends at the first '"' found by strchr(): backslash escapes are
// not interpreted (see the TODO), so the string body cannot contain '"'.
bool JSON::parse_json_string(bool key) {
269
// The opening quote must be present; anything else is an internal error.
if (expect_any("\"", "string start character", INTERNAL_ERROR) <= 0) {
273
end = strchr(pos, '"'); // TODO: escapes
274
if (end == nullptr) {
275
error(SYNTAX_ERROR, "String started here never ended. Expected \'\"\' before EOS.");
280
// Length is the distance from the current position to the closing quote.
v.str.length = end - pos;
283
if (expect_any("\"", "string end character", INTERNAL_ERROR) <= 0) {
288
return callback(JSON_KEY, &v, level);
290
return callback(JSON_STRING, &v, level);
294
// TODO: hotspot equivalents?
295
// Returns true if c is an ASCII letter (A-Z or a-z).
// Deliberately locale-independent, unlike isalpha().
static bool is_alpha(u_char c) {
  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
298
// Returns true if c is an ASCII decimal digit (0-9).
static bool is_numeric(u_char c) {
  return c >= '0' && c <= '9';
}
301
static bool is_alnum(u_char c) {
302
return is_alpha(c) || is_numeric(c);
304
static bool is_word(u_char c) {
305
return c == '_' || is_alnum(c);
308
// Allow object keys to be without quotation,
309
// but then restrict to ([a-zA-Z0-9_])+
//
// Parses an object key and reports it through callback() as JSON_KEY.
// Errors are reported through error() as SYNTAX_ERROR.
310
bool JSON::parse_json_key() {
318
// Delegates to the string parser with key == true (presumably the
// quoted-key case; the guarding check is not shown here - confirm).
return parse_json_string(true);
324
error(SYNTAX_ERROR, "Got EOS when expecting an object key.");
326
// An unquoted key must begin with a word character.
} else if (is_word(c) == false) {
327
error(SYNTAX_ERROR, "Expected an object key, which can be a double-quoted (\") string or a simple string (only alphanumeric characters and underscore, separated by whitespace) that doesn't need to be quoted.");
333
// Allow the key to be delimited by control characters and the object key-value separator ':'
334
if (c <= ' ' || c == ':') {
336
// Anything else inside an unquoted key must be a word character.
} else if (is_word(c) == false) {
337
error(SYNTAX_ERROR, "Object key need to be quoted, or consist entirely of alphanumeric characters and underscores.");
344
// The key text is [begin, pos).
v.str.length = pos - begin;
345
return callback(JSON_KEY, &v, level);
348
// Parses a number at the current position by scanning it as a double with
// sscanf("%lf%n"). A value with no fractional part (floor(x) == x) is
// reported through callback() as JSON_NUMBER_INT, any other value as
// JSON_NUMBER_FLOAT. A failed scan is reported as a SYNTAX_ERROR.
bool JSON::parse_json_number() {
355
// Parsing number - for simplicity ints are limited to 2**53
356
// sscanf as a double and check if part is 0.
357
// %n stores the number of characters consumed so far into `read`; the assert
// below guards against an implementation that counts %n as a converted item.
tokens = sscanf(pos, "%lf%n", &double_value, &read);
358
assert(tokens <= 1, "scanf implementation that counts as a token, parsing json numbers will always fail");
360
assert(read > 0, "sanity");
362
// NOTE(review): this tests for a zero fractional part, not for the absence
// of an exponent as the comment below suggests.
if (floor(double_value) == double_value) {
363
// No exponent - treat as an int
364
// NOTE(review): the conversion goes through `int`, so integral values
// outside int's range are not preserved (converting an out-of-range double
// to int is undefined behaviour) even though the comment above mentions
// 2**53 - confirm against the declared type of int_value.
v.int_value = (int)double_value;
365
if (!callback(JSON_NUMBER_INT, &v, level)) {
369
v.double_value = double_value;
370
if (!callback(JSON_NUMBER_FLOAT, &v, level)) {
378
error(SYNTAX_ERROR, "Couldn't parse json number (note that exponents are not supported).");
382
// Handles a bare-word literal such as "true", "false" or "null".
// name:   the exact text expected at the current position.
// symbol: the value type reported through callback(); no value payload is
//         passed (nullptr).
// Returns the result of callback() from the final statement.
bool JSON::parse_json_symbol(const char* name, JSON_TYPE symbol) {
383
// The hint is used as the error_msg if the literal does not match.
if (expect_string(name, "maybe you forgot to quote your strings?") == false) {
387
return callback(symbol, nullptr, level);
390
// Bookkeeping for `mark` relative to `pos`. The visible asserts require that
// mark never runs ahead of pos (checked twice) and that no NUL byte is
// stepped over.
void JSON::mark_pos() {
391
// NOTE(review): as parenthesised, `!= 0` is applied to the boolean result of
// the `||` rather than to `*(mark - 1)`. The outcome is the same (true when
// mark == start or the previous byte is non-zero), but the intent was
// presumably `mark == start || *(mark - 1) != 0`.
assert((mark == start || *(mark - 1)) != 0, "buffer overrun");
392
assert(mark <= pos, "mark runahead");
398
assert(c != 0, "pos buffer overrun?");
409
assert(mark <= pos, "mark runahead");
413
assert((pos == start || *(pos - 1)) != 0, "buffer overrun");
426
// Peek ahead i chars (0 is same as peek())
427
u_char JSON::peek(size_t i) {
433
// Stop early once a NUL (c == 0) is seen, even if i is not yet exhausted.
while (i > 0 && c != 0) {
442
* Check that one of the expected characters is next in the stream.
443
* If not, it is an error.
444
* Returns 0 if EOS is encountered.
445
* Returns -1 if the next character was not one of the expected.
446
* Otherwise consumes and returns the expected character that was encountered.
448
// valid_chars: non-empty string holding every acceptable character.
// error_msg:   description of what was expected, used in the diagnostics.
// e:           error category passed to error() on EOS or on a mismatch.
int JSON::expect_any(const char* valid_chars, const char* error_msg, JSON_ERROR e) {
452
len = strlen(valid_chars);
453
assert(len > 0, "need non-empty string");
457
// "one of " is only printed when more than one character is acceptable.
error(e, "Got EOS when expecting %s (%s\'%s\').", error_msg, len > 1 ? "one of " : "", valid_chars);
460
// Compare the current character against each acceptable character in turn.
for (size_t i = 0; i < len; i++) {
461
if (c == valid_chars[i]) {
465
error(e, "Expected %s (%s\'%s\').", error_msg, len > 1 ? "one of " : "", valid_chars);
470
* Check that the expected string is next in the stream.
471
* If not, it is an error.
472
* Consumes the expected characters if they are present.
473
* Returns true if the expected characters were present, otherwise false.
475
// expected_string: non-null, non-empty text that must come next; it may only
//                  contain characters above ' ' (no whitespace or control
//                  characters).
// error_msg:       extra context included in the diagnostics.
// e:               error category passed to error() on EOS or on a mismatch.
bool JSON::expect_string(const char* expected_string, const char* error_msg, JSON_ERROR e) {
476
u_char c, expected_char;
479
assert(expected_string != nullptr, "need non-null string");
480
len = strlen(expected_string);
481
assert(len > 0, "need non-empty string");
483
// Match the expected text one character at a time.
for (size_t i = 0; i < len; i++) {
484
expected_char = expected_string[i];
485
assert(expected_char > ' ', "not sane for control characters");
486
// Same condition as the assert above, also reported through error().
if (expected_char <= ' ') {
487
error(INTERNAL_ERROR, "expect got a control char");
491
error(e, "EOS encountered when expecting %s (\"%s\")", error_msg, expected_string);
493
} else if (c != expected_char) {
494
error(e, "Expected \"%s\" (%s)", expected_string, error_msg);
504
* Returns number of characters skipped.
506
// Skips up to i characters.
size_t JSON::skip(size_t i) {
511
// j counts down from i; the loop also stops as soon as c is NUL.
for (j = i; c != 0 && j > 0; j--) {
518
* Skip whitespace and comments.
519
* Returns the first token after whitespace/comments without consuming it
520
* Returns 0 if EOS is encountered.
521
* Returns -1 if there is an error
523
// Advances past whitespace and both comment forms, delegating to
// skip_line_comment() and skip_block_comment().
int JSON::skip_to_token() {
529
c = skip_line_comment();
530
// c2 is presumably the character following a '/', selecting between "//"
// and "/*" - the lookahead itself is not shown here; confirm.
} else if (c2 == '*') {
531
c = skip_block_comment();
536
// Fall through to keep checking if there
537
// are more whitespace / comments to skip
539
// Stop at EOS or at the first character above ' '.
if (c == 0 || c > ' ') {
548
* Skip to, and return the wanted char without consuming it
549
* Returns 0 if EOS is encountered.
551
// want: the character to stop at.
u_char JSON::skip_to(u_char want) {
552
// We want the bookkeeping done in next().
553
// Otherwise strchr could have been used.
557
// Done at EOS or once the wanted character is the current one.
if (c == 0 || c == want) {
565
* Should only be called when we actually have a line comment to skip.
566
* Otherwise it is an internal error.
568
* Will return the first token after the line comment without consuming it.
569
* Returns 0 if EOS is encountered.
571
// Skips a "//" line comment.
u_char JSON::skip_line_comment() {
574
// Check that we are not called in error
575
// One expect_any() call per '/' of the "//" opener. The results of these two
// calls are not checked.
expect_any("/", "line comment start", INTERNAL_ERROR);
576
expect_any("/", "line comment start", INTERNAL_ERROR);
587
* Should only be called when we actually have a block comment to skip.
588
* Otherwise it is an internal error.
590
* Returns the first token after the block comment without consuming it.
591
* Returns -1 if EOS is encountered in the middle of a comment.
593
// Skips a "/* ... */" block comment. An unterminated comment is reported as
// a SYNTAX_ERROR.
int JSON::skip_block_comment() {
596
// Check that we are not called in error.
597
if (peek() != '/' || peek(1) != '*') {
598
// Let expect handle EOS.
599
expect_string("/*", "block comment start", INTERNAL_ERROR);
605
// Jump to the next '*', or to the terminating NUL if there is none.
current = strchrnul_(current, '*');
607
// Either no '*' was found or it is the last character of the input, so the
// comment can no longer be closed.
if (current[0] == 0 || current[1] == 0) {
608
// Advance error marker to start of block comment
610
error(SYNTAX_ERROR, "Block comment started here never ended. Expected \"*/\" before EOS.");
614
// A '*' directly followed by '/' closes the comment.
if (current[1] == '/') {
616
if (expect_string("*/", "block comment end", INTERNAL_ERROR) == false) {
619
// Found block comment end
626
// Maps a JSON_ERROR to a short human-readable label; error() prints it as the
// prefix of every diagnostic. The returned strings are literals.
const char* JSON::strerror(JSON_ERROR e) {
629
return "Syntax error";
631
return "Internal error";
635
return "Value error";
637
// Fallback for a value not covered by the returns above.
ShouldNotReachHere();
638
return "Unknown error";
642
void JSON::error(JSON_ERROR e, const char* format, ...) {
646
const char* line_start;
652
_st->print("%s on line %u byte %u: ", JSON::strerror(e), line, column + 1);
653
va_start(args, format);
654
_st->vprint(format, args);
658
line_start = mark - column;
659
assert(line_start >= start, "out of bounds");
660
assert(line_start <= mark, "out of bounds");
661
assert(line_start == start || line_start[-1] == '\n', "line counting error");
666
_st->print_cr("EOS.");
681
// Skip to newline or EOS
682
tmp = strchrnul_(mark, '\n');
683
line_length = tmp - line_start;
685
_st->print_cr("%s", line_start);