// llama · Fork 0 / test-grammar-integration.cpp
// 1310 lines · 35.0 KB
1
#ifdef NDEBUG
2
#undef NDEBUG
3
#endif
4

5
#include "unicode.h"
6
#include "llama-grammar.h"
7
#include "json-schema-to-grammar.h"
8

9
#include <cassert>
10
#include <string>
11
#include <vector>
12

13
using json = nlohmann::ordered_json;
14

15
// Builds a grammar from GBNF text, using "root" as the start symbol.
// The pointer from llama_grammar_init_impl is returned unchanged; callers in
// this file treat nullptr as "the grammar could not be built".
static llama_grammar * build_grammar(const std::string & grammar_str) {
    const char * const gbnf_text = grammar_str.c_str();
    return llama_grammar_init_impl(nullptr, gbnf_text, "root");
}
18

19
static bool test_build_grammar_fails(const std::string & grammar_str) {
20
    fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
21
    bool grammar_fails = false;
22
    llama_grammar * grammar = build_grammar(grammar_str);
23
    if (grammar != nullptr) {
24
        fprintf(stderr, "  ❌ Expected build failure, but succeeded\n");
25
    } else {
26
        grammar_fails = true;
27
        fprintf(stdout, "  ✅︎\n");
28
    }
29
    return grammar_fails;
30
}
31

32
static bool match_string(const std::string & input, llama_grammar * grammar) {
33
    const auto cpts = unicode_cpts_from_utf8(input);
34

35
    const llama_grammar_rules  & rules      = llama_grammar_get_rules (grammar);
36
          llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
37

38
    for (const auto & cpt : cpts) {
39
        const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
40

41
        llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
42

43
        if (stacks_cur.empty()) {
44
            // no stacks means that the grammar failed to match at this point
45
            return false;
46
        }
47
    }
48

49
    for (const auto & stack : stacks_cur) {
50
        if (stack.empty()) {
51
            // An empty stack means that the grammar has been completed
52
            return true;
53
        }
54
    }
55

56
    return false;
57
}
58

59
static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
60
    fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
61
    fflush(stderr);
62

63
    auto * grammar = build_grammar(grammar_str);
64

65
    // Save the original grammar stacks so that we can reset after every new string we want to test
66
    const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar);
67

68
    llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
69

70
    fprintf(stderr, "  🔵 Valid strings:\n");
71

72
    // Passing strings
73
    for (const auto & test_string : passing_strings) {
74
        fprintf(stderr, "    \"%s\" ", test_string.c_str());
75
        fflush(stderr);
76

77
        bool matched = match_string(test_string, grammar);
78

79
        if (!matched) {
80
            fprintf(stderr, "❌ (failed to match)\n");
81

82
            // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
83
            // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
84
            FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
85
            if (grammar_file) {
86
                fprintf(grammar_file, "%s", grammar_str.c_str());
87
                fclose(grammar_file);
88
            }
89

90
            // DEBUG: Write the test string to test-grammar-integration.string.txt
91
            FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
92
            if (string_file) {
93
                fprintf(string_file, "%s", test_string.c_str());
94
                fclose(string_file);
95
            }
96

97
            fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command:     ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
98
        } else {
99
            fprintf(stdout, "✅︎\n");
100
        }
101

102
        assert(matched);
103

104
        // Reset the grammar stacks
105
        stacks_cur = stacks_org;
106
    }
107

108
    fprintf(stderr, "  🟠 Invalid strings:\n");
109

110
    // Failing strings
111
    for (const auto & test_string : failing_strings) {
112
        fprintf(stderr, "    \"%s\" ", test_string.c_str());
113
        fflush(stderr);
114

115
        bool matched = match_string(test_string, grammar);
116

117
        if (matched) {
118
            fprintf(stderr, "❌ (incorrectly matched)\n");
119
        } else {
120
            fprintf(stdout, "✅︎\n");
121
        }
122
        assert(!matched);
123

124
        // Reset the grammar stacks
125
        stacks_cur = stacks_org;
126
    }
127

128
    // Clean up allocated memory
129
    llama_grammar_free_impl(grammar);
130
}
131
static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
132
    test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
133
}
134
static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
135
    test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
136
}
137

138
static void test_simple_grammar() {
139
    test_schema(
140
        "min 0",
141
        R"""({
142
            "type": "integer",
143
            "minimum": 0
144
        })""",
145
        // Passing strings
146
        {
147
            "0",
148
            "10",
149
            "12",
150
            "10000",
151
        },
152
        // Failing strings
153
        {
154
            "-1",
155
            "-10",
156
            "-10000",
157
            "-100000000000000000000000000000000",
158
            "100000000000000000000000000000000",
159
            "00",
160
            "01",
161
            "-0",
162
        }
163
    );
164
    test_schema(
165
        "min 2",
166
        // Schema
167
        R"""({
168
            "type": "integer",
169
            "minimum": 2
170
        })""",
171
        // Passing strings
172
        {
173
            "2",
174
            "3",
175
            "4",
176
            "10",
177
            "20",
178
            "1234567890000000",
179
        },
180
        // Failing strings
181
        {
182
            "0",
183
            "1",
184
            "-1",
185
            "-100",
186
            "0",
187
            "1",
188
            "01",
189
            "02",
190
            "12345678900000000",
191
        }
192
    );
193
    test_schema(
194
        "min 456",
195
        R"""({
196
            "type": "integer",
197
            "minimum": 456
198
        })""",
199
        // Passing strings
200
        {
201
            "456",
202
            "4560",
203
            "457",
204
            "460",
205
            "500",
206
        },
207
        // Failing strings
208
        {
209
            "455",
210
            "356",
211
            "50",
212
            "050",
213
            "-1",
214
            "-456",
215
        }
216
    );
217
    test_schema(
218
        "min -123",
219
        R"""({
220
            "type": "integer",
221
            "minimum": -123
222
        })""",
223
        // Passing strings
224
        {
225
            "-123",
226
            "-122",
227
            "-11",
228
            "-1",
229
            "0",
230
            "1",
231
            "123",
232
            "1234",
233
            "2345",
234
        },
235
        // Failing strings
236
        {
237
            "-1234",
238
            "-124",
239
        }
240
    );
241

242
    test_schema(
243
        "max 9999",
244
        // Schema
245
        R"""({
246
            "type": "integer",
247
            "maximum": 9999
248
        })""",
249
        // Passing strings
250
        {
251
            "-99999",
252
            "0",
253
            "9999",
254
        },
255
        // Failing strings
256
        {
257
            "10000",
258
            "99991",
259
        }
260
    );
261
    test_schema(
262
        "max -9999",
263
        // Schema
264
        R"""({
265
            "type": "integer",
266
            "maximum": -9999
267
        })""",
268
        // Passing strings
269
        {
270
            "-10000",
271
            "-9999",
272
        },
273
        // Failing strings
274
        {
275
            "-9998",
276
            "0",
277
            "9999",
278
        }
279
    );
280
    test_schema(
281
        "min 5 max 30",
282
        // Schema
283
        R"""({
284
            "type": "integer",
285
            "minimum": 5,
286
            "maximum": 30
287
        })""",
288
        // Passing strings
289
        {
290
            "5",
291
            "10",
292
            "30",
293
        },
294
        // Failing strings
295
        {
296
            "05",
297
            "4",
298
            "-1",
299
            "31",
300
            "123",
301
            "0123",
302
        }
303
    );
304
    test_schema(
305
        "min -1 max 1",
306
        R"""({
307
            "type": "integer",
308
            "minimum": -1,
309
            "maximum": 1
310
        })""",
311
        // Passing strings
312
        {
313
            "-1",
314
            "0",
315
            "1",
316
        },
317
        // Failing strings
318
        {
319
            "-11",
320
            "-10",
321
            "-2",
322
            "2",
323
            "10",
324
            "11",
325
        }
326
    );
327
    test_schema(
328
        "min -123 max 42",
329
        R"""({
330
            "type": "integer",
331
            "minimum": -123,
332
            "maximum": 42
333
        })""",
334
        // Passing strings
335
        {
336
            "-123",
337
            "-122",
338
            "-13",
339
            "-11",
340
            "-2",
341
            "-1",
342
            "0",
343
            "1",
344
            "5",
345
            "10",
346
            "39",
347
            "40",
348
            "42",
349
        },
350
        // Failing strings
351
        {
352
            "-0123",
353
            "-124",
354
            "-1123",
355
            "-200",
356
            "43",
357
            "123",
358
            "0123",
359
        }
360
    );
361
    test_schema(
362
        "exclusive min / max",
363
        // Schema
364
        R"""({
365
            "type": "integer",
366
            "exclusiveMinimum": 0,
367
            "exclusiveMaximum": 10000
368
        })""",
369
        // Passing strings
370
        {
371
            "1",
372
            "9999",
373
        },
374
        // Failing strings
375
        {
376
            "0",
377
            "01",
378
            "10000",
379
            "99999",
380
        }
381
    );
382

383
    // Test case for a simple grammar
384
    test_grammar(
385
        "simple grammar",
386
        R"""(
387
            root ::= expr
388
            expr ::= term ("+" term)*
389
            term ::= number
390
            number ::= [0-9]+)""",
391
        // Passing strings
392
        {
393
            "42",
394
            "1+2+3+4+5",
395
            "123+456",
396
        },
397
        // Failing strings
398
        {
399
            "+",
400
            "/ 3",
401
            "1+2+3+4+5+",
402
            "12a45",
403
        }
404
    );
405
}
406

407
static void test_complex_grammar() {
408
    // Test case for a more complex grammar, with both failure strings and success strings
409
    test_grammar(
410
        "medium complexity grammar",
411
        // Grammar
412
        R"""(
413
            root ::= expression
414
            expression ::= term ws (("+"|"-") ws term)*
415
            term ::= factor ws (("*"|"/") ws factor)*
416
            factor ::= number | variable | "(" expression ")" | function-call
417
            number ::= [0-9]+
418
            variable ::= [a-zA-Z_][a-zA-Z0-9_]*
419
            function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
420
            ws ::= [ \t\n\r]?)""",
421
        // Passing strings
422
        {
423
            "42",
424
            "1*2*3*4*5",
425
            "x",
426
            "x+10",
427
            "x1+y2",
428
            "(a+b)*(c-d)",
429
            "func()",
430
            "func(x,y+2)",
431
            "a*(b+c)-d/e",
432
            "f(g(x),h(y,z))",
433
            "x + 10",
434
            "x1 + y2",
435
            "(a + b) * (c - d)",
436
            "func()",
437
            "func(x, y + 2)",
438
            "a * (b + c) - d / e",
439
            "f(g(x), h(y, z))",
440
            "123+456",
441
            "123*456*789-123/456+789*123",
442
            "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
443
        },
444
        // Failing strings
445
        {
446
            "+",
447
            "/ 3x",
448
            "x + + y",
449
            "a * / b",
450
            "func(,)",
451
            "func(x y)",
452
            "(a + b",
453
            "x + y)",
454
            "a + b * (c - d",
455
            "42 +",
456
            "x +",
457
            "x + 10 +",
458
            "(a + b) * (c - d",
459
            "func(",
460
            "func(x, y + 2",
461
            "a * (b + c) - d /",
462
            "f(g(x), h(y, z)",
463
            "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
464
        }
465
    );
466
}
467

468
static void test_special_chars() {
469
    // A collection of tests to exercise special characters such as "."
470
    test_grammar(
471
        "special characters",
472
        // Grammar
473
        R"""(
474
            root ::= ... "abc" ...
475
            )""",
476
        // Passing strings
477
        {
478
            "abcabcabc",
479
            "aaaabcccc",
480
            // NOTE: Also ensures that multi-byte characters still count as a single character
481
            "🔵🟠✅abc❌🟠🔵"
482
        },
483
        // Failing strings
484
        {
485
            "aaabcccc",
486
            "aaaaabcccc",
487
            "aaaabccc",
488
            "aaaabccccc",
489
            "🔵🟠✅❌abc❌✅🟠🔵",
490
            "🔵🟠abc🟠🔵"
491
        }
492
    );
493
}
494

495
static void test_quantifiers() {
496
    // A collection of tests to exercise * + and ? quantifiers
497

498
    test_grammar(
499
        "* quantifier",
500
        // Grammar
501
        R"""(root ::= "a"*)""",
502
        // Passing strings
503
        {
504
            "",
505
            "a",
506
            "aaaaa",
507
            "aaaaaaaaaaaaaaaaaa",
508
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
509
        },
510
        // Failing strings
511
        {
512
            "b",
513
            "ab",
514
            "aab",
515
            "ba",
516
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
517
        }
518
    );
519
    test_grammar(
520
        "+ quantifier",
521
        // Grammar
522
        R"""(root ::= "a"+)""",
523
        // Passing strings
524
        {
525
            "a",
526
            "aaaaa",
527
            "aaaaaaaaaaaaaaaaaa",
528
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
529
        },
530
        // Failing strings
531
        {
532
            "",
533
            "b",
534
            "ab",
535
            "aab",
536
            "ba",
537
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
538
        }
539
    );
540
    test_grammar(
541
        "? quantifier",
542
        // Grammar
543
        R"""(root ::= "a"?)""",
544
        // Passing strings
545
        {
546
            "",
547
            "a"
548
        },
549
        // Failing strings
550
        {
551
            "b",
552
            "ab",
553
            "aa",
554
            "ba",
555
        }
556
    );
557
    test_grammar(
558
        "mixed quantifiers",
559
        // Grammar
560
        R"""(
561
            root ::= cons+ vowel* cons? (vowel cons)*
562
            vowel ::= [aeiouy]
563
            cons ::= [bcdfghjklmnpqrstvwxyz]
564
            )""",
565
        // Passing strings
566
        {
567
            "yes",
568
            "no",
569
            "noyes",
570
            "crwth",
571
            "four",
572
            "bryyyy",
573
        },
574
        // Failing strings
575
        {
576
            "yess",
577
            "yesno",
578
            "forty",
579
            "catyyy",
580
        }
581
    );
582
    test_grammar(
583
        "simple exact repetition",
584
        // Grammar
585
        R"""(
586
            root ::= [ab]{4}
587
        )""",
588
        // Passing strings
589
        {
590
            "aaaa",
591
            "bbbb",
592
            "abab",
593
        },
594
        // Failing strings
595
        {
596
            "a",
597
            "b",
598
            "aaaaa",
599
        }
600
    );
601
    test_grammar(
602
        "simple min repetition",
603
        // Grammar
604
        R"""(
605
            root ::= [ab]{4,}
606
        )""",
607
        // Passing strings
608
        {
609
            "aaaa",
610
            "aaaaab",
611
            "bbbb",
612
            "ababab",
613
        },
614
        // Failing strings
615
        {
616
            "",
617
            "aba",
618
        }
619
    );
620
    test_grammar(
621
        "simple max repetition",
622
        // Grammar
623
        R"""(
624
            root ::= [ab]{0,4}
625
        )""",
626
        // Passing strings
627
        {
628
            "",
629
            "a",
630
            "aa",
631
            "aaa",
632
            "aaab",
633
        },
634
        // Failing strings
635
        {
636
            "aaaaa",
637
        }
638
    );
639
    test_grammar(
640
        "min / max repetition",
641
        // Grammar
642
        R"""(
643
            root ::= ("0x" [A-F0-9]{2} " "?){3,5}
644
        )""",
645
        // Passing strings
646
        {
647
            "0xFF 0x12 0xAB",
648
            "0xFF 0x12 0xAB 0x00 0x00",
649
        },
650
        // Failing strings
651
        {
652
            "",
653
            "0xFF",
654
            "0xFF 0x12",
655
            "0xFF 0x12 0xAB 0x00 0x00 0x00",
656
        }
657
    );
658
}
659

660
static void test_failure_missing_root() {
661
    fprintf(stderr, "⚫ Testing missing root node:\n");
662
    // Test case for a grammar that is missing a root rule
663
    const std::string grammar_str = R"""(
664
        rot ::= expr
665
        expr ::= term ("+" term)*
666
        term ::= number
667
        number ::= [0-9]+)""";
668

669
    llama_grammar_parser parsed_grammar;
670
    parsed_grammar.parse(grammar_str.c_str());
671

672
    // Ensure we parsed correctly
673
    assert(!parsed_grammar.rules.empty());
674

675
    // Ensure we do NOT have a root node
676
    assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
677
    fprintf(stderr, "  ✅︎ Passed\n");
678
}
679

680
static void test_failure_missing_reference() {
681
    fprintf(stderr, "⚫ Testing missing reference node:\n");
682

683
    // Test case for a grammar that is missing a referenced rule
684
    const std::string grammar_str =
685
        R"""(root ::= expr
686
        expr ::= term ("+" term)*
687
        term ::= numero
688
        number ::= [0-9]+)""";
689

690
    fprintf(stderr, "    Expected error:  ");
691

692
    llama_grammar_parser parsed_grammar;
693
    parsed_grammar.parse(grammar_str.c_str());
694

695
    // Ensure we did NOT parsed correctly
696
    assert(parsed_grammar.rules.empty());
697

698
    fprintf(stderr, "    End of expected error.\n");
699
    fprintf(stderr, "  ✅︎ Passed\n");
700
}
701

702
// Verifies that grammars containing left recursion are rejected when built,
// from the direct case up to recursion hidden behind a rule that can be empty.
// Asserts are active in this file (NDEBUG is undefined at the top), so each
// failed expectation aborts the test.
static void test_failure_left_recursion() {
    fprintf(stderr, "⚫ Testing left recursion detection:\n");

    // Direct: root refers to itself in first position.
    const std::string direct_gbnf = R"""(root ::= "a" | root "a")""";
    const bool direct_rejected = test_build_grammar_fails(direct_gbnf);
    assert(direct_rejected);

    // Direct, but one rule below root.
    const std::string nested_gbnf = R"""(
        root ::= asdf
        asdf ::= "a" | asdf "a"
        )""";
    const bool nested_rejected = test_build_grammar_fails(nested_gbnf);
    assert(nested_rejected);

    // Indirect: asdf -> foo -> asdf.
    const std::string indirect_gbnf = R"""(
        root ::= asdf
        asdf ::= "a" | foo "b"
        foo ::= "c" | asdf "d" | "e")""";
    const bool indirect_rejected = test_build_grammar_fails(indirect_gbnf);
    assert(indirect_rejected);

    // Indirect, with a possibly-empty rule in front of the recursive reference.
    const std::string nullable_prefix_gbnf = R"""(
        root ::= asdf
        asdf ::= "a" | foo "b"
        foo ::= "c" | empty asdf "d" | "e"
        empty ::= "blah" | )""";
    const bool nullable_prefix_rejected = test_build_grammar_fails(nullable_prefix_gbnf);
    assert(nullable_prefix_rejected);

    fprintf(stderr, "  ✅︎ Passed\n");
}
733

734
static void test_json_schema() {
735
    // Note that this is similar to the regular grammar tests,
736
    //  but we convert each json schema to a grammar before parsing.
737
    // Otherwise, this test structure is the same.
738

739
    test_schema(
740
        "empty schema (object)",
741
        // Schema
742
        R"""(
743
            {}
744
        )""",
745
        // Passing strings
746
        {
747
            R"""({})""",
748
            R"""({"foo": "bar"})""",
749
        },
750
        // Failing strings
751
        {
752
            "",
753
            "[]",
754
            "null",
755
            R"""("")""",
756
            "true",
757
        }
758
    );
759

760
    test_schema(
761
        "exotic formats (list)",
762
        // Schema
763
        R"""({
764
            "items": [
765
                { "format": "date" },
766
                { "format": "uuid" },
767
                { "format": "time" },
768
                { "format": "date-time" }
769
            ]
770
        })""",
771
        // Passing strings
772
        {
773
            // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
774
            // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
775
            R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
776
            //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
777
            //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
778
        },
779
        // Failing strings
780
        {
781
            R"""(["foo", "bar"])""",
782
            R"""(["12345678-1234-1234-1234-1234567890ab"])""",
783
        }
784
    );
785

786
    test_schema(
787
        "string",
788
        // Schema
789
        R"""({
790
            "type": "string"
791
        })""",
792
        // Passing strings
793
        {
794
            R"""("foo")""",
795
            R"""("bar")""",
796
            R"""("")""",
797
        },
798
        // Failing strings
799
        {
800
            R"""({})""",
801
            R"""("foo": "bar")""",
802
        }
803
    );
804

805
    test_schema(
806
        "string w/ min length 1",
807
        // Schema
808
        R"""({
809
            "type": "string",
810
            "minLength": 1
811
        })""",
812
        // Passing strings
813
        {
814
            R"""("foo")""",
815
            R"""("bar")""",
816
        },
817
        // Failing strings
818
        {
819
            R"""("")""",
820
            R"""({})""",
821
            R"""("foo": "bar")""",
822
        }
823
    );
824

825
    test_schema(
826
        "string w/ min length 3",
827
        // Schema
828
        R"""({
829
                "type": "string",
830
                "minLength": 3
831
        })""",
832
        // Passing strings
833
        {
834
            R"""("foo")""",
835
            R"""("bar")""",
836
            R"""("foobar")""",
837
        },
838
        // Failing strings
839
        {
840
            R"""("")""",
841
            R"""("f")""",
842
            R"""("fo")""",
843
        }
844
    );
845

846
    test_schema(
847
        "string w/ max length",
848
        // Schema
849
        R"""({
850
            "type": "string",
851
            "maxLength": 3
852
        })""",
853
        // Passing strings
854
        {
855
            R"""("foo")""",
856
            R"""("bar")""",
857
            R"""("")""",
858
            R"""("f")""",
859
            R"""("fo")""",
860
        },
861
        // Failing strings
862
        {
863
            R"""("foobar")""",
864
        }
865
    );
866

867
    test_schema(
868
        "string w/ min & max length",
869
        // Schema
870
        R"""({
871
            "type": "string",
872
            "minLength": 1,
873
            "maxLength": 4
874
        })""",
875
        // Passing strings
876
        {
877
            R"""("foo")""",
878
            R"""("bar")""",
879
            R"""("f")""",
880
            R"""("barf")""",
881
        },
882
        // Failing strings
883
        {
884
            R"""("")""",
885
            R"""("barfo")""",
886
            R"""("foobar")""",
887
        }
888
    );
889

890
    test_schema(
891
        "boolean",
892
        // Schema
893
        R"""({
894
            "type": "boolean"
895
        })""",
896
        // Passing strings
897
        {
898
            "true",
899
            "false",
900
        },
901
        // Failing strings
902
        {
903
            R"""("")""",
904
            R"""("true")""",
905
            R"""(True)""",
906
            R"""(FALSE)""",
907
        }
908
    );
909

910
    test_schema(
911
        "integer",
912
        // Schema
913
        R"""({
914
            "type": "integer"
915
        })""",
916
        // Passing strings
917
        {
918
            R"""(0)""",
919
            R"""(12345)""",
920
            R"""(1234567890123456)""",
921
        },
922
        // Failing strings
923
        {
924
            R"""()""",
925
            R"""(01)""",
926
            R"""(007)""",
927
            R"""(12345678901234567  )""",
928
        }
929
    );
930

931
    test_schema(
932
        "string const",
933
        // Schema
934
        R"""({
935
            "const": "foo"
936
        })""",
937
        // Passing strings
938
        {
939
            R"""("foo")""",
940
        },
941
        // Failing strings
942
        {
943
            R"""(foo)""",
944
            R"""("bar")""",
945
        }
946
    );
947

948
    test_schema(
949
        "non-string const",
950
        // Schema
951
        R"""({
952
            "const": true
953
        })""",
954
        // Passing strings
955
        {
956
            R"""(true)""",
957
        },
958
        // Failing strings
959
        {
960
            R"""()""",
961
            R"""(foo)""",
962
            R"""("true")""",
963
        }
964
    );
965

966
    test_schema(
967
        "non-string const",
968
        // Schema
969
        R"""({
970
            "enum": ["red", "amber", "green", null, 42, ["foo"]]
971
        })""",
972
        // Passing strings
973
        {
974
            R"""("red")""",
975
            R"""(null)""",
976
            R"""(42)""",
977
            R"""(["foo"])""",
978
        },
979
        // Failing strings
980
        {
981
            R"""()""",
982
            R"""(420)""",
983
            R"""(true)""",
984
            R"""(foo)""",
985
        }
986
    );
987

988
    test_schema(
989
        "simple pattern",
990
        // Schema
991
        R"""({
992
            "pattern": "^[a-zA-Z0-9_-]*$"
993
        })""",
994
        // Passing strings
995
        {
996
            R"""("")""",
997
            R"""("He_llo-12")""",
998
        },
999
        // Failing strings
1000
        {
1001
            R"""("!")""",
1002
            R"""("Hello World")""",
1003
        }
1004
    );
1005

1006
    test_schema(
1007
        "pattern with escapes",
1008
        // Schema
1009
        R"""({
1010
            "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
1011
        })""",
1012
        // Passing strings
1013
        {
1014
            R"""("a^$.[]()|{}*+?b")""",
1015
        },
1016
        // Failing strings
1017
        {
1018
            R"""("ab")""",
1019
        }
1020
    );
1021

1022
    test_schema(
1023
        "",
1024
        // Schema
1025
        R"""(
1026
            {
1027
                "type": ["array", "null"],
1028
                "items": { "type": "string" }
1029
            }
1030
        )""",
1031
        // Passing strings
1032
        {
1033
            "null",
1034
            "[]",
1035
            "[\"123\"]",
1036
            "[\"foo\", \"bar\"]",
1037
        },
1038
        // Failing strings
1039
        {
1040
            "",
1041
            "[123]",
1042
            "\"foo\"",
1043
            "[\"foo\", 42]",
1044
        }
1045
    );
1046

1047
    test_schema(
1048
        "min+max items",
1049
        // Schema
1050
        R"""({
1051
            "items": {
1052
                "type": ["number", "integer"]
1053
            },
1054
            "minItems": 3,
1055
            "maxItems": 5
1056
        })""",
1057
        // Passing strings
1058
        {
1059
            R"""([1, 2, 3])""",
1060
            R"""([1, 2, 3, 4])""",
1061
            R"""([1, 2, 3, 4, 5])""",
1062
        },
1063
        // Failing strings
1064
        {
1065
            R"""([1, 2])""",
1066
            R"""([1, 2, 3, 4, 5, 6])""",
1067
            R"""(1)""",
1068
        }
1069
    );
1070

1071
    // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
1072
    test_schema(
1073
        "object properties",
1074
        // Schema
1075
        R"""({
1076
            "type": "object",
1077
            "properties": {
1078
                "number": { "type": "number" },
1079
                "street_name": { "type": "string" },
1080
                "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1081
            }
1082
        })""",
1083
        // Passing strings
1084
        {
1085
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1086
            // "By default, leaving out properties is valid"
1087
            R"""({ "street_name": "Pennsylvania" })""",
1088
            R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1089
            // "By extension, even an empty object is valid"
1090
            R"""({})""",
1091
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1092
        },
1093
        // Failing strings
1094
        {
1095
            // Change datatype from number to string
1096
            R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1097
            // Reorder properties
1098
            R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
1099
            // Reorder properties
1100
            R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1101
            // "Additional properties default to false for generation, even though the spec says true.
1102
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1103

1104
        }
1105
    );
1106

1107
    test_schema(
1108
        "additional properties can't override other properties",
1109
        R"""({
1110
            "properties": {
1111
                "a": {"type": "integer"},
1112
                "b": {"type": "integer"}
1113
            },
1114
            "additionalProperties": true
1115
        })""",
1116
        // Passing strings
1117
        {
1118
            R"""({"a": 42})""",
1119
            R"""({"c": ""})""",
1120
            R"""({"a": 42, "c": ""})""",
1121
            R"""({"a_": ""})""",
1122
        },
1123
        // Failing strings
1124
        {
1125
            R"""()""",
1126
            R"""({"a": ""})""",
1127
            R"""({"a": "", "b": ""})""",
1128
        }
1129
    );
1130

1131
    // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
1132
    test_schema(
1133
        "object properties, additionalProperties: true",
1134
        // Schema
1135
        R"""({
1136
            "type": "object",
1137
            "properties": {
1138
                "number": { "type": "number" },
1139
                "street_name": { "type": "string" },
1140
                "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1141
            },
1142
            "additionalProperties": true
1143
        })""",
1144
        // Passing strings
1145
        {
1146
            // "By extension, even an empty object is valid"
1147
            R"""({})""",
1148
            R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
1149
            // "By default, leaving out properties is valid"
1150
            R"""({ "street_name": "Pennsylvania" })""",
1151
            R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1152
            // "By default, providing additional properties is valid"
1153
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1154
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1155
        },
1156
        // Failing strings
1157
        {
1158
            // Change datatype from number to string
1159
            R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1160
            // Reorder properties
1161
            R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
1162
        }
1163
    );
1164

1165
    // Additional properties: false
1166
    test_schema(
1167
        "required + optional props each in original order",
1168
        // Schema
1169
        R"""({
1170
            "type": "object",
1171
            "properties": {
1172
                "number": { "type": "number" },
1173
                "street_name": { "type": "string" },
1174
                "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1175
            },
1176
            "additionalProperties": false
1177
        })""",
1178
        // Passing strings
1179
        {
1180
            R"""({ "street_name": "Pennsylvania" })""",
1181
            R"""({ "number": 1600, "street_type":"Avenue"})""",
1182
            R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1183
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1184
            // Spaces are permitted around enum values
1185
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1186
        },
1187
        // Failing strings
1188
        {
1189
            // Reorder properties
1190
            R"""({ "street_type": "Avenue", "number": 1600 })""",
1191
            // Add "direction"
1192
            R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
1193
        }
1194
    );
1195

1196
    test_schema(
1197
        "required + optional props each in original order",
1198
        // Schema
1199
        R"""({
1200
            "properties": {
1201
                "b": {"type": "string"},
1202
                "a": {"type": "string"},
1203
                "d": {"type": "string"},
1204
                "c": {"type": "string"}
1205
            },
1206
            "required": ["a", "b"],
1207
            "additionalProperties": false
1208
        })""",
1209
        // Passing strings
1210
        {
1211
            R"""({"b": "foo", "a": "bar"})""",
1212
            R"""({"b":"foo","a":"bar","d":"qux"})""",
1213
            R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
1214
        },
1215
        // Failing strings
1216
        {
1217
            R"""({"a": "foo", "b": "bar"})""",
1218
            R"""({"b": "bar"})""",
1219
            R"""({"a": "foo", "c": "baz"})""",
1220
            R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
1221
        }
1222
    );
1223

1224
    // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
1225
    test_schema(
1226
        "required props",
1227
        // Schema
1228
        R"""({
1229
            "$schema": "https://json-schema.org/draft/2020-12/schema",
1230
            "$id": "https://example.com/product.schema.json",
1231
            "title": "Product",
1232
            "description": "A product from Acme's catalog",
1233
            "type": "object",
1234
            "properties": {
1235
                "productId": {
1236
                "description": "The unique identifier for a product",
1237
                "type": "integer"
1238
                },
1239
                "productName": {
1240
                "description": "Name of the product",
1241
                "type": "string"
1242
                },
1243
                "price": {
1244
                "description": "The price of the product",
1245
                "type": "number",
1246
                "exclusiveMinimum": 0
1247
                },
1248
                "tags": {
1249
                "description": "Tags for the product",
1250
                "type": "array",
1251
                "items": {
1252
                    "type": "string"
1253
                },
1254
                "minItems": 1,
1255
                "uniqueItems": true
1256
                },
1257
                "dimensions": {
1258
                "type": "object",
1259
                "properties": {
1260
                    "length": {
1261
                    "type": "number"
1262
                    },
1263
                    "width": {
1264
                    "type": "number"
1265
                    },
1266
                    "height": {
1267
                    "type": "number"
1268
                    }
1269
                },
1270
                "required": [ "length", "width", "height" ]
1271
                }
1272
            },
1273
            "required": [ "productId", "productName", "price" ]
1274
        })""",
1275
        // Passing strings
1276
        {
1277
            R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
1278
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
1279
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
1280
        },
1281
        // Failing strings
1282
        {
1283
            R"""({})""", // Missing all required properties
1284
            R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
1285
            // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
1286
            //  Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
1287
            // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
1288
            R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
1289
            R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
1290
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
1291
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
1292
            // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
1293
            // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
1294
        }
1295
    );
1296
}
1297

1298
int main() {
1299
    fprintf(stdout, "Running grammar integration tests...\n");
1300
    test_simple_grammar();
1301
    test_complex_grammar();
1302
    test_special_chars();
1303
    test_quantifiers();
1304
    test_failure_missing_root();
1305
    test_failure_missing_reference();
1306
    test_failure_left_recursion();
1307
    test_json_schema();
1308
    fprintf(stdout, "All tests passed.\n");
1309
    return 0;
1310
}
1311

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.