aboutsummaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2024-12-10 12:29:03 -0300
committerEuAndreh <eu@euandre.org>2024-12-10 12:29:03 -0300
commit8359c047aaebe274a2d811d61922b571ca7d10df (patch)
tree070e0ed93d27a842776ada805eeb4270e7e3c806 /tests
parentStart building test files (diff)
downloadcotia-8359c047aaebe274a2d811d61922b571ca7d10df.tar.gz
cotia-8359c047aaebe274a2d811d61922b571ca7d10df.tar.xz
Namespace packages with "urubu/"
Diffstat (limited to 'tests')
-rw-r--r--tests/unit/compressor/compressor_test.go122
-rw-r--r--tests/unit/driver/lexer/lexer_test.go932
-rw-r--r--tests/unit/driver/parser/conflict_test.go524
-rw-r--r--tests/unit/driver/parser/lac_test.go120
-rw-r--r--tests/unit/driver/parser/parser_test.go833
-rw-r--r--tests/unit/driver/parser/semantic_action_test.go227
-rw-r--r--tests/unit/driver/parser/syntax_error_test.go306
-rw-r--r--tests/unit/grammar/first_test.go219
-rw-r--r--tests/unit/grammar/grammar_test.go3381
-rw-r--r--tests/unit/grammar/lalr1_test.go187
-rw-r--r--tests/unit/grammar/lexical/compiler_test.go338
-rw-r--r--tests/unit/grammar/lexical/dfa/dfa_test.go121
-rw-r--r--tests/unit/grammar/lexical/dfa/symbol_position_test.go79
-rw-r--r--tests/unit/grammar/lexical/dfa/tree_test.go257
-rw-r--r--tests/unit/grammar/lexical/parser/lexer_test.go524
-rw-r--r--tests/unit/grammar/lexical/parser/parser_test.go1389
-rw-r--r--tests/unit/grammar/lr0_test.go448
-rw-r--r--tests/unit/grammar/parsing_table_test.go387
-rw-r--r--tests/unit/grammar/symbol/symbol_test.go159
-rw-r--r--tests/unit/grammar/test_helper_test.go68
-rw-r--r--tests/unit/spec/grammar/parser/lexer_test.go236
-rw-r--r--tests/unit/spec/grammar/parser/parser_test.go1211
-rw-r--r--tests/unit/spec/test/parser_test.go411
-rw-r--r--tests/unit/tester/tester_test.go169
-rw-r--r--tests/unit/utf8/utf8_test.go181
25 files changed, 12829 insertions, 0 deletions
diff --git a/tests/unit/compressor/compressor_test.go b/tests/unit/compressor/compressor_test.go
new file mode 100644
index 0000000..621b731
--- /dev/null
+++ b/tests/unit/compressor/compressor_test.go
@@ -0,0 +1,122 @@
+package compressor
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestCompressor_Compress(t *testing.T) {
+ x := 0 // an empty value
+
+ allCompressors := func() []Compressor {
+ return []Compressor{
+ NewUniqueEntriesTable(),
+ NewRowDisplacementTable(x),
+ }
+ }
+
+ tests := []struct {
+ original []int
+ rowCount int
+ colCount int
+ compressors []Compressor
+ }{
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ x, x, x, x, x,
+ x, x, x, x, x,
+ x, x, x, x, x,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ x, x, x, x, x,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, x, 1, 1, 1,
+ 1, 1, x, 1, 1,
+ 1, 1, 1, x, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ }
+ for i, tt := range tests {
+ for _, comp := range tt.compressors {
+ t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
+ dup := make([]int, len(tt.original))
+ copy(dup, tt.original)
+
+ orig, err := NewOriginalTable(tt.original, tt.colCount)
+ if err != nil {
+ t.Fatal(err)
+ }
+ err = comp.Compress(orig)
+ if err != nil {
+ t.Fatal(err)
+ }
+ rowCount, colCount := comp.OriginalTableSize()
+ if rowCount != tt.rowCount || colCount != tt.colCount {
+ t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
+ }
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ v, err := comp.Lookup(i, j)
+ if err != nil {
+ t.Fatal(err)
+ }
+ expected := tt.original[i*tt.colCount+j]
+ if v != expected {
+ t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
+ }
+ }
+ }
+
+ // Calling with out-of-range indexes should be an error.
+ if _, err := comp.Lookup(0, -1); err == nil {
+ t.Fatalf("expected error didn't occur (0, -1)")
+ }
+ if _, err := comp.Lookup(-1, 0); err == nil {
+ t.Fatalf("expected error didn't occur (-1, 0)")
+ }
+ if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
+ }
+ if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
+ }
+
+ // The compressor must not break the original table.
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ idx := i*tt.colCount + j
+ if tt.original[idx] != dup[idx] {
+ t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
+ }
+ }
+ }
+ })
+ }
+ }
+}
diff --git a/tests/unit/driver/lexer/lexer_test.go b/tests/unit/driver/lexer/lexer_test.go
new file mode 100644
index 0000000..a3d0231
--- /dev/null
+++ b/tests/unit/driver/lexer/lexer_test.go
@@ -0,0 +1,932 @@
+package lexer
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar/lexical"
+ spec "urubu/spec/grammar"
+)
+
+func newLexEntry(modes []string, kind string, pattern string, push string, pop bool) *lexical.LexEntry {
+ ms := []spec.LexModeName{}
+ for _, m := range modes {
+ ms = append(ms, spec.LexModeName(m))
+ }
+ return &lexical.LexEntry{
+ Kind: spec.LexKindName(kind),
+ Pattern: pattern,
+ Modes: ms,
+ Push: spec.LexModeName(push),
+ Pop: pop,
+ }
+}
+
+func newLexEntryDefaultNOP(kind string, pattern string) *lexical.LexEntry {
+ return &lexical.LexEntry{
+ Kind: spec.LexKindName(kind),
+ Pattern: pattern,
+ Modes: []spec.LexModeName{
+ spec.LexModeNameDefault,
+ },
+ }
+}
+
+func newLexEntryFragment(kind string, pattern string) *lexical.LexEntry {
+ return &lexical.LexEntry{
+ Kind: spec.LexKindName(kind),
+ Pattern: pattern,
+ Fragment: true,
+ }
+}
+
+func newToken(modeID ModeID, kindID KindID, modeKindID ModeKindID, lexeme []byte) *Token {
+ return &Token{
+ ModeID: modeID,
+ KindID: kindID,
+ ModeKindID: modeKindID,
+ Lexeme: lexeme,
+ }
+}
+
+func newTokenDefault(kindID int, modeKindID int, lexeme []byte) *Token {
+ return newToken(
+ ModeID(spec.LexModeIDDefault.Int()),
+ KindID(spec.LexKindID(kindID).Int()),
+ ModeKindID(spec.LexModeKindID(modeKindID).Int()),
+ lexeme,
+ )
+}
+
+func newEOFToken(modeID ModeID, modeName string) *Token {
+ return &Token{
+ ModeID: modeID,
+ ModeKindID: 0,
+ EOF: true,
+ }
+}
+
+func newEOFTokenDefault() *Token {
+ return newEOFToken(ModeID(spec.LexModeIDDefault.Int()), spec.LexModeNameDefault.String())
+}
+
+func newInvalidTokenDefault(lexeme []byte) *Token {
+ return &Token{
+ ModeID: ModeID(spec.LexModeIDDefault.Int()),
+ ModeKindID: 0,
+ Lexeme: lexeme,
+ Invalid: true,
+ }
+}
+
+func withPos(tok *Token, bytePos int, byteLen int, row int, col int) *Token {
+ tok.BytePos = bytePos
+ tok.ByteLen = byteLen
+ tok.Row = row
+ tok.Col = col
+ return tok
+}
+
+func TestLexer_Next(t *testing.T) {
+ test := []struct {
+ lspec *lexical.LexSpec
+ src string
+ tokens []*Token
+ passiveModeTran bool
+ tran func(l *Lexer, tok *Token) error
+ }{
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "(a|b)*abb"),
+ newLexEntryDefaultNOP("t2", " +"),
+ },
+ },
+ src: "abb aabb aaabb babb bbabb abbbabb",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("abb")), 0, 3, 0, 0),
+ withPos(newTokenDefault(2, 2, []byte(" ")), 3, 1, 0, 3),
+ withPos(newTokenDefault(1, 1, []byte("aabb")), 4, 4, 0, 4),
+ withPos(newTokenDefault(2, 2, []byte(" ")), 8, 1, 0, 8),
+ withPos(newTokenDefault(1, 1, []byte("aaabb")), 9, 5, 0, 9),
+ withPos(newTokenDefault(2, 2, []byte(" ")), 14, 1, 0, 14),
+ withPos(newTokenDefault(1, 1, []byte("babb")), 15, 4, 0, 15),
+ withPos(newTokenDefault(2, 2, []byte(" ")), 19, 1, 0, 19),
+ withPos(newTokenDefault(1, 1, []byte("bbabb")), 20, 5, 0, 20),
+ withPos(newTokenDefault(2, 2, []byte(" ")), 25, 1, 0, 25),
+ withPos(newTokenDefault(1, 1, []byte("abbbabb")), 26, 7, 0, 26),
+ withPos(newEOFTokenDefault(), 33, 0, 0, 33),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "b?a+"),
+ newLexEntryDefaultNOP("t2", "(ab)?(cd)+"),
+ newLexEntryDefaultNOP("t3", " +"),
+ },
+ },
+ src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("ba")), 0, 2, 0, 0),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 2, 1, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte("baaa")), 3, 4, 0, 3),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 7, 1, 0, 7),
+ withPos(newTokenDefault(1, 1, []byte("a")), 8, 1, 0, 8),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 9, 1, 0, 9),
+ withPos(newTokenDefault(1, 1, []byte("aaa")), 10, 3, 0, 10),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 13, 1, 0, 13),
+ withPos(newTokenDefault(2, 2, []byte("abcd")), 14, 4, 0, 14),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 18, 1, 0, 18),
+ withPos(newTokenDefault(2, 2, []byte("abcdcdcd")), 19, 8, 0, 19),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 27, 1, 0, 27),
+ withPos(newTokenDefault(2, 2, []byte("cd")), 28, 2, 0, 28),
+ withPos(newTokenDefault(3, 3, []byte(" ")), 30, 1, 0, 30),
+ withPos(newTokenDefault(2, 2, []byte("cdcdcd")), 31, 6, 0, 31),
+ withPos(newEOFTokenDefault(), 37, 0, 0, 37),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "."),
+ },
+ },
+ src: string([]byte{
+ 0x00,
+ 0x7f,
+ 0xc2, 0x80,
+ 0xdf, 0xbf,
+ 0xe1, 0x80, 0x80,
+ 0xec, 0xbf, 0xbf,
+ 0xed, 0x80, 0x80,
+ 0xed, 0x9f, 0xbf,
+ 0xee, 0x80, 0x80,
+ 0xef, 0xbf, 0xbf,
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0xbf, 0xbf, 0xbf,
+ 0xf1, 0x80, 0x80, 0x80,
+ 0xf3, 0xbf, 0xbf, 0xbf,
+ 0xf4, 0x80, 0x80, 0x80,
+ 0xf4, 0x8f, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0x00}), 0, 1, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0x7f}), 1, 1, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xc2, 0x80}), 2, 2, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbf}), 4, 2, 0, 3),
+ withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x80}), 6, 3, 0, 4),
+ withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbf}), 9, 3, 0, 5),
+ withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x80}), 12, 3, 0, 6),
+ withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbf}), 15, 3, 0, 7),
+ withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x80}), 18, 3, 0, 8),
+ withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbf}), 21, 3, 0, 9),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 24, 4, 0, 10),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 28, 4, 0, 11),
+ withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x80}), 32, 4, 0, 12),
+ withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbf}), 36, 4, 0, 13),
+ withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x80}), 40, 4, 0, 14),
+ withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), 44, 4, 0, 15),
+ withPos(newEOFTokenDefault(), 48, 0, 0, 16),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "[ab.*+?|()[\\]]"),
+ },
+ },
+ src: "ab.*+?|()[]",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("a")), 0, 1, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte("b")), 1, 1, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte(".")), 2, 1, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte("*")), 3, 1, 0, 3),
+ withPos(newTokenDefault(1, 1, []byte("+")), 4, 1, 0, 4),
+ withPos(newTokenDefault(1, 1, []byte("?")), 5, 1, 0, 5),
+ withPos(newTokenDefault(1, 1, []byte("|")), 6, 1, 0, 6),
+ withPos(newTokenDefault(1, 1, []byte("(")), 7, 1, 0, 7),
+ withPos(newTokenDefault(1, 1, []byte(")")), 8, 1, 0, 8),
+ withPos(newTokenDefault(1, 1, []byte("[")), 9, 1, 0, 9),
+ withPos(newTokenDefault(1, 1, []byte("]")), 10, 1, 0, 10),
+ withPos(newEOFTokenDefault(), 11, 0, 0, 11),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // all 1 byte characters except null character (U+0000)
+ //
+ // NOTE:
+ // vartan cannot handle the null character in patterns because lexical.lexer,
+ // specifically read() and restore(), recognizes the null characters as that a symbol doesn't exist.
+ // If a pattern needs a null character, use code point expression \u{0000}.
+ newLexEntryDefaultNOP("char_1_byte", "[\x01-\x7f]"),
+ },
+ },
+ src: string([]byte{
+ 0x01,
+ 0x02,
+ 0x7e,
+ 0x7f,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0x01}), 0, 1, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0x02}), 1, 1, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0x7e}), 2, 1, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0x7f}), 3, 1, 0, 3),
+ withPos(newEOFTokenDefault(), 4, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // all 2 byte characters
+ newLexEntryDefaultNOP("char_2_byte", "[\xc2\x80-\xdf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xc2, 0x80,
+ 0xc2, 0x81,
+ 0xdf, 0xbe,
+ 0xdf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xc2, 0x80}), 0, 2, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xc2, 0x81}), 2, 2, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbe}), 4, 2, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbf}), 6, 2, 0, 3),
+ withPos(newEOFTokenDefault(), 8, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // All bytes are the same.
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0),
+ withPos(newEOFTokenDefault(), 3, 0, 0, 1),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // The first two bytes are the same.
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ 0xe0, 0xa0, 0x81,
+ 0xe0, 0xa0, 0xbe,
+ 0xe0, 0xa0, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0xbe}), 6, 3, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0xbf}), 9, 3, 0, 3),
+ withPos(newEOFTokenDefault(), 12, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // The first byte are the same.
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ 0xe0, 0xa0, 0x81,
+ 0xe0, 0xbf, 0xbe,
+ 0xe0, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbe}), 6, 3, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbf}), 9, 3, 0, 3),
+ withPos(newEOFTokenDefault(), 12, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // all 3 byte characters
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ 0xe0, 0xa0, 0x81,
+ 0xe0, 0xbf, 0xbe,
+ 0xe0, 0xbf, 0xbf,
+ 0xe1, 0x80, 0x80,
+ 0xe1, 0x80, 0x81,
+ 0xec, 0xbf, 0xbe,
+ 0xec, 0xbf, 0xbf,
+ 0xed, 0x80, 0x80,
+ 0xed, 0x80, 0x81,
+ 0xed, 0x9f, 0xbe,
+ 0xed, 0x9f, 0xbf,
+ 0xee, 0x80, 0x80,
+ 0xee, 0x80, 0x81,
+ 0xef, 0xbf, 0xbe,
+ 0xef, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbe}), 6, 3, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbf}), 9, 3, 0, 3),
+ withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x80}), 12, 3, 0, 4),
+ withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x81}), 15, 3, 0, 5),
+ withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbe}), 18, 3, 0, 6),
+ withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbf}), 21, 3, 0, 7),
+ withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x80}), 24, 3, 0, 8),
+ withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x81}), 27, 3, 0, 9),
+ withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbe}), 30, 3, 0, 10),
+ withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbf}), 33, 3, 0, 11),
+ withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x80}), 36, 3, 0, 12),
+ withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x81}), 39, 3, 0, 13),
+ withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbe}), 42, 3, 0, 14),
+ withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbf}), 45, 3, 0, 15),
+ withPos(newEOFTokenDefault(), 48, 0, 0, 16),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // All bytes are the same.
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0),
+ withPos(newEOFTokenDefault(), 4, 0, 0, 1),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // The first 3 bytes are the same.
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0x90, 0x80, 0xbe,
+ 0xf0, 0x90, 0x80, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0xbe}), 8, 4, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0xbf}), 12, 4, 0, 3),
+ withPos(newEOFTokenDefault(), 16, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // The first 2 bytes are the same.
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0x90, 0xbf, 0xbe,
+ 0xf0, 0x90, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0xbf, 0xbe}), 8, 4, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0xbf, 0xbf}), 12, 4, 0, 3),
+ withPos(newEOFTokenDefault(), 16, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // The first byte are the same.
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0xbf, 0xbf, 0xbe,
+ 0xf0, 0xbf, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbe}), 8, 4, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 12, 4, 0, 3),
+ withPos(newEOFTokenDefault(), 16, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // all 4 byte characters
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0xbf, 0xbf, 0xbe,
+ 0xf0, 0xbf, 0xbf, 0xbf,
+ 0xf1, 0x80, 0x80, 0x80,
+ 0xf1, 0x80, 0x80, 0x81,
+ 0xf3, 0xbf, 0xbf, 0xbe,
+ 0xf3, 0xbf, 0xbf, 0xbf,
+ 0xf4, 0x80, 0x80, 0x80,
+ 0xf4, 0x80, 0x80, 0x81,
+ 0xf4, 0x8f, 0xbf, 0xbe,
+ 0xf4, 0x8f, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbe}), 8, 4, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 12, 4, 0, 3),
+ withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x80}), 16, 4, 0, 4),
+ withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x81}), 20, 4, 0, 5),
+ withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbe}), 24, 4, 0, 6),
+ withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbf}), 28, 4, 0, 7),
+ withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x80}), 32, 4, 0, 8),
+ withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x81}), 36, 4, 0, 9),
+ withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbe}), 40, 4, 0, 10),
+ withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), 44, 4, 0, 11),
+ withPos(newEOFTokenDefault(), 48, 0, 0, 12),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("non_number", "[^0-9]+[0-9]"),
+ },
+ },
+ src: "foo9",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("foo9")), 0, 4, 0, 0),
+ withPos(newEOFTokenDefault(), 4, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("char_1_byte", "\\u{006E}"),
+ newLexEntryDefaultNOP("char_2_byte", "\\u{03BD}"),
+ newLexEntryDefaultNOP("char_3_byte", "\\u{306B}"),
+ newLexEntryDefaultNOP("char_4_byte", "\\u{01F638}"),
+ },
+ },
+ src: "nνに😸",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0x6E}), 0, 1, 0, 0),
+ withPos(newTokenDefault(2, 2, []byte{0xCE, 0xBD}), 1, 2, 0, 1),
+ withPos(newTokenDefault(3, 3, []byte{0xE3, 0x81, 0xAB}), 3, 3, 0, 2),
+ withPos(newTokenDefault(4, 4, []byte{0xF0, 0x9F, 0x98, 0xB8}), 6, 4, 0, 3),
+ withPos(newEOFTokenDefault(), 10, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("code_points_alt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"),
+ },
+ },
+ src: "nνに😸",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte{0x6E}), 0, 1, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte{0xCE, 0xBD}), 1, 2, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0xE3, 0x81, 0xAB}), 3, 3, 0, 2),
+ withPos(newTokenDefault(1, 1, []byte{0xF0, 0x9F, 0x98, 0xB8}), 6, 4, 0, 3),
+ withPos(newEOFTokenDefault(), 10, 0, 0, 4),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "\\f{a2c}\\f{d2f}+"),
+ newLexEntryFragment("a2c", "abc"),
+ newLexEntryFragment("d2f", "def"),
+ },
+ },
+ src: "abcdefdefabcdef",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("abcdefdef")), 0, 9, 0, 0),
+ withPos(newTokenDefault(1, 1, []byte("abcdef")), 9, 6, 0, 9),
+ withPos(newEOFTokenDefault(), 15, 0, 0, 15),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "(\\f{a2c}|\\f{d2f})+"),
+ newLexEntryFragment("a2c", "abc"),
+ newLexEntryFragment("d2f", "def"),
+ },
+ },
+ src: "abcdefdefabc",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("abcdefdefabc")), 0, 12, 0, 0),
+ withPos(newEOFTokenDefault(), 12, 0, 0, 12),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("t1", "\\f{a2c_or_d2f}+"),
+ newLexEntryFragment("a2c_or_d2f", "\\f{a2c}|\\f{d2f}"),
+ newLexEntryFragment("a2c", "abc"),
+ newLexEntryFragment("d2f", "def"),
+ },
+ },
+ src: "abcdefdefabc",
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte("abcdefdefabc")), 0, 12, 0, 0),
+ withPos(newEOFTokenDefault(), 12, 0, 0, 12),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("white_space", ` *`),
+ newLexEntry([]string{"default"}, "string_open", `"`, "string", false),
+ newLexEntry([]string{"string"}, "escape_sequence", `\\[n"\\]`, "", false),
+ newLexEntry([]string{"string"}, "char_sequence", `[^"\\]*`, "", false),
+ newLexEntry([]string{"string"}, "string_close", `"`, "", true),
+ },
+ },
+ src: `"" "Hello world.\n\"Hello world.\""`,
+ tokens: []*Token{
+ withPos(newToken(1, 2, 2, []byte(`"`)), 0, 1, 0, 0),
+ withPos(newToken(2, 5, 3, []byte(`"`)), 1, 1, 0, 1),
+ withPos(newToken(1, 1, 1, []byte(` `)), 2, 1, 0, 2),
+ withPos(newToken(1, 2, 2, []byte(`"`)), 3, 1, 0, 3),
+ withPos(newToken(2, 4, 2, []byte(`Hello world.`)), 4, 12, 0, 4),
+ withPos(newToken(2, 3, 1, []byte(`\n`)), 16, 2, 0, 16),
+ withPos(newToken(2, 3, 1, []byte(`\"`)), 18, 2, 0, 18),
+ withPos(newToken(2, 4, 2, []byte(`Hello world.`)), 20, 12, 0, 20),
+ withPos(newToken(2, 3, 1, []byte(`\"`)), 32, 2, 0, 32),
+ withPos(newToken(2, 5, 3, []byte(`"`)), 34, 1, 0, 34),
+ withPos(newEOFTokenDefault(), 35, 0, 0, 35),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ // `white_space` is enabled in multiple modes.
+ newLexEntry([]string{"default", "state_a", "state_b"}, "white_space", ` *`, "", false),
+ newLexEntry([]string{"default"}, "char_a", `a`, "state_a", false),
+ newLexEntry([]string{"state_a"}, "char_b", `b`, "state_b", false),
+ newLexEntry([]string{"state_a"}, "back_from_a", `<`, "", true),
+ newLexEntry([]string{"state_b"}, "back_from_b", `<`, "", true),
+ },
+ },
+ src: ` a b < < `,
+ tokens: []*Token{
+ withPos(newToken(1, 1, 1, []byte(` `)), 0, 1, 0, 0),
+ withPos(newToken(1, 2, 2, []byte(`a`)), 1, 1, 0, 1),
+ withPos(newToken(2, 1, 1, []byte(` `)), 2, 1, 0, 2),
+ withPos(newToken(2, 3, 2, []byte(`b`)), 3, 1, 0, 3),
+ withPos(newToken(3, 1, 1, []byte(` `)), 4, 1, 0, 4),
+ withPos(newToken(3, 5, 2, []byte(`<`)), 5, 1, 0, 5),
+ withPos(newToken(2, 1, 1, []byte(` `)), 6, 1, 0, 6),
+ withPos(newToken(2, 4, 3, []byte(`<`)), 7, 1, 0, 7),
+ withPos(newToken(1, 1, 1, []byte(` `)), 8, 1, 0, 8),
+ withPos(newEOFTokenDefault(), 9, 0, 0, 9),
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false),
+ newLexEntry([]string{"default"}, "char", `.`, "", false),
+ newLexEntry([]string{"default"}, "push_1", `-> 1`, "", false),
+ newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false),
+ newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false),
+ newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", false),
+ },
+ },
+ src: `-> 1 -> 2 <- <- a`,
+ tokens: []*Token{
+ withPos(newToken(1, 3, 3, []byte(`-> 1`)), 0, 4, 0, 0),
+ withPos(newToken(2, 1, 1, []byte(` `)), 4, 1, 0, 4),
+ withPos(newToken(2, 4, 2, []byte(`-> 2`)), 5, 4, 0, 5),
+ withPos(newToken(3, 1, 1, []byte(` `)), 9, 1, 0, 9),
+ withPos(newToken(3, 6, 2, []byte(`<-`)), 10, 2, 0, 10),
+ withPos(newToken(2, 1, 1, []byte(` `)), 12, 1, 0, 12),
+ withPos(newToken(2, 5, 3, []byte(`<-`)), 13, 2, 0, 13),
+ withPos(newToken(1, 1, 1, []byte(` `)), 15, 1, 0, 15),
+ withPos(newToken(1, 2, 2, []byte(`a`)), 16, 1, 0, 16),
+ withPos(newEOFTokenDefault(), 17, 0, 0, 17),
+ },
+ passiveModeTran: true,
+ tran: func(l *Lexer, tok *Token) error {
+ switch l.spec.ModeName(l.Mode()) {
+ case "default":
+ switch tok.KindID {
+ case 3: // push_1
+ l.PushMode(2)
+ }
+ case "mode_1":
+ switch tok.KindID {
+ case 4: // push_2
+ l.PushMode(3)
+ case 5: // pop_1
+ return l.PopMode()
+ }
+ case "mode_2":
+ switch tok.KindID {
+ case 6: // pop_2
+ return l.PopMode()
+ }
+ }
+ return nil
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false),
+ newLexEntry([]string{"default"}, "char", `.`, "", false),
+ newLexEntry([]string{"default"}, "push_1", `-> 1`, "mode_1", false),
+ newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false),
+ newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false),
+ newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", true),
+ },
+ },
+ src: `-> 1 -> 2 <- <- a`,
+ tokens: []*Token{
+ withPos(newToken(1, 3, 3, []byte(`-> 1`)), 0, 4, 0, 0),
+ withPos(newToken(2, 1, 1, []byte(` `)), 4, 1, 0, 4),
+ withPos(newToken(2, 4, 2, []byte(`-> 2`)), 5, 4, 0, 5),
+ withPos(newToken(3, 1, 1, []byte(` `)), 9, 1, 0, 9),
+ withPos(newToken(3, 6, 2, []byte(`<-`)), 10, 2, 0, 10),
+ withPos(newToken(2, 1, 1, []byte(` `)), 12, 1, 0, 12),
+ withPos(newToken(2, 5, 3, []byte(`<-`)), 13, 2, 0, 13),
+ withPos(newToken(1, 1, 1, []byte(` `)), 15, 1, 0, 15),
+ withPos(newToken(1, 2, 2, []byte(`a`)), 16, 1, 0, 16),
+ withPos(newEOFTokenDefault(), 17, 0, 0, 17),
+ },
+ // Active mode transition and an external transition function can be used together.
+ passiveModeTran: false,
+ tran: func(l *Lexer, tok *Token) error {
+ switch l.spec.ModeName(l.Mode()) {
+ case "mode_1":
+ switch tok.KindID {
+ case 4: // push_2
+ l.PushMode(3)
+ case 5: // pop_1
+ return l.PopMode()
+ }
+ }
+ return nil
+ },
+ },
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("dot", spec.EscapePattern(`.`)),
+ newLexEntryDefaultNOP("star", spec.EscapePattern(`*`)),
+ newLexEntryDefaultNOP("plus", spec.EscapePattern(`+`)),
+ newLexEntryDefaultNOP("question", spec.EscapePattern(`?`)),
+ newLexEntryDefaultNOP("vbar", spec.EscapePattern(`|`)),
+ newLexEntryDefaultNOP("lparen", spec.EscapePattern(`(`)),
+ newLexEntryDefaultNOP("rparen", spec.EscapePattern(`)`)),
+ newLexEntryDefaultNOP("lbrace", spec.EscapePattern(`[`)),
+ newLexEntryDefaultNOP("backslash", spec.EscapePattern(`\`)),
+ },
+ },
+ src: `.*+?|()[\`,
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte(`.`)), 0, 1, 0, 0),
+ withPos(newTokenDefault(2, 2, []byte(`*`)), 1, 1, 0, 1),
+ withPos(newTokenDefault(3, 3, []byte(`+`)), 2, 1, 0, 2),
+ withPos(newTokenDefault(4, 4, []byte(`?`)), 3, 1, 0, 3),
+ withPos(newTokenDefault(5, 5, []byte(`|`)), 4, 1, 0, 4),
+ withPos(newTokenDefault(6, 6, []byte(`(`)), 5, 1, 0, 5),
+ withPos(newTokenDefault(7, 7, []byte(`)`)), 6, 1, 0, 6),
+ withPos(newTokenDefault(8, 8, []byte(`[`)), 7, 1, 0, 7),
+ withPos(newTokenDefault(9, 9, []byte(`\`)), 8, 1, 0, 8),
+ withPos(newEOFTokenDefault(), 9, 0, 0, 9),
+ },
+ },
+ // Character properties are available in a bracket expression.
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("letter", `[\p{Letter}]+`),
+ newLexEntryDefaultNOP("non_letter", `[^\p{Letter}]+`),
+ },
+ },
+ src: `foo123`,
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0),
+ withPos(newTokenDefault(2, 2, []byte(`123`)), 3, 3, 0, 3),
+ withPos(newEOFTokenDefault(), 6, 0, 0, 6),
+ },
+ },
+ // The driver can continue lexical analysis even after it detects an invalid token.
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("lower", `[a-z]+`),
+ },
+ },
+ src: `foo123bar`,
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0),
+ withPos(newInvalidTokenDefault([]byte(`123`)), 3, 3, 0, 3),
+ withPos(newTokenDefault(1, 1, []byte(`bar`)), 6, 3, 0, 6),
+ withPos(newEOFTokenDefault(), 9, 0, 0, 9),
+ },
+ },
+ // The driver can detect an invalid token immediately preceding an EOF.
+ {
+ lspec: &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("lower", `[a-z]+`),
+ },
+ },
+ src: `foo123`,
+ tokens: []*Token{
+ withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0),
+ withPos(newInvalidTokenDefault([]byte(`123`)), 3, 3, 0, 3),
+ withPos(newEOFTokenDefault(), 6, 0, 0, 6),
+ },
+ },
+ }
+ for i, tt := range test {
+ for compLv := lexical.CompressionLevelMin; compLv <= lexical.CompressionLevelMax; compLv++ {
+ t.Run(fmt.Sprintf("#%v-%v", i, compLv), func(t *testing.T) {
+ clspec, err, cerrs := lexical.Compile(tt.lspec, compLv)
+ if err != nil {
+ for _, cerr := range cerrs {
+ t.Logf("%#v", cerr)
+ }
+ t.Fatalf("unexpected error: %v", err)
+ }
+ opts := []LexerOption{}
+ if tt.passiveModeTran {
+ opts = append(opts, DisableModeTransition())
+ }
+ lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(tt.src), opts...)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ for _, eTok := range tt.tokens {
+ tok, err := lexer.Next()
+ if err != nil {
+ t.Log(err)
+ break
+ }
+ testToken(t, eTok, tok)
+
+ if tok.EOF {
+ break
+ }
+
+ if tt.tran != nil {
+ err := tt.tran(lexer, tok)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ }
+ }
+ })
+ }
+ }
+}
+
+func TestLexer_Next_WithPosition(t *testing.T) {
+ lspec := &lexical.LexSpec{
+ Entries: []*lexical.LexEntry{
+ newLexEntryDefaultNOP("newline", `\u{000A}+`),
+ newLexEntryDefaultNOP("any", `.`),
+ },
+ }
+
+ clspec, err, _ := lexical.Compile(lspec, lexical.CompressionLevelMax)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+
+ src := string([]byte{
+ 0x00,
+ 0x7F,
+ 0x0A,
+
+ 0xC2, 0x80,
+ 0xDF, 0xBF,
+ 0x0A,
+
+ 0xE0, 0xA0, 0x80,
+ 0xE0, 0xBF, 0xBF,
+ 0xE1, 0x80, 0x80,
+ 0xEC, 0xBF, 0xBF,
+ 0xED, 0x80, 0x80,
+ 0xED, 0x9F, 0xBF,
+ 0xEE, 0x80, 0x80,
+ 0xEF, 0xBF, 0xBF,
+ 0x0A,
+
+ 0xF0, 0x90, 0x80, 0x80,
+ 0xF0, 0xBF, 0xBF, 0xBF,
+ 0xF1, 0x80, 0x80, 0x80,
+ 0xF3, 0xBF, 0xBF, 0xBF,
+ 0xF4, 0x80, 0x80, 0x80,
+ 0xF4, 0x8F, 0xBF, 0xBF,
+ 0x0A,
+ 0x0A,
+ 0x0A,
+ })
+
+ expected := []*Token{
+ withPos(newTokenDefault(2, 2, []byte{0x00}), 0, 1, 0, 0),
+ withPos(newTokenDefault(2, 2, []byte{0x7F}), 1, 1, 0, 1),
+ withPos(newTokenDefault(1, 1, []byte{0x0A}), 2, 1, 0, 2),
+
+ withPos(newTokenDefault(2, 2, []byte{0xC2, 0x80}), 3, 2, 1, 0),
+ withPos(newTokenDefault(2, 2, []byte{0xDF, 0xBF}), 5, 2, 1, 1),
+ withPos(newTokenDefault(1, 1, []byte{0x0A}), 7, 1, 1, 2),
+
+ withPos(newTokenDefault(2, 2, []byte{0xE0, 0xA0, 0x80}), 8, 3, 2, 0),
+ withPos(newTokenDefault(2, 2, []byte{0xE0, 0xBF, 0xBF}), 11, 3, 2, 1),
+ withPos(newTokenDefault(2, 2, []byte{0xE1, 0x80, 0x80}), 14, 3, 2, 2),
+ withPos(newTokenDefault(2, 2, []byte{0xEC, 0xBF, 0xBF}), 17, 3, 2, 3),
+ withPos(newTokenDefault(2, 2, []byte{0xED, 0x80, 0x80}), 20, 3, 2, 4),
+ withPos(newTokenDefault(2, 2, []byte{0xED, 0x9F, 0xBF}), 23, 3, 2, 5),
+ withPos(newTokenDefault(2, 2, []byte{0xEE, 0x80, 0x80}), 26, 3, 2, 6),
+ withPos(newTokenDefault(2, 2, []byte{0xEF, 0xBF, 0xBF}), 29, 3, 2, 7),
+ withPos(newTokenDefault(1, 1, []byte{0x0A}), 32, 1, 2, 8),
+
+ withPos(newTokenDefault(2, 2, []byte{0xF0, 0x90, 0x80, 0x80}), 33, 4, 3, 0),
+ withPos(newTokenDefault(2, 2, []byte{0xF0, 0xBF, 0xBF, 0xBF}), 37, 4, 3, 1),
+ withPos(newTokenDefault(2, 2, []byte{0xF1, 0x80, 0x80, 0x80}), 41, 4, 3, 2),
+ withPos(newTokenDefault(2, 2, []byte{0xF3, 0xBF, 0xBF, 0xBF}), 45, 4, 3, 3),
+ withPos(newTokenDefault(2, 2, []byte{0xF4, 0x80, 0x80, 0x80}), 49, 4, 3, 4),
+ withPos(newTokenDefault(2, 2, []byte{0xF4, 0x8F, 0xBF, 0xBF}), 53, 4, 3, 5),
+ // When a token contains multiple line breaks, the driver sets the token position to
+ // the line number where a lexeme first appears.
+ withPos(newTokenDefault(1, 1, []byte{0x0A, 0x0A, 0x0A}), 57, 3, 3, 6),
+
+ withPos(newEOFTokenDefault(), 60, 0, 6, 0),
+ }
+
+ lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src))
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+
+ for _, eTok := range expected {
+ tok, err := lexer.Next()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ testToken(t, eTok, tok)
+
+ if tok.EOF {
+ break
+ }
+ }
+}
+
+func testToken(t *testing.T, expected, actual *Token) {
+ t.Helper()
+
+ if actual.ModeID != expected.ModeID ||
+ actual.KindID != expected.KindID ||
+ actual.ModeKindID != expected.ModeKindID ||
+ !bytes.Equal(actual.Lexeme, expected.Lexeme) ||
+ actual.EOF != expected.EOF ||
+ actual.Invalid != expected.Invalid {
+ t.Fatalf(`unexpected token; want: %+v, got: %+v`, expected, actual)
+ }
+
+ if actual.BytePos != expected.BytePos || actual.ByteLen != expected.ByteLen ||
+ actual.Row != expected.Row || actual.Col != expected.Col {
+ t.Fatalf(`unexpected token; want: %+v, got: %+v`, expected, actual)
+ }
+}
diff --git a/tests/unit/driver/parser/conflict_test.go b/tests/unit/driver/parser/conflict_test.go
new file mode 100644
index 0000000..0bc14d4
--- /dev/null
+++ b/tests/unit/driver/parser/conflict_test.go
@@ -0,0 +1,524 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ "urubu/spec/grammar/parser"
+)
+
+func TestParserWithConflicts(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ cst *Node
+ }{
+ {
+ caption: "when a shift/reduce conflict occurred, we prioritize the shift action",
+ specSrc: `
+#name test;
+
+expr
+ : expr assign expr
+ | id
+ ;
+
+id: "[A-Za-z0-9_]+";
+assign: '=';
+`,
+ src: `foo=bar=baz`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "foo"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "bar"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ termNode("id", "baz"),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar",
+ specSrc: `
+#name test;
+
+s
+ : a
+ | b
+ ;
+a
+ : id
+ ;
+b
+ : id
+ ;
+
+id: "[A-Za-z0-9_]+";
+`,
+ src: `foo`,
+ cst: nonTermNode("s",
+ nonTermNode("a",
+ termNode("id", "foo"),
+ ),
+ ),
+ },
+ {
+ caption: "left associativities defined earlier in the grammar have higher precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #left mul
+ #left add
+);
+
+expr
+ : expr add expr
+ | expr mul expr
+ | id
+ ;
+
+id: "[A-Za-z0-9_]+";
+add: '+';
+mul: '*';
+`,
+ src: `a+b*c*d+e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ },
+ {
+ caption: "left associativities defined in the same line have the same precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | id
+ ;
+
+id: "[A-Za-z0-9_]+";
+add: '+';
+sub: '-';
+`,
+ src: `a-b+c+d-e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ },
+ {
+ caption: "right associativities defined earlier in the grammar have higher precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #right r1
+ #right r2
+);
+
+expr
+ : expr r2 expr
+ | expr r1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+r1
+ : 'r1';
+r2
+ : 'r2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a r2 b r1 c r1 d r2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "right associativities defined in the same line have the same precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #right r1 r2
+);
+
+expr
+ : expr r2 expr
+ | expr r1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+r1
+ : 'r1';
+r2
+ : 'r2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a r2 b r1 c r1 d r2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign a1
+ #assign a2
+);
+
+expr
+ : expr a2 expr
+ | expr a1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+a1
+ : 'a1';
+a2
+ : 'a2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a a2 b a1 c a1 d a2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "terminal symbols with an #assign directive defined in the same line have the same precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign a1 a2
+);
+
+expr
+ : expr a2 expr
+ | expr a1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+a1
+ : 'a1';
+a2
+ : 'a2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a a2 b a1 c a1 d a2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "#left, #right, and #assign can be mixed",
+ specSrc: `
+#name test;
+
+#prec (
+ #left mul div
+ #left add sub
+ #assign else
+ #assign then
+ #right assign
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | expr assign expr
+ | if expr then expr
+ | if expr then expr else expr
+ | id
+ ;
+
+ws #skip: "[\u{0009}\u{0020}]+";
+if: 'if';
+then: 'then';
+else: 'else';
+id: "[A-Za-z0-9_]+";
+add: '+';
+sub: '-';
+mul: '*';
+div: '/';
+assign: '=';
+`,
+ src: `x = y = a + b * c - d / e + if f then if g then h else i`,
+ cst: nonTermNode(
+ "expr",
+ nonTermNode("expr",
+ termNode("id", "x"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "y"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ ),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("if", "if"),
+ nonTermNode("expr",
+ termNode("id", "f"),
+ ),
+ termNode("then", "then"),
+ nonTermNode("expr",
+ termNode("if", "if"),
+ nonTermNode("expr",
+ termNode("id", "g"),
+ ),
+ termNode("then", "then"),
+ nonTermNode("expr",
+ termNode("id", "h"),
+ ),
+ termNode("else", "else"),
+ nonTermNode("expr",
+ termNode("id", "i"),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ cg, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram := NewGrammar(cg)
+ tb := NewDefaultSyntaxTreeBuilder()
+ p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if tt.cst != nil {
+ testTree(t, tb.Tree(), tt.cst)
+ }
+ })
+ }
+}
diff --git a/tests/unit/driver/parser/lac_test.go b/tests/unit/driver/parser/lac_test.go
new file mode 100644
index 0000000..c2368e8
--- /dev/null
+++ b/tests/unit/driver/parser/lac_test.go
@@ -0,0 +1,120 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ "urubu/spec/grammar/parser"
+)
+
+func TestParserWithLAC(t *testing.T) {
+ specSrc := `
+#name test;
+
+s
+ : t t
+ ;
+t
+ : c t
+ | d
+ ;
+
+c: 'c';
+d: 'd';
+`
+
+ src := `ccd`
+
+ actLogWithLAC := []string{
+ "shift/c",
+ "shift/c",
+ "shift/d",
+ "miss",
+ }
+
+ actLogWithoutLAC := []string{
+ "shift/c",
+ "shift/c",
+ "shift/d",
+ "reduce/t",
+ "reduce/t",
+ "reduce/t",
+ "miss",
+ }
+
+ ast, err := parser.Parse(strings.NewReader(specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ t.Run("LAC is enabled", func(t *testing.T) {
+ semAct := &testSemAct{
+ gram: gram,
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(semAct.actLog) != len(actLogWithLAC) {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
+ }
+
+ for i, e := range actLogWithLAC {
+ if semAct.actLog[i] != e {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
+ }
+ }
+ })
+
+ t.Run("LAC is disabled", func(t *testing.T) {
+ semAct := &testSemAct{
+ gram: gram,
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(semAct.actLog) != len(actLogWithoutLAC) {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
+ }
+
+ for i, e := range actLogWithoutLAC {
+ if semAct.actLog[i] != e {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
+ }
+ }
+ })
+}
diff --git a/tests/unit/driver/parser/parser_test.go b/tests/unit/driver/parser/parser_test.go
new file mode 100644
index 0000000..bca0391
--- /dev/null
+++ b/tests/unit/driver/parser/parser_test.go
@@ -0,0 +1,833 @@
+package parser
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ "urubu/spec/grammar/parser"
+)
+
+func termNode(kind string, text string, children ...*Node) *Node {
+ return &Node{
+ Type: NodeTypeTerminal,
+ KindName: kind,
+ Text: text,
+ Children: children,
+ }
+}
+
+func errorNode() *Node {
+ return &Node{
+ Type: NodeTypeError,
+ KindName: "error",
+ }
+}
+
+func nonTermNode(kind string, children ...*Node) *Node {
+ return &Node{
+ Type: NodeTypeNonTerminal,
+ KindName: kind,
+ Children: children,
+ }
+}
+
+func TestParser_Parse(t *testing.T) {
+ tests := []struct {
+ specSrc string
+ src string
+ synErr bool
+ cst *Node
+ ast *Node
+ }{
+ {
+ specSrc: `
+#name test;
+
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+
+add
+ : '+';
+mul
+ : '*';
+l_paren
+ : '(';
+r_paren
+ : ')';
+id
+ : "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ src: `(a+(b+c))*d+e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("term",
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("l_paren", "("),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "a"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("l_paren", "("),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "b"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "c"),
+ ),
+ ),
+ ),
+ termNode("r_paren", ")"),
+ ),
+ ),
+ ),
+ termNode("r_paren", ")"),
+ ),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("factor",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ },
+ // Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
+ // not allowed in string literals.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a b c
+ ;
+
+a
+ : '\f{foo}';
+b
+ : '\u{0000}';
+c
+ : '\p{gc=Letter}';
+`,
+ src: `\f{foo}\u{0000}\p{gc=Letter}`,
+ cst: nonTermNode("s",
+ termNode("a", `\f{foo}`),
+ termNode("b", `\u{0000}`),
+ termNode("c", `\p{gc=Letter}`),
+ ),
+ },
+ // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar
+ : bar_text
+ |
+ ;
+bar_text: "bar";
+`,
+ src: ``,
+ cst: nonTermNode("s",
+ nonTermNode("foo"),
+ nonTermNode("bar"),
+ ),
+ },
+ // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar
+ : bar_text
+ |
+ ;
+
+bar_text
+ : "bar";
+`,
+ src: `bar`,
+ cst: nonTermNode("s",
+ nonTermNode("foo"),
+ nonTermNode("bar",
+ termNode("bar_text", "bar"),
+ ),
+ ),
+ },
+ // A production can have multiple alternative productions.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $uminus
+ #left mul div
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | int
+ | sub int #prec $uminus // This 'sub' means the unary minus symbol.
+ ;
+
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+sub
+ : '-';
+mul
+ : '*';
+div
+ : '/';
+`,
+ src: `-1*-2+3-4/5`,
+ ast: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "1"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "2"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "3"),
+ ),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("int", "4"),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("int", "5"),
+ ),
+ ),
+ ),
+ },
+ // A lexical production can have multiple production directives.
+ {
+ specSrc: `
+#name test;
+
+s
+ : push_a push_b pop pop
+ ;
+
+push_a #mode default #push a
+ : '->a';
+push_b #mode a #push b
+ : '->b';
+pop #mode a b #pop
+ : '<-';
+`,
+ src: `->a->b<-<-`,
+ ast: nonTermNode("s",
+ termNode("push_a", "->a"),
+ termNode("push_b", "->b"),
+ termNode("pop", "<-"),
+ termNode("pop", "<-"),
+ ),
+ },
+ {
+ specSrc: `
+#name test;
+
+mode_tran_seq
+ : mode_tran_seq mode_tran
+ | mode_tran
+ ;
+mode_tran
+ : push_m1
+ | push_m2
+ | pop_m1
+ | pop_m2
+ ;
+
+push_m1 #push m1
+ : "->";
+push_m2 #mode m1 #push m2
+ : "-->";
+pop_m1 #mode m1 #pop
+ : "<-";
+pop_m2 #mode m2 #pop
+ : "<--";
+whitespace #mode default m1 m2 #skip
+ : "\u{0020}+";
+`,
+ src: ` -> --> <-- <- `,
+ },
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo
+ : "foo";
+bar #mode default
+ : "bar";
+`,
+ src: `foobar`,
+ },
+ // When #push and #pop are applied to the same symbol, #pop will run first, then #push.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push m1
+ : 'foo';
+bar #mode m1 #pop #push m2
+ : 'bar';
+baz #mode m2
+ : 'baz';
+`,
+ src: `foobarbaz`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ termNode("bar", "bar"),
+ termNode("baz", "baz"),
+ ),
+ },
+ // When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first
+		// in a definition. That is, the order in which #push and #pop appear in the grammar has nothing to do with the order in which
+ // they are executed.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push m1
+ : 'foo';
+bar #mode m1 #push m2 #pop
+ : 'bar';
+baz #mode m2
+ : 'baz';
+`,
+ src: `foobarbaz`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ termNode("bar", "bar"),
+ termNode("baz", "baz"),
+ ),
+ },
+		// The parser can skip specified tokens.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+white_space #skip
+ : "[\u{0009}\u{0020}]+";
+`,
+ src: `foo bar`,
+ },
+ // A grammar can contain fragments.
+ {
+ specSrc: `
+#name test;
+
+s
+ : tagline
+ ;
+tagline
+ : "\f{words} IS OUT THERE.";
+fragment words
+ : "[A-Za-z\u{0020}]+";
+`,
+ src: `THE TRUTH IS OUT THERE.`,
+ },
+ // A grammar can contain ast actions.
+ {
+ specSrc: `
+#name test;
+
+list
+ : l_bracket elems r_bracket #ast elems...
+ ;
+elems
+ : elems comma id #ast elems... id
+ | id
+ ;
+
+whitespace #skip
+ : "\u{0020}+";
+l_bracket
+ : '[';
+r_bracket
+ : ']';
+comma
+ : ',';
+id
+ : "[A-Za-z]+";
+`,
+ src: `[Byers, Frohike, Langly]`,
+ cst: nonTermNode("list",
+ termNode("x_1", "["),
+ nonTermNode("elems",
+ nonTermNode("elems",
+ nonTermNode("elems",
+ termNode("id", "Byers"),
+ ),
+ termNode("x_3", ","),
+ termNode("id", "Frohike"),
+ ),
+ termNode("x_3", ","),
+ termNode("id", "Langly"),
+ ),
+ termNode("x_2", "]"),
+ ),
+ ast: nonTermNode("list",
+ termNode("id", "Byers"),
+ termNode("id", "Frohike"),
+ termNode("id", "Langly"),
+ ),
+ },
+ // The '...' operator can expand child nodes.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a #ast a...
+ ;
+a
+ : a comma foo #ast a... foo
+ | foo
+ ;
+
+comma
+ : ',';
+foo
+ : 'foo';
+`,
+ src: `foo,foo,foo`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ termNode("foo", "foo"),
+ termNode("foo", "foo"),
+ ),
+ },
+		// The '...' operator can also be applied to an element having no children.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a semi_colon #ast a...
+ ;
+a
+ :
+ ;
+
+semi_colon
+ : ';';
+`,
+ src: `;`,
+ ast: nonTermNode("s"),
+ },
+		// A label can be a parameter of the #ast directive.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #left add sub
+);
+
+expr
+ : expr@lhs add expr@rhs #ast add lhs rhs
+ | expr@lhs sub expr@rhs #ast sub lhs rhs
+ | num
+ ;
+
+add
+ : '+';
+sub
+ : '-';
+num
+ : "0|[1-9][0-9]*";
+`,
+ src: `1+2-3`,
+ ast: nonTermNode("expr",
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("num", "1"),
+ ),
+ nonTermNode("expr",
+ termNode("num", "2"),
+ ),
+ ),
+ nonTermNode("expr",
+ termNode("num", "3"),
+ ),
+ ),
+ },
+ // An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo@x semi_colon #ast foo
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `foo;`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ ),
+ },
+ // A production has the same precedence and associativity as the right-most terminal symbol.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #left add
+);
+
+expr
+ : expr add expr // This alternative has the same precedence and associativiry as 'add'.
+ | int
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+`,
+ // This source is recognized as the following structure because the production `expr → expr add expr` has the same
+ // precedence and associativity as the symbol 'add'.
+ //
+ // ((1+2)+3)
+ //
+ // If the symbol doesn't have the precedence and left associativity, the production also doesn't have the precedence
+ // and associativity and this source will be recognized as the following structure.
+ //
+ // (1+(2+3))
+ src: `1+2+3`,
+ ast: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("int", "1"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "2"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "3"),
+ ),
+ ),
+ },
+ // The 'prec' directive can set precedence of a production.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $uminus
+ #left mul div
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | int
+ | sub int #prec $uminus // This 'sub' means a unary minus symbol.
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+sub
+ : '-';
+mul
+ : '*';
+div
+ : '/';
+`,
+			// This source is recognized as the following structure because the production `expr → sub int`
+			// has the `#prec $uminus` directive, which gives it higher precedence than the symbol `mul`.
+ //
+ // (((-1) * 20) / 5)
+ //
+ // If the production doesn't have the `#prec` directive, this source will be recognized as
+ // the following structure.
+ //
+ // (- ((1 * 20) / 5))
+ src: `-1*20/5`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "1"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("int", "20"),
+ ),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("int", "5"),
+ ),
+ ),
+ },
+ // The grammar can contain the 'error' symbol.
+ {
+ specSrc: `
+#name test;
+
+s
+ : id id id semi_colon
+ | error semi_colon
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+id
+ : "[A-Za-z_]+";
+`,
+ src: `foo bar baz ;`,
+ },
+ // The 'error' symbol can appear in an #ast directive.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo semi_colon
+ | error semi_colon #ast error
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `bar;`,
+ synErr: true,
+ ast: nonTermNode("s",
+ errorNode(),
+ ),
+ },
+ // The 'error' symbol can have a label, and an #ast can reference it.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo semi_colon
+ | error@e semi_colon #ast e
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `bar;`,
+ synErr: true,
+ ast: nonTermNode("s",
+ errorNode(),
+ ),
+ },
+ // The grammar can contain the 'recover' directive.
+ {
+ specSrc: `
+#name test;
+
+seq
+ : seq elem
+ | elem
+ ;
+elem
+ : id id id semi_colon
+ | error semi_colon #recover
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+id
+ : "[A-Za-z_]+";
+`,
+ src: `a b c ; d e f ;`,
+ },
+ // The same label can be used between different alternatives.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo@x bar
+ | foo@x
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ src: `foo`,
+ },
+ }
+
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ cg, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram := NewGrammar(cg)
+ tb := NewDefaultSyntaxTreeBuilder()
+ var opt []ParserOption
+ switch {
+ case tt.ast != nil:
+ opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
+ case tt.cst != nil:
+ opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
+ }
+ p, err := NewParser(toks, gram, opt...)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if !tt.synErr && len(p.SyntaxErrors()) > 0 {
+ for _, synErr := range p.SyntaxErrors() {
+ t.Fatalf("unexpected syntax errors occurred: %v", synErr)
+ }
+ }
+
+ switch {
+ case tt.ast != nil:
+ testTree(t, tb.Tree(), tt.ast)
+ case tt.cst != nil:
+ testTree(t, tb.Tree(), tt.cst)
+ }
+ })
+ }
+}
+
+func testTree(t *testing.T, node, expected *Node) {
+ t.Helper()
+
+ if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
+ t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
+ }
+ if len(node.Children) != len(expected.Children) {
+ t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
+ }
+ for i, c := range node.Children {
+ testTree(t, c, expected.Children[i])
+ }
+}
diff --git a/tests/unit/driver/parser/semantic_action_test.go b/tests/unit/driver/parser/semantic_action_test.go
new file mode 100644
index 0000000..cb3ee70
--- /dev/null
+++ b/tests/unit/driver/parser/semantic_action_test.go
@@ -0,0 +1,227 @@
+package parser
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ spec "urubu/spec/grammar"
+ "urubu/spec/grammar/parser"
+)
+
+type testSemAct struct {
+ gram *spec.CompiledGrammar
+ actLog []string
+}
+
+func (a *testSemAct) Shift(tok VToken, recovered bool) {
+ t := a.gram.Syntactic.Terminals[tok.TerminalID()]
+ if recovered {
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
+ } else {
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
+ }
+}
+
+func (a *testSemAct) Reduce(prodNum int, recovered bool) {
+ lhsSym := a.gram.Syntactic.LHSSymbols[prodNum]
+ lhsText := a.gram.Syntactic.NonTerminals[lhsSym]
+ if recovered {
+ a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText))
+ } else {
+ a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText))
+ }
+}
+
+func (a *testSemAct) Accept() {
+ a.actLog = append(a.actLog, "accept")
+}
+
+func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
+ a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
+}
+
+func (a *testSemAct) MissError(cause VToken) {
+ a.actLog = append(a.actLog, "miss")
+}
+
+func TestParserWithSemanticAction(t *testing.T) {
+ specSrcWithErrorProd := `
+#name test;
+
+seq
+ : seq elem semicolon
+ | elem semicolon
+ | error star star semicolon
+ | error semicolon #recover
+ ;
+elem
+ : char char char
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semicolon
+ : ';';
+star
+ : '*';
+char
+ : "[a-z]";
+`
+
+ specSrcWithoutErrorProd := `
+#name test;
+
+seq
+ : seq elem semicolon
+ | elem semicolon
+ ;
+elem
+ : char char char
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semicolon
+ : ';';
+char
+ : "[a-z]";
+`
+
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ actLog []string
+ }{
+ {
+ caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a b c; d e f;`,
+ actLog: []string{
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ "accept",
+ },
+ },
+ {
+ caption: "when a grammar has `error` symbol, the driver calls `TrapAndShiftError`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a; b !; c d !; e ! * *; h i j;`,
+ actLog: []string{
+ "shift/char",
+ "trap/1/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "trap/2/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "shift/char",
+ "trap/3/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "trap/2/shift/error",
+ "shift/star",
+ "shift/star",
+ // When the driver shifts three times, it recovers from an error.
+ "shift/semicolon/recovered",
+ "reduce/seq",
+
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ // Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted
+ // according to the error production.
+ "accept",
+ },
+ },
+ {
+ caption: "when the input doesn't meet the error production, the driver calls `MissError`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a !`,
+ actLog: []string{
+ "shift/char",
+ "trap/1/shift/error",
+
+ "miss",
+ },
+ },
+ {
+ caption: "when a syntax error isn't trapped, the driver calls `MissError`.",
+ specSrc: specSrcWithoutErrorProd,
+ src: `a !`,
+ actLog: []string{
+ "shift/char",
+
+ "miss",
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ semAct := &testSemAct{
+ gram: gram,
+ }
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(semAct.actLog) != len(tt.actLog) {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
+ }
+
+ for i, e := range tt.actLog {
+ if semAct.actLog[i] != e {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
+ }
+ }
+ })
+ }
+}
diff --git a/tests/unit/driver/parser/syntax_error_test.go b/tests/unit/driver/parser/syntax_error_test.go
new file mode 100644
index 0000000..90e5bd2
--- /dev/null
+++ b/tests/unit/driver/parser/syntax_error_test.go
@@ -0,0 +1,306 @@
+package parser
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ "urubu/spec/grammar/parser"
+)
+
+func TestParserWithSyntaxErrors(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ synErrCount int
+ }{
+ {
+ caption: "the parser can report a syntax error",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ synErrCount: 1,
+ },
+ {
+ caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state",
+ specSrc: `
+#name test;
+
+seq
+ : seq elem semi_colon
+ | elem semi_colon
+ | error semi_colon #recover
+ ;
+elem
+ : a b c
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+a
+ : 'a';
+b
+ : 'b';
+c
+ : 'c';
+`,
+ src: `!; a!; ab!;`,
+ synErrCount: 3,
+ },
+ {
+ caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears",
+ specSrc: `
+#name test;
+
+seq
+ : seq elem semi_colon
+ | elem semi_colon
+ | error semi_colon #recover
+ ;
+elem
+ : a b c
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+a
+ : 'a';
+b
+ : 'b';
+c
+ : 'c';
+`,
+			// After the parser transitions to the error state on reading the first invalid symbol ('!'),
+ // the second and third invalid symbols ('!') are ignored.
+ src: `! ! !; a!; ab!;`,
+ synErrCount: 3,
+ },
+ {
+ caption: "when the parser performs shift three times, the parser recovers from the error state",
+ specSrc: `
+#name test;
+
+seq
+ : seq elem semi_colon
+ | elem semi_colon
+ | error star star semi_colon
+ ;
+elem
+ : a b c
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+star
+ : '*';
+a
+ : 'a';
+b
+ : 'b';
+c
+ : 'c';
+`,
+ src: `!**; a!**; ab!**; abc!`,
+ synErrCount: 4,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ synErrs := p.SyntaxErrors()
+ if len(synErrs) != tt.synErrCount {
+ t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
+ }
+ })
+ }
+}
+
+func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ cause string
+ expected []string
+ }{
+ {
+ caption: "the parser reports an expected lookahead symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ cause: `bar`,
+ expected: []string{
+ "foo",
+ },
+ },
+ {
+ caption: "the parser reports expected lookahead symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ src: `baz`,
+ cause: `baz`,
+ expected: []string{
+ "foo",
+ "bar",
+ },
+ },
+ {
+ caption: "the parser may report the EOF as an expected lookahead symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `foobar`,
+ cause: `bar`,
+ expected: []string{
+ "<eof>",
+ },
+ },
+ {
+ caption: "the parser may report the EOF and others as expected lookahead symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ |
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ cause: `bar`,
+ expected: []string{
+ "foo",
+ "<eof>",
+ },
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ synErrs := p.SyntaxErrors()
+ if synErrs == nil {
+ t.Fatalf("expected one syntax error, but it didn't occur")
+ }
+ if len(synErrs) != 1 {
+ t.Fatalf("too many syntax errors: %v errors", len(synErrs))
+ }
+ synErr := synErrs[0]
+ if string(synErr.Token.Lexeme()) != tt.cause {
+ t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme()))
+ }
+ if len(synErr.ExpectedTerminals) != len(tt.expected) {
+ t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals)
+ }
+ sort.Slice(tt.expected, func(i, j int) bool {
+ return tt.expected[i] < tt.expected[j]
+ })
+ sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool {
+ return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j]
+ })
+ for i, e := range tt.expected {
+ if synErr.ExpectedTerminals[i] != e {
+ t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i])
+ }
+ }
+ })
+ }
+}
diff --git a/tests/unit/grammar/first_test.go b/tests/unit/grammar/first_test.go
new file mode 100644
index 0000000..9625ef6
--- /dev/null
+++ b/tests/unit/grammar/first_test.go
@@ -0,0 +1,219 @@
+package grammar
+
+import (
+ "strings"
+ "testing"
+
+ "urubu/grammar/symbol"
+ "urubu/spec/grammar/parser"
+)
+
+type first struct {
+ lhs string
+ num int
+ dot int
+ symbols []string
+ empty bool
+}
+
+func TestGenFirst(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ first []first
+ }{
+ {
+ caption: "productions contain only non-empty productions",
+ src: `
+#name test;
+
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ first: []first{
+ {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}},
+ {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}},
+ {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}},
+ {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}},
+ {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}},
+ {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}},
+ },
+ },
+ {
+ caption: "productions contain the empty start production",
+ src: `
+#name test;
+
+s
+ :
+ ;
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "productions contain an empty production",
+ src: `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar: "bar";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+ {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "a start production contains a non-empty alternative and empty alternative",
+ src: `
+#name test;
+
+s
+ : foo
+ |
+ ;
+foo: "foo";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}},
+ {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "a production contains non-empty alternative and empty alternative",
+ src: `
+#name test;
+
+s
+ : foo
+ ;
+foo
+ : bar
+ |
+ ;
+bar: "bar";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+ {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}},
+ {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ fst, gram := genActualFirst(t, tt.src)
+
+ for _, ttFirst := range tt.first {
+ lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
+ }
+
+ prod, ok := gram.productionSet.findByLHS(lhsSym)
+ if !ok {
+ t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
+ }
+
+ actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
+ if err != nil {
+ t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
+ }
+
+ expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
+
+ testFirst(t, actualFirst, expectedFirst)
+ }
+ })
+ }
+}
+
+func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err := b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+ fst, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if fst == nil {
+ t.Fatal("genFiest returned nil without any error")
+ }
+
+ return fst, gram
+}
+
+func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
+ t.Helper()
+
+ entry := newFirstEntry()
+ if empty {
+ entry.addEmpty()
+ }
+ for _, sym := range symbols {
+ symSym, ok := symTab.ToSymbol(sym)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", sym)
+ }
+ entry.add(symSym)
+ }
+
+ return entry
+}
+
+func testFirst(t *testing.T, actual, expected *firstEntry) {
+ if actual.empty != expected.empty {
+ t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
+ }
+
+ if len(actual.symbols) != len(expected.symbols) {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+
+ for eSym := range expected.symbols {
+ if _, ok := actual.symbols[eSym]; !ok {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+ }
+}
diff --git a/tests/unit/grammar/grammar_test.go b/tests/unit/grammar/grammar_test.go
new file mode 100644
index 0000000..ddedb27
--- /dev/null
+++ b/tests/unit/grammar/grammar_test.go
@@ -0,0 +1,3381 @@
+package grammar
+
+import (
+ "strings"
+ "testing"
+
+ verr "urubu/error"
+ "urubu/spec/grammar/parser"
+)
+
+func TestGrammarBuilderOK(t *testing.T) {
+ type okTest struct {
+ caption string
+ specSrc string
+ validate func(t *testing.T, g *Grammar)
+ }
+
+ nameTests := []*okTest{
+ {
+ caption: "the `#name` can be the same identifier as a non-terminal symbol",
+ specSrc: `
+#name s;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "s"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ {
+ caption: "the `#name` can be the same identifier as a terminal symbol",
+ specSrc: `
+#name foo;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "foo"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ {
+ caption: "the `#name` can be the same identifier as the error symbol",
+ specSrc: `
+#name error;
+
+s
+ : foo
+ | error
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "error"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ {
+ caption: "the `#name` can be the same identifier as a fragment",
+ specSrc: `
+#name f;
+
+s
+ : foo
+ ;
+
+foo
+ : "\f{f}";
+fragment f
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "f"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ }
+
+ modeTests := []*okTest{
+ {
+ caption: "a `#mode` can be the same identifier as a non-terminal symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push s
+ : 'foo';
+bar #mode s
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ kind := "bar"
+ expectedMode := "s"
+ for _, e := range g.lexSpec.Entries {
+ if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
+ return
+ }
+ }
+ t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
+ },
+ },
+ {
+ caption: "a `#mode` can be the same identifier as a terminal symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push bar
+ : 'foo';
+bar #mode bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ kind := "bar"
+ expectedMode := "bar"
+ for _, e := range g.lexSpec.Entries {
+ if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
+ return
+ }
+ }
+ t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
+ },
+ },
+ {
+ caption: "a `#mode` can be the same identifier as the error symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ | error
+ ;
+
+foo #push error
+ : 'foo';
+bar #mode error
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ kind := "bar"
+ expectedMode := "error"
+ for _, e := range g.lexSpec.Entries {
+ if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
+ return
+ }
+ }
+ t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
+ },
+ },
+ {
+ caption: "a `#mode` can be the same identifier as a fragment",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push f
+ : "\f{f}";
+bar #mode f
+ : 'bar';
+fragment f
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ kind := "bar"
+ expectedMode := "f"
+ for _, e := range g.lexSpec.Entries {
+ if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
+ return
+ }
+ }
+ t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
+ },
+ },
+ }
+
+ precTests := []*okTest{
+ {
+ caption: "a `#prec` allows the empty directive group",
+ specSrc: `
+#name test;
+
+#prec ();
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ },
+ {
+ caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo bar
+);
+
+s
+ : foo bar baz
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+baz
+ : 'baz';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 1 || barAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
+ }
+ var bazPrec int
+ var bazAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("baz")
+ bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if bazPrec != precNil || bazAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
+ }
+ },
+ },
+ {
+ caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols",
+ specSrc: `
+#name test;
+
+#prec (
+ #right foo bar
+);
+
+s
+ : foo bar baz
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+baz
+ : 'baz';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc)
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 1 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc)
+ }
+ var bazPrec int
+ var bazAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("baz")
+ bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if bazPrec != precNil || bazAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
+ }
+ },
+ },
+ {
+ caption: "an `#assign` directive gives only a precedence to specified terminal symbols",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign foo bar
+);
+
+s
+ : foo bar baz
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+baz
+ : 'baz';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc)
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 1 || barAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc)
+ }
+ var bazPrec int
+ var bazAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("baz")
+ bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if bazPrec != precNil || bazAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
+ }
+ },
+ },
+ {
+ caption: "a production has the same precedence and associativity as the right-most terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+);
+
+s
+ : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'.
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if barPrec != precNil || barAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc)
+ }
+ if sPrec != barPrec || sAssoc != barAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "a production has the same precedence and associativity as the right-most terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+);
+
+s
+ : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'.
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if barPrec != 2 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
+ }
+ if sPrec != barPrec || sAssoc != barAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "even if a non-terminal symbol apears to a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+);
+
+s
+ : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'.
+ ;
+a
+ : bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var aPrec int
+ var aAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("a")
+ ps, _ := g.productionSet.findByLHS(s)
+ aPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if barPrec != 2 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
+ }
+ if aPrec != barPrec || aAssoc != barAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc)
+ }
+ if sPrec != fooPrec || sAssoc != fooAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "each alternative in the same production can have its own precedence and associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+ #assign baz
+);
+
+s
+ : foo
+ | bar
+ | baz
+ | bra
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+baz
+ : 'baz';
+bra
+ : 'bra';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var alt1Prec int
+ var alt1Assoc assocType
+ var alt2Prec int
+ var alt2Assoc assocType
+ var alt3Prec int
+ var alt3Assoc assocType
+ var alt4Prec int
+ var alt4Assoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
+ alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
+ alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
+ alt3Prec = g.precAndAssoc.productionPredence(ps[2].num)
+ alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num)
+ alt4Prec = g.precAndAssoc.productionPredence(ps[3].num)
+ alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num)
+ }
+ if alt1Prec != 1 || alt1Assoc != assocTypeLeft {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc)
+ }
+ if alt2Prec != 2 || alt2Assoc != assocTypeRight {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc)
+ }
+ if alt3Prec != 3 || alt3Assoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt3Prec, alt3Assoc)
+ }
+ if alt4Prec != precNil || alt4Assoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc)
+ }
+ },
+ },
+ {
+ caption: "when a production contains no terminal symbols, the production will not have precedence and associativiry",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+);
+
+s
+ : a
+ ;
+a
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var aPrec int
+ var aAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("a")
+ ps, _ := g.productionSet.findByLHS(s)
+ aPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if aPrec != fooPrec || aAssoc != fooAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc)
+ }
+ if sPrec != precNil || sAssoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+);
+
+s
+ : foo bar #prec foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if sPrec != fooPrec || sAssoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+);
+
+s
+ : foo bar #prec foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if barPrec != 2 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
+ }
+ if sPrec != fooPrec || sAssoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "an ordered symbol can appear in a `#left` directive",
+ specSrc: `
+#name test;
+
+#prec (
+ #left $high
+ #right foo bar
+ #left $low
+);
+
+s
+ : foo #prec $high
+ | bar #prec $low
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 2 || fooAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc)
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 2 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
+ }
+ var alt1Prec int
+ var alt1Assoc assocType
+ var alt2Prec int
+ var alt2Assoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
+ alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
+ alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
+ }
+ if alt1Prec != 1 || alt1Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc)
+ }
+ if alt2Prec != 3 || alt2Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc)
+ }
+ },
+ },
+ {
+ caption: "an ordered symbol can appear in a `#right` directive",
+ specSrc: `
+#name test;
+
+#prec (
+ #right $high
+ #left foo bar
+ #right $low
+);
+
+s
+ : foo #prec $high
+ | bar #prec $low
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 2 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 2 || barAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc)
+ }
+ var alt1Prec int
+ var alt1Assoc assocType
+ var alt2Prec int
+ var alt2Assoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
+ alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
+ alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
+ }
+ if alt1Prec != 1 || alt1Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc)
+ }
+ if alt2Prec != 3 || alt2Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc)
+ }
+ },
+ },
+ {
+ caption: "an ordered symbol can appear in a `#assign` directive",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $high
+ #left foo
+ #right bar
+ #assign $low
+);
+
+s
+ : foo #prec $high
+ | bar #prec $low
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 2 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 3 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc)
+ }
+ var alt1Prec int
+ var alt1Assoc assocType
+ var alt2Prec int
+ var alt2Assoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
+ alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
+ alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
+ }
+ if alt1Prec != 1 || alt1Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc)
+ }
+ if alt2Prec != 4 || alt2Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc)
+ }
+ },
+ },
+ {
+ caption: "names of an ordered symbol and a terminal symbol can duplicate",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo bar
+ #right $foo
+);
+
+s
+ : foo
+ | bar #prec $foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if barPrec != 1 || barAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
+ }
+ var alt1Prec int
+ var alt1Assoc assocType
+ var alt2Prec int
+ var alt2Assoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
+ alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
+ alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
+ }
+ if alt1Prec != fooPrec || alt1Assoc != fooAssoc {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc)
+ }
+ if alt2Prec != 2 || alt2Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc)
+ }
+ },
+ },
+ {
+ caption: "names of an ordered symbol and a non-terminal symbol can duplicate",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo bar
+ #right $a
+);
+
+s
+ : a
+ | bar #prec $a
+ ;
+a
+ : foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ if barPrec != 1 || barAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
+ }
+ var alt1Prec int
+ var alt1Assoc assocType
+ var alt2Prec int
+ var alt2Assoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
+ alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
+ alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
+ }
+ if alt1Prec != precNil || alt1Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc)
+ }
+ if alt2Prec != 2 || alt2Assoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc)
+ }
+ },
+ },
+ }
+
+ var tests []*okTest
+ tests = append(tests, nameTests...)
+ tests = append(tests, modeTests...)
+ tests = append(tests, precTests...)
+
+ for _, test := range tests {
+ t.Run(test.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(test.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ g, err := b.build()
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if test.validate != nil {
+ test.validate(t, g)
+ }
+ })
+ }
+}
+
+func TestGrammarBuilderSpecError(t *testing.T) {
+ type specErrTest struct {
+ caption string
+ specSrc string
+ errs []error
+ }
+
+ spellingInconsistenciesTests := []*specErrTest{
+ {
+ caption: "a spelling inconsistency appears among non-terminal symbols",
+ specSrc: `
+#name test;
+
+a1
+ : a_1
+ ;
+a_1
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among terminal symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo1 foo_1
+ ;
+
+foo1
+ : 'foo1';
+foo_1
+ : 'foo_1';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among non-terminal and terminal symbols",
+ specSrc: `
+#name test;
+
+a1
+ : a_1
+ ;
+
+a_1
+ : 'a_1';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $p1 $p_1
+);
+
+s
+ : foo #prec $p1
+ | bar #prec $p_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $p1
+ #assign $p_1
+);
+
+s
+ : foo #prec $p1
+ | bar #prec $p_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among labels the same alternative contains",
+ specSrc: `
+#name test;
+
+s
+ : foo@l1 foo@l_1
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among labels the same production contains",
+ specSrc: `
+#name test;
+
+s
+ : foo@l1
+ | bar@l_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among labels different productions contain",
+ specSrc: `
+#name test;
+
+s
+ : foo@l1
+ ;
+a
+ : bar@l_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ }
+
+ prodTests := []*specErrTest{
+ {
+ caption: "a production `b` is unused",
+ specSrc: `
+#name test;
+
+a
+ : foo
+ ;
+b
+ : foo
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrUnusedProduction},
+ },
+ {
+ caption: "a terminal symbol `bar` is unused",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{semErrUnusedTerminal},
+ },
+ {
+ caption: "a production `b` and terminal symbol `bar` is unused",
+ specSrc: `
+#name test;
+
+a
+ : foo
+ ;
+b
+ : bar
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{
+ semErrUnusedProduction,
+ semErrUnusedTerminal,
+ },
+ },
+ {
+ caption: "a production cannot have production directives",
+ specSrc: `
+#name test;
+
+s #prec foo
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrInvalidProdDir},
+ },
+ {
+ caption: "a lexical production cannot have alternative directives",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo' #skip;
+`,
+ errs: []error{semErrInvalidAltDir},
+ },
+ {
+ caption: "a production directive must not be duplicated",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo #skip #skip
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateDir},
+ },
+ {
+ caption: "an alternative directive must not be duplicated",
+ specSrc: `
+#name test;
+
+s
+ : foo bar #ast foo bar #ast foo bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDuplicateDir},
+ },
+ {
+ caption: "a production must not have a duplicate alternative (non-empty alternatives)",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | foo
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDuplicateProduction},
+ },
+ {
+ caption: "a production must not have a duplicate alternative (non-empty and split alternatives)",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | a
+ ;
+a
+ : bar
+ ;
+s
+ : foo
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{semErrDuplicateProduction},
+ },
+ {
+ caption: "a production must not have a duplicate alternative (empty alternatives)",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | a
+ ;
+a
+ :
+ |
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDuplicateProduction},
+ },
+ {
+ caption: "a production must not have a duplicate alternative (empty and split alternatives)",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | a
+ ;
+a
+ :
+ | foo
+ ;
+a
+ :
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDuplicateProduction},
+ },
+ {
+ caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : "foo";
+s
+ : "a";
+`,
+ errs: []error{semErrDuplicateName},
+ },
+ {
+ caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | a
+ ;
+a
+ : bar
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+a
+ : "a";
+`,
+ errs: []error{semErrDuplicateName},
+ },
+ {
+ caption: "an invalid top-level directive",
+ specSrc: `
+#name test;
+
+#foo;
+
+s
+ : a
+ ;
+
+a
+ : 'a';
+`,
+ errs: []error{semErrDirInvalidName},
+ },
+ {
+ caption: "a label must be unique in an alternative",
+ specSrc: `
+#name test;
+
+s
+ : foo@x bar@x
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDuplicateLabel},
+ },
+ {
+ caption: "a label cannot be the same name as terminal symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo bar@foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDuplicateLabel},
+ },
+ {
+ caption: "a label cannot be the same name as non-terminal symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo@a
+ | a
+ ;
+a
+ : bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{
+ semErrInvalidLabel,
+ },
+ },
+ }
+
+ nameDirTests := []*specErrTest{
+ {
+ caption: "the `#name` directive is required",
+ specSrc: `
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrNoGrammarName},
+ },
+ {
+ caption: "the `#name` directive needs an ID parameter",
+ specSrc: `
+#name;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#name` directive cannot take a pattern parameter",
+ specSrc: `
+#name "test";
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#name` directive cannot take a string parameter",
+ specSrc: `
+#name 'test';
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#name` directive takes just one parameter",
+ specSrc: `
+#name test1 test2;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ precDirTests := []*specErrTest{
+ {
+ caption: "the `#prec` directive needs a directive group parameter",
+ specSrc: `
+#name test;
+
+#prec;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take an ID parameter",
+ specSrc: `
+#name test;
+
+#prec foo;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec $x;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+#prec "foo";
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+#prec 'foo';
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive takes just one directive group parameter",
+ specSrc: `
+#name test;
+
+#prec () ();
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ leftDirTests := []*specErrTest{
+ {
+ caption: "the `#left` directive needs ID parameters",
+ specSrc: `
+#name test;
+
+#prec (
+ #left
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` directive cannot be applied to an error symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left error
+);
+
+s
+ : foo semi_colon
+ | error semi_colon
+ ;
+
+foo
+ : 'foo';
+semi_colon
+ : ';';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` directive cannot take an undefined symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left x
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` directive cannot take a non-terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left s
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #left "foo"
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #left 'foo'
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` directive cannot take a directive parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #left ()
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left $x $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "a terminal symbol cannot have different precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #left foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "an ordered symbol cannot have different precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #left $x
+ #left $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "a terminal symbol cannot have different associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #right foo
+ #left foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "an ordered symbol cannot have different associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #right $x
+ #left $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ }
+
+ rightDirTests := []*specErrTest{
+ {
+ caption: "the `#right` directive needs ID parameters",
+ specSrc: `
+#name test;
+
+#prec (
+ #right
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot be applied to an error symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #right error
+);
+
+s
+ : foo semi_colon
+ | error semi_colon
+ ;
+
+foo
+ : 'foo';
+semi_colon
+ : ';';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot take an undefined symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #right x
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot take a non-terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #right s
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #right "foo"
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #right 'foo'
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot take a directive group parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #right ()
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#right` directive cannot be specified multiple times for a terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #right foo foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "the `#right` directive cannot be specified multiple times for an ordered symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #right $x $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "a terminal symbol cannot have different precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #right foo
+ #right foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "an ordered symbol cannot have different precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #right $x
+ #right $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "a terminal symbol cannot have different associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "an ordered symbol cannot have different associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left $x
+ #right $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ }
+
+ assignDirTests := []*specErrTest{
+ {
+ caption: "the `#assign` directive needs ID parameters",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` directive cannot be applied to an error symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign error
+);
+
+s
+ : foo semi_colon
+ | error semi_colon
+ ;
+
+foo
+ : 'foo';
+semi_colon
+ : ';';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` directive cannot take an undefined symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign x
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` directive cannot take a non-terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign s
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign "foo"
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign 'foo'
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` directive cannot take a directive parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign ()
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign foo foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "a terminal symbol cannot have different precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign foo
+ #assign foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "an ordered symbol cannot have different precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+ #assign $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "a terminal symbol cannot have different associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign foo
+ #left foo
+);
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ {
+ caption: "an ordered symbol cannot have different associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+ #left $x
+);
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateAssoc},
+ },
+ }
+
+ errorSymTests := []*specErrTest{
+ {
+ caption: "cannot use the error symbol as a non-terminal symbol",
+ specSrc: `
+#name test;
+
+s
+ : error
+ ;
+error
+ : foo
+ ;
+
+foo: 'foo';
+`,
+ errs: []error{
+ semErrErrSymIsReserved,
+ semErrDuplicateName,
+ },
+ },
+ {
+ caption: "cannot use the error symbol as a terminal symbol",
+ specSrc: `
+#name test;
+
+s
+ : error
+ ;
+
+error: 'error';
+`,
+ errs: []error{semErrErrSymIsReserved},
+ },
+ {
+ caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+error #skip
+ : 'error';
+`,
+ errs: []error{semErrErrSymIsReserved},
+ },
+ }
+
+ astDirTests := []*specErrTest{
+ {
+ caption: "the `#ast` directive needs ID or label prameters",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#ast` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+);
+
+s
+ : foo #ast $x
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#ast` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast "foo"
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#ast` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast 'foo'
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#ast` directive cannot take a directive group parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast ()
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative",
+ specSrc: `
+#name test;
+
+s
+ : foo bar #ast foo x
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast bar
+ | bar
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "a label in a different alternative cannot be a parameter of the `#ast` directive",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast b
+ | bar@b
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "a symbol can appear in the `#ast` directive only once",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast foo foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateElem},
+ },
+ {
+ caption: "a label can appear in the `#ast` directive only once",
+ specSrc: `
+#name test;
+
+s
+ : foo@x #ast x x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateElem},
+ },
+ {
+ caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label",
+ specSrc: `
+#name test;
+
+s
+ : foo@x #ast foo x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDuplicateElem},
+ },
+ {
+ caption: "symbol `foo` is ambiguous because it appears in an alternative twice",
+ specSrc: `
+#name test;
+
+s
+ : foo foo #ast foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrAmbiguousElem},
+ },
+ {
+ caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label",
+ specSrc: `
+#name test;
+
+s
+ : foo@x foo #ast foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrAmbiguousElem},
+ },
+ {
+ caption: "the expansion operator cannot be applied to a terminal symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo #ast foo...
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ altPrecDirTests := []*specErrTest{
+ {
+ caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot be applied to an error symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec error
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take an undefined symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take a non-terminal symbol",
+ specSrc: `
+#name test;
+
+s
+ : a #prec b
+ | b
+ ;
+a
+ : foo
+ ;
+b
+ : bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take an undefined ordered symbol parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrUndefinedOrdSym},
+ },
+ {
+ caption: "the `#prec` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec "foo"
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec 'foo'
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#prec` directive cannot take a directive parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #prec ()
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "a symbol the `#prec` directive takes must be given precedence explicitly",
+ specSrc: `
+#name test;
+
+s
+ : foo bar #prec foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrUndefinedPrec},
+ },
+ }
+
+ recoverDirTests := []*specErrTest{
+ {
+ caption: "the `#recover` directive cannot take an ID parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #recover foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#recover` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+);
+
+s
+ : foo #recover $x
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#recover` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #recover "foo"
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#recover` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #recover 'foo'
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#recover` directive cannot take a directive group parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo #recover ()
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ fragmentTests := []*specErrTest{
+ {
+ caption: "a production cannot contain a fragment",
+ specSrc: `
+#name test;
+
+s
+ : f
+ ;
+
+fragment f
+ : 'fragment';
+`,
+ errs: []error{semErrUndefinedSym},
+ },
+ {
+ caption: "fragments cannot be duplicated",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : "\f{f}";
+fragment f
+ : 'fragment 1';
+fragment f
+ : 'fragment 2';
+`,
+ errs: []error{semErrDuplicateFragment},
+ },
+ }
+
+ modeDirTests := []*specErrTest{
+ {
+ caption: "the `#mode` directive needs an ID parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#mode` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+);
+
+s
+ : foo bar
+ ;
+
+foo
+ : 'foo';
+bar #mode $x
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#mode` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode "mode_1"
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#mode` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode 'mode_1'
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#mode` directive cannot take a directive group parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode ()
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ pushDirTests := []*specErrTest{
+ {
+ caption: "the `#push` directive needs an ID parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#push` directive takes just one ID parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push mode_1 mode_2
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#push` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+);
+
+s
+ : foo bar
+ ;
+
+foo #push $x
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#push` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push "mode_1"
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#push` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push 'mode_1'
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#push` directive cannot take a directive group parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #push ()
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ popDirTests := []*specErrTest{
+ {
+ caption: "the `#pop` directive cannot take an ID parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+baz #pop mode_1
+ : 'baz';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#pop` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+);
+
+s
+ : foo bar baz
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+baz #pop $x
+ : 'baz';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#pop` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+baz #pop "mode_1"
+ : 'baz';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#pop` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+baz #pop 'mode_1'
+ : 'baz';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#pop` directive cannot take a directive parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push mode_1
+ : 'foo';
+bar #mode mode_1
+ : 'bar';
+baz #pop ()
+ : 'baz';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ }
+
+ skipDirTests := []*specErrTest{
+ {
+ caption: "the `#skip` directive cannot take an ID parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #skip bar
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#skip` directive cannot take an ordered symbol parameter",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $x
+);
+
+s
+ : foo bar
+ ;
+
+foo #skip $x
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#skip` directive cannot take a pattern parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #skip "bar"
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#skip` directive cannot take a string parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #skip 'bar'
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "the `#skip` directive cannot take a directive group parameter",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #skip ()
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrDirInvalidParam},
+ },
+ {
+ caption: "a terminal symbol used in productions cannot have the skip directive",
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo #skip
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrTermCannotBeSkipped},
+ },
+ }
+
+ var tests []*specErrTest
+ tests = append(tests, spellingInconsistenciesTests...)
+ tests = append(tests, prodTests...)
+ tests = append(tests, nameDirTests...)
+ tests = append(tests, precDirTests...)
+ tests = append(tests, leftDirTests...)
+ tests = append(tests, rightDirTests...)
+ tests = append(tests, assignDirTests...)
+ tests = append(tests, errorSymTests...)
+ tests = append(tests, astDirTests...)
+ tests = append(tests, altPrecDirTests...)
+ tests = append(tests, recoverDirTests...)
+ tests = append(tests, fragmentTests...)
+ tests = append(tests, modeDirTests...)
+ tests = append(tests, pushDirTests...)
+ tests = append(tests, popDirTests...)
+ tests = append(tests, skipDirTests...)
+ for _, test := range tests {
+ t.Run(test.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(test.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ _, err = b.build()
+ if err == nil {
+ t.Fatal("an expected error didn't occur")
+ }
+ specErrs, ok := err.(verr.SpecErrors)
+ if !ok {
+ t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err)
+ }
+ if len(specErrs) != len(test.errs) {
+ t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs)
+ }
+ for _, expected := range test.errs {
+ for _, actual := range specErrs {
+ if actual.Cause == expected {
+ return
+ }
+ }
+ }
+ t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs)
+ })
+ }
+}
diff --git a/tests/unit/grammar/lalr1_test.go b/tests/unit/grammar/lalr1_test.go
new file mode 100644
index 0000000..fd09333
--- /dev/null
+++ b/tests/unit/grammar/lalr1_test.go
@@ -0,0 +1,187 @@
+package grammar
+
+import (
+ "strings"
+ "testing"
+
+ "urubu/grammar/symbol"
+ "urubu/spec/grammar/parser"
+)
+
+// TestGenLALR1Automaton checks that genLALR1Automaton produces the expected
+// states for a grammar that is LALR(1) but not SLR(1): kernel items annotated
+// with lookahead symbols, per-symbol transitions, and reducible productions.
+func TestGenLALR1Automaton(t *testing.T) {
+ // This grammar belongs to LALR(1) class, not SLR(1).
+ src := `
+#name test;
+
+s: l eq r | r;
+l: ref r | id;
+r: l;
+eq: '=';
+ref: '*';
+id: "[A-Za-z0-9_]+";
+`
+
+ var gram *Grammar
+ var automaton *lalr1Automaton
+ {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err = b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // LALR(1) construction is layered: LR(0) automaton first, then the
+ // FIRST set, then lookahead propagation over the LR(0) states.
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+
+ firstSet, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatalf("failed to create a FIRST set: %v", err)
+ }
+
+ automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet)
+ if err != nil {
+ t.Fatalf("failed to create a LALR1 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genLALR1Automaton returns nil without any error")
+ }
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+ t.Errorf("failed to get an initial status: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ // Expected kernel items per state number. The lookahead symbols attached
+ // via withLookAhead are what distinguish LALR(1) kernels from LR(0) ones.
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
+ },
+ 1: {
+ withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
+ },
+ 2: {
+ withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
+ },
+ 3: {
+ withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
+ },
+ 4: {
+ withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 5: {
+ withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 6: {
+ withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ 7: {
+ withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 8: {
+ withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 9: {
+ withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ }
+
+ // Expected full state descriptions: each state's kernel, its GOTO/shift
+ // targets keyed by grammar symbol, and the productions reducible in it.
+ expectedStates := []*expectedLRState{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("s"): expectedKernels[1],
+ genSym("l"): expectedKernels[2],
+ genSym("r"): expectedKernels[3],
+ genSym("ref"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s'", "s"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("eq"): expectedKernels[6],
+ },
+ reducibleProds: []*production{
+ genProd("r", "l"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s", "r"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("r"): expectedKernels[7],
+ genSym("l"): expectedKernels[8],
+ genSym("ref"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[5],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("l", "id"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("r"): expectedKernels[9],
+ genSym("l"): expectedKernels[8],
+ genSym("ref"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[7],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("l", "ref", "r"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[8],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("r", "l"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[9],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s", "l", "eq", "r"),
+ },
+ },
+ }
+
+ testLRAutomaton(t, expectedStates, automaton.lr0Automaton)
+}
diff --git a/tests/unit/grammar/lexical/compiler_test.go b/tests/unit/grammar/lexical/compiler_test.go
new file mode 100644
index 0000000..b621cd2
--- /dev/null
+++ b/tests/unit/grammar/lexical/compiler_test.go
@@ -0,0 +1,338 @@
+package lexical
+
+import (
+ "encoding/json"
+ "fmt"
+ "testing"
+
+ spec "urubu/spec/grammar"
+)
+
+// TestLexSpec_Validate checks that LexSpec.Validate rejects a mode name that
+// is a spelling inconsistency of the predefined mode name 'default'.
+func TestLexSpec_Validate(t *testing.T) {
+ // We expect that the spelling inconsistency error will occur.
+ spec := &LexSpec{
+ Entries: []*LexEntry{
+ {
+ Modes: []spec.LexModeName{
+ // 'Default' is the spelling inconsistency because 'default' is predefined.
+ "Default",
+ },
+ Kind: "foo",
+ Pattern: "foo",
+ },
+ },
+ }
+ err := spec.Validate()
+ if err == nil {
+ t.Fatalf("expected error didn't occur")
+ }
+}
+
+// TestSnakeCaseToUpperCamelCase table-tests the snake_case to UpperCamelCase
+// conversion, including inputs that are already camel-cased, all-caps, or
+// padded with leading/trailing/repeated underscores.
+func TestSnakeCaseToUpperCamelCase(t *testing.T) {
+ tests := []struct {
+ snake string
+ camel string
+ }{
+ {
+ snake: "foo",
+ camel: "Foo",
+ },
+ {
+ snake: "foo_bar",
+ camel: "FooBar",
+ },
+ {
+ snake: "foo_bar_baz",
+ camel: "FooBarBaz",
+ },
+ {
+ snake: "Foo",
+ camel: "Foo",
+ },
+ {
+ snake: "fooBar",
+ camel: "FooBar",
+ },
+ {
+ snake: "FOO",
+ camel: "FOO",
+ },
+ {
+ snake: "FOO_BAR",
+ camel: "FOOBAR",
+ },
+ {
+ snake: "_foo_bar_",
+ camel: "FooBar",
+ },
+ {
+ snake: "___foo___bar___",
+ camel: "FooBar",
+ },
+ }
+ for _, tt := range tests {
+ c := SnakeCaseToUpperCamelCase(tt.snake)
+ if c != tt.camel {
+ t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
+ }
+ }
+}
+
+// TestFindSpellingInconsistencies checks that FindSpellingInconsistencies
+// groups identifiers that differ only in spelling style (e.g. "foo" vs "Foo"),
+// while exact duplicates and unrelated names are not reported.
+func TestFindSpellingInconsistencies(t *testing.T) {
+ tests := []struct {
+ ids []string
+ duplicated [][]string
+ }{
+ {
+ ids: []string{"foo", "foo"},
+ duplicated: nil,
+ },
+ {
+ ids: []string{"foo", "Foo"},
+ duplicated: [][]string{{"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo", "foo", "Foo"},
+ duplicated: [][]string{{"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo_bar_baz", "FooBarBaz"},
+ duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}},
+ },
+ {
+ ids: []string{"foo", "Foo", "bar", "Bar"},
+ duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"},
+ duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ duplicated := FindSpellingInconsistencies(tt.ids)
+ if len(duplicated) != len(tt.duplicated) {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated)
+ }
+ for i, dupIDs := range duplicated {
+ if len(dupIDs) != len(tt.duplicated[i]) {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
+ }
+ for j, id := range dupIDs {
+ if id != tt.duplicated[i][j] {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
+ }
+ }
+ }
+ })
+ }
+}
+
+// TestCompile checks Compile's validation of lexical specifications:
+// duplicate kind names within fragments or within non-fragments are rejected,
+// spelling inconsistencies between kind names and between mode names are
+// rejected, and fragments are exempt from the spelling-inconsistency check.
+func TestCompile(t *testing.T) {
+ tests := []struct {
+ Caption string
+ Spec string
+ Err bool
+ }{
+ {
+ Caption: "allow duplicates names between fragments and non-fragments",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a2z",
+ "pattern": "\\f{a2z}"
+ },
+ {
+ "fragment": true,
+ "kind": "a2z",
+ "pattern": "[a-z]"
+ }
+ ]
+}
+`,
+ },
+ {
+ Caption: "don't allow duplicates names in non-fragments",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a2z",
+ "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
+ },
+ {
+ "kind": "a2z",
+ "pattern": "[a-z]"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow duplicates names in fragments",
+ // Fixed: the duplicated entries below used the key "fragments" instead
+ // of "fragment" (the key used everywhere else in this file), so they
+ // unmarshaled as non-fragments and the case passed for the wrong reason.
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a2z",
+ "pattern": "\\f{a2z}"
+ },
+ {
+ "fragment": true,
+ "kind": "a2z",
+ "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
+ },
+ {
+ "fragment": true,
+ "kind": "a2z",
+ "pattern": "[a-z]"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow kind names in the same mode to contain spelling inconsistencies",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow kind names across modes to contain spelling inconsistencies",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "modes": ["default"],
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "modes": ["other_mode"],
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow mode names to contain spelling inconsistencies",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "modes": ["foo_1"],
+ "kind": "a",
+ "pattern": "a"
+ },
+ {
+ "modes": ["foo1"],
+ "kind": "b",
+ "pattern": "b"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a",
+ "pattern": "a"
+ },
+ {
+ "fragment": true,
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "fragment": true,
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ },
+ {
+ Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "modes": ["default"],
+ "kind": "a",
+ "pattern": "a"
+ },
+ {
+ "modes": ["default"],
+ "fragment": true,
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "modes": ["other_mode"],
+ "fragment": true,
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) {
+ lspec := &LexSpec{}
+ err := json.Unmarshal([]byte(tt.Spec), lspec)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ // Compile returns (compiled spec, error, warnings); warnings are
+ // irrelevant to these cases.
+ clspec, err, _ := Compile(lspec, CompressionLevelMin)
+ if tt.Err {
+ if err == nil {
+ t.Fatalf("expected an error")
+ }
+ if clspec != nil {
+ t.Fatalf("Compile function mustn't return a compiled specification")
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if clspec == nil {
+ t.Fatalf("Compile function must return a compiled specification")
+ }
+ }
+ })
+ }
+}
diff --git a/tests/unit/grammar/lexical/dfa/dfa_test.go b/tests/unit/grammar/lexical/dfa/dfa_test.go
new file mode 100644
index 0000000..38577cf
--- /dev/null
+++ b/tests/unit/grammar/lexical/dfa/dfa_test.go
@@ -0,0 +1,121 @@
+package dfa
+
+import (
+ "strings"
+ "testing"
+
+ "urubu/grammar/lexical/parser"
+ spec "urubu/spec/grammar"
+)
+
+// TestGenDFA builds a DFA from the classic pattern (a|b)*abb and verifies
+// the transition table, the initial state, and the accepting-states table
+// against hand-computed symbol-position sets.
+func TestGenDFA(t *testing.T) {
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ dfa := GenDFA(bt, symTab)
+ if dfa == nil {
+ t.Fatalf("DFA is nil")
+ }
+
+ // symPos builds a non-end-mark symbol position; panics are fine in a test
+ // helper because the inputs are constants.
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ // endPos builds an end-mark symbol position.
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ // The four DFA states of (a|b)*abb, expressed as symbol-position sets.
+ s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
+ s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
+ s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
+ s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
+
+ rune2Int := func(char rune, index int) uint8 {
+ return uint8([]byte(string(char))[index])
+ }
+
+ tranS0 := [256]string{}
+ tranS0[rune2Int('a', 0)] = s1.hash()
+ tranS0[rune2Int('b', 0)] = s0.hash()
+
+ tranS1 := [256]string{}
+ tranS1[rune2Int('a', 0)] = s1.hash()
+ tranS1[rune2Int('b', 0)] = s2.hash()
+
+ tranS2 := [256]string{}
+ tranS2[rune2Int('a', 0)] = s1.hash()
+ tranS2[rune2Int('b', 0)] = s3.hash()
+
+ tranS3 := [256]string{}
+ tranS3[rune2Int('a', 0)] = s1.hash()
+ tranS3[rune2Int('b', 0)] = s0.hash()
+
+ expectedTranTab := map[string][256]string{
+ s0.hash(): tranS0,
+ s1.hash(): tranS1,
+ s2.hash(): tranS2,
+ s3.hash(): tranS3,
+ }
+ if len(dfa.TransitionTable) != len(expectedTranTab) {
+ t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
+ }
+ for h, eTranTab := range expectedTranTab {
+ tranTab, ok := dfa.TransitionTable[h]
+ if !ok {
+ t.Errorf("no entry; hash: %v", h)
+ continue
+ }
+ if len(tranTab) != len(eTranTab) {
+ t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
+ }
+ for c, eNext := range eTranTab {
+ // An empty string means "no transition expected" for this byte.
+ if eNext == "" {
+ continue
+ }
+
+ next := tranTab[c]
+ if next == "" {
+ // Fixed: typo "enatry" -> "entry" in the failure message.
+ t.Errorf("no entry: hash: %v, char: %v", h, c)
+ }
+ if next != eNext {
+ t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next)
+ }
+ }
+ }
+
+ if dfa.InitialState != s0.hash() {
+ t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState)
+ }
+
+ // Only s3 (which contains the end mark) accepts, with mode-kind ID 1.
+ accTab := map[string]spec.LexModeKindID{
+ s3.hash(): 1,
+ }
+ if len(dfa.AcceptingStatesTable) != len(accTab) {
+ t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable))
+ }
+ for eState, eID := range accTab {
+ id, ok := dfa.AcceptingStatesTable[eState]
+ if !ok {
+ t.Errorf("accepting state is not found: state: %v", eState)
+ }
+ if id != eID {
+ t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id)
+ }
+ }
+}
diff --git a/tests/unit/grammar/lexical/dfa/symbol_position_test.go b/tests/unit/grammar/lexical/dfa/symbol_position_test.go
new file mode 100644
index 0000000..c867f64
--- /dev/null
+++ b/tests/unit/grammar/lexical/dfa/symbol_position_test.go
@@ -0,0 +1,79 @@
+package dfa
+
+import (
+ "fmt"
+ "testing"
+)
+
+// TestNewSymbolPosition checks newSymbolPosition's range validation: values
+// below symbolPositionMin or above symbolPositionMax must fail, and valid
+// values must round-trip through describe() with the end-mark flag intact.
+func TestNewSymbolPosition(t *testing.T) {
+ tests := []struct {
+ n uint16
+ endMark bool
+ err bool
+ }{
+ {
+ n: 0,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: 0,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: true,
+ err: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
+ pos, err := newSymbolPosition(tt.n, tt.endMark)
+ if tt.err {
+ if err == nil {
+ t.Fatal("err is nil")
+ }
+ return
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, endMark := pos.describe()
+ if n != tt.n || endMark != tt.endMark {
+ t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
+ }
+ })
+ }
+}
diff --git a/tests/unit/grammar/lexical/dfa/tree_test.go b/tests/unit/grammar/lexical/dfa/tree_test.go
new file mode 100644
index 0000000..de3ebbb
--- /dev/null
+++ b/tests/unit/grammar/lexical/dfa/tree_test.go
@@ -0,0 +1,257 @@
+package dfa
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar/lexical/parser"
+ spec "urubu/spec/grammar"
+)
+
+// TestByteTree checks the nullable/firstpos/lastpos attributes of each byte
+// tree node kind (symbol, end marker, concat, alt, repeat, option) against
+// hand-computed expectations.
+func TestByteTree(t *testing.T) {
+ tests := []struct {
+ root byteTree
+ nullable bool
+ first *symbolPositionSet
+ last *symbolPositionSet
+ }{
+ {
+ root: newSymbolNodeWithPos(0, 1),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newEndMarkerNodeWithPos(1, 1),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newConcatNode(
+ newSymbolNodeWithPos(0, 1),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(2),
+ },
+ {
+ // A nullable left child makes the right child's firstpos visible.
+ root: newConcatNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(2),
+ },
+ {
+ root: newConcatNode(
+ newSymbolNodeWithPos(0, 1),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ // A concatenation is nullable only when both children are nullable.
+ root: newConcatNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newSymbolNodeWithPos(0, 1),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ // An alternation is nullable when either branch is nullable.
+ root: newAltNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newSymbolNodeWithPos(0, 1),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ nullable: true,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newOptionNode(newSymbolNodeWithPos(0, 1)),
+ nullable: true,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ if tt.root.nullable() != tt.nullable {
+ t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable())
+ }
+ if tt.first.hash() != tt.root.first().hash() {
+ t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first())
+ }
+ if tt.last.hash() != tt.root.last().hash() {
+ t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last())
+ }
+ })
+ }
+}
+
+// newSymbolNodeWithPos is a test helper that builds a symbol node for byte v
+// and assigns it the given symbol position.
+func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode {
+ n := newSymbolNode(v)
+ n.pos = pos
+ return n
+}
+
+// newEndMarkerNodeWithPos is a test helper that builds an end-marker node for
+// the given mode-kind ID and assigns it the given symbol position.
+func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode {
+ n := newEndMarkerNode(spec.LexModeKindID(id))
+ n.pos = pos
+ return n
+}
+
+// TestFollowAndSymbolTable parses (a|b)*abb, converts it to a byte tree, and
+// verifies both the generated follow table and the symbol table (position to
+// byte range, and end position to mode-kind ID).
+func TestFollowAndSymbolTable(t *testing.T) {
+ // symPos builds a non-end-mark symbol position; inputs are constants,
+ // so a panic on error is acceptable here.
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ // endPos builds an end-mark symbol position.
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ {
+ followTab := genFollowTable(bt)
+ if followTab == nil {
+ t.Fatal("follow table is nil")
+ }
+ expectedFollowTab := followTable{
+ 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 3: newSymbolPositionSet().add(symPos(4)),
+ 4: newSymbolPositionSet().add(symPos(5)),
+ 5: newSymbolPositionSet().add(endPos(6)),
+ }
+ testFollowTable(t, expectedFollowTab, followTab)
+ }
+
+ {
+ // entry builds a single-byte range (from == to).
+ entry := func(v byte) byteRange {
+ return byteRange{
+ from: v,
+ to: v,
+ }
+ }
+
+ expectedSymTab := &symbolTable{
+ symPos2Byte: map[symbolPosition]byteRange{
+ symPos(1): entry(byte('a')),
+ symPos(2): entry(byte('b')),
+ symPos(3): entry(byte('a')),
+ symPos(4): entry(byte('b')),
+ symPos(5): entry(byte('b')),
+ },
+ endPos2ID: map[symbolPosition]spec.LexModeKindID{
+ endPos(6): 1,
+ },
+ }
+ testSymbolTable(t, expectedSymTab, symTab)
+ }
+}
+
+// testFollowTable compares an actual follow table against an expected one,
+// entry by entry, using the position sets' hashes for equality.
+func testFollowTable(t *testing.T, expected, actual followTable) {
+ // Mark as a helper so failures report the caller's line, consistent
+ // with the sibling helper testSymbolTable.
+ t.Helper()
+
+ if len(actual) != len(expected) {
+ t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual))
+ }
+ for ePos, eSet := range expected {
+ aSet, ok := actual[ePos]
+ if !ok {
+ t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet)
+ }
+ if aSet.hash() != eSet.hash() {
+ // Fixed: the want/got arguments were swapped (the actual set was
+ // printed as "want" and the expected set as "got").
+ t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, eSet, aSet)
+ }
+ }
+}
+
+// testSymbolTable compares an actual symbol table against an expected one,
+// checking the position-to-byte-range map and the end-position-to-ID map.
+func testSymbolTable(t *testing.T, expected, actual *symbolTable) {
+ t.Helper()
+
+ if len(actual.symPos2Byte) != len(expected.symPos2Byte) {
+ t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte))
+ }
+ for ePos, eByte := range expected.symPos2Byte {
+ // NOTE(review): this local shadows the builtin identifier "byte";
+ // a rename (e.g. aByte) would be clearer.
+ byte, ok := actual.symPos2Byte[ePos]
+ if !ok {
+ t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte)
+ continue
+ }
+ if byte.from != eByte.from || byte.to != eByte.to {
+ t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte)
+ }
+ }
+
+ if len(actual.endPos2ID) != len(expected.endPos2ID) {
+ t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID))
+ }
+ for ePos, eID := range expected.endPos2ID {
+ id, ok := actual.endPos2ID[ePos]
+ if !ok {
+ t.Errorf("an end position entry is not found: %v -> %v", ePos, eID)
+ continue
+ }
+ if id != eID {
+ t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id)
+ }
+ }
+}
diff --git a/tests/unit/grammar/lexical/parser/lexer_test.go b/tests/unit/grammar/lexical/parser/lexer_test.go
new file mode 100644
index 0000000..055466e
--- /dev/null
+++ b/tests/unit/grammar/lexical/parser/lexer_test.go
@@ -0,0 +1,524 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+)
+
+func TestLexer(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ tokens []*token
+ err error
+ }{
+ {
+ caption: "lexer can recognize ordinaly characters",
+ src: "123abcいろは",
+ tokens: []*token{
+ newToken(tokenKindChar, '1'),
+ newToken(tokenKindChar, '2'),
+ newToken(tokenKindChar, '3'),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, 'b'),
+ newToken(tokenKindChar, 'c'),
+ newToken(tokenKindChar, 'い'),
+ newToken(tokenKindChar, 'ろ'),
+ newToken(tokenKindChar, 'は'),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters in default mode",
+ src: ".*+?|()[\\u",
+ tokens: []*token{
+ newToken(tokenKindAnyChar, nullChar),
+ newToken(tokenKindRepeat, nullChar),
+ newToken(tokenKindRepeatOneOrMore, nullChar),
+ newToken(tokenKindOption, nullChar),
+ newToken(tokenKindAlt, nullChar),
+ newToken(tokenKindGroupOpen, nullChar),
+ newToken(tokenKindGroupClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the escape sequences in default mode",
+ src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
+ tokens: []*token{
+ newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
+ newToken(tokenKindChar, '*'),
+ newToken(tokenKindChar, '+'),
+ newToken(tokenKindChar, '?'),
+ newToken(tokenKindChar, '|'),
+ newToken(tokenKindChar, '('),
+ newToken(tokenKindChar, ')'),
+ newToken(tokenKindChar, '['),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "], {, and } are treated as an ordinary character in default mode",
+ src: "]{}",
+ tokens: []*token{
+ newToken(tokenKindChar, ']'),
+ newToken(tokenKindChar, '{'),
+ newToken(tokenKindChar, '}'),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters in bracket expression mode",
+ src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("09AF"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("09abcf"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the escape sequences in bracket expression mode",
+ src: "[\\^a\\-z]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "in a bracket expression, the special characters are also handled as normal characters",
+ src: "[\\\\.*+?|()[",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
+ newToken(tokenKindChar, '*'),
+ newToken(tokenKindChar, '+'),
+ newToken(tokenKindChar, '?'),
+ newToken(tokenKindChar, '|'),
+ newToken(tokenKindChar, '('),
+ newToken(tokenKindChar, ')'),
+ newToken(tokenKindChar, '['),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
+ // [...-...][...-][-...][-]
+ // ~~~~~~~ ~ ~ ~
+ // ^ ^ ^ ^
+ // | | | `-- Ordinary Character (b)
+ // | | `-- Ordinary Character (b)
+ // | `-- Ordinary Character (b)
+ // `-- Character Range (a)
+ //
+ // a. *-* is handled as a character-range expression.
+ // b. *-, -*, or - are handled as ordinary characters.
+ src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
+ // [^...^...][^]
+ // ~~ ~ ~~
+ // ^ ^ ^^
+ // | | |`-- Ordinary Character (c)
+ // | | `-- Bracket Expression
+ // | `-- Ordinary Character (b)
+ // `-- Inverse Bracket Expression (a)
+ //
+ // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
+ // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols.
+ // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
+ src: "[^^][^]",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer raises an error when an invalid escape sequence appears",
+ src: "\\@",
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+ src: "\\",
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "lexer raises an error when an invalid escape sequence appears",
+ src: "[\\@",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+ src: "[\\",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "lexer can recognize the special characters and code points in code point expression mode",
+ src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a one digit hex string isn't a valid code point",
+ src: "\\u{0",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a two digits hex string isn't a valid code point",
+ src: "\\u{01",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a three digits hex string isn't a valid code point",
+ src: "\\u{012",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a four digits hex string is a valid code point",
+ src: "\\u{0123}",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ },
+ },
+ {
+ caption: "a five digits hex string isn't a valid code point",
+ src: "\\u{01234",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a six digits hex string is a valid code point",
+ src: "\\u{012345}",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("012345"),
+ newToken(tokenKindRBrace, nullChar),
+ },
+ },
+ {
+ caption: "a seven digits hex string isn't a valid code point",
+ src: "\\u{0123456",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a code point must be hex digits",
+ src: "\\u{g",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a code point must be hex digits",
+ src: "\\u{G",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "lexer can recognize the special characters and symbols in character property expression mode",
+ src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
+ tokens: []*token{
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters and symbols in fragment expression mode",
+ src: "\\f{integer}",
+ tokens: []*token{
+ newToken(tokenKindFragmentLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newFragmentSymbolToken("integer"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a fragment expression is not supported in a bracket expression",
+ src: "[\\f",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "a fragment expression is not supported in an inverse bracket expression",
+ src: "[^\\f",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ lex := newLexer(strings.NewReader(tt.src))
+ var err error
+ var tok *token
+ i := 0
+ for {
+ tok, err = lex.next()
+ if err != nil {
+ break
+ }
+ if i >= len(tt.tokens) {
+ break
+ }
+ eTok := tt.tokens[i]
+ i++
+ testToken(t, tok, eTok)
+
+ if tok.kind == tokenKindEOF {
+ break
+ }
+ }
+ if tt.err != nil {
+ if err != ParseErr {
+ t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
+ }
+ detail, cause := lex.error()
+ if cause != tt.err {
+ t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ }
+ if i < len(tt.tokens) {
+				t.Fatalf("expected more tokens")
+ }
+ })
+ }
+}
+
+func testToken(t *testing.T, a, e *token) {
+ t.Helper()
+ if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
+ t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
+ }
+}
diff --git a/tests/unit/grammar/lexical/parser/parser_test.go b/tests/unit/grammar/lexical/parser/parser_test.go
new file mode 100644
index 0000000..4c9557d
--- /dev/null
+++ b/tests/unit/grammar/lexical/parser/parser_test.go
@@ -0,0 +1,1389 @@
+package parser
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+
+ spec "urubu/spec/grammar"
+ "urubu/ucd"
+)
+
+func TestParse(t *testing.T) {
+ tests := []struct {
+ pattern string
+ fragments map[spec.LexKindName]string
+ ast CPTree
+ syntaxError error
+
+	// When an AST is large, such as for patterns containing a character property expression, this test
+	// only checks that the pattern is parsable. The validity of such an AST is verified by checking
+	// that it can be matched correctly using the driver.
+ skipTestAST bool
+ }{
+ {
+ pattern: "a",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "abc",
+ ast: genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ },
+ {
+ pattern: "a?",
+ ast: newOptionNode(
+ newSymbolNode('a'),
+ ),
+ },
+ {
+ pattern: "[abc]?",
+ ast: newOptionNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{3042}?",
+ ast: newOptionNode(
+ newSymbolNode('\u3042'),
+ ),
+ },
+ {
+ pattern: "\\p{Letter}?",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\f{a2c}?",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newOptionNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(a)?",
+ ast: newOptionNode(
+ newSymbolNode('a'),
+ ),
+ },
+ {
+ pattern: "((a?)?)?",
+ ast: newOptionNode(
+ newOptionNode(
+ newOptionNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(abc)?",
+ ast: newOptionNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "(a|b)?",
+ ast: newOptionNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ ),
+ },
+ {
+ pattern: "?",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "(?)",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a|?",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "?|b",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a??",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a*",
+ ast: newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ },
+ {
+ pattern: "[abc]*",
+ ast: newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{3042}*",
+ ast: newRepeatNode(
+ newSymbolNode('\u3042'),
+ ),
+ },
+ {
+ pattern: "\\p{Letter}*",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\f{a2c}*",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newRepeatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "((a*)*)*",
+ ast: newRepeatNode(
+ newRepeatNode(
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(abc)*",
+ ast: newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "(a|b)*",
+ ast: newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ ),
+ },
+ {
+ pattern: "*",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "(*)",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a|*",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "*|b",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a**",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a+",
+ ast: genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ },
+ {
+ pattern: "[abc]+",
+ ast: genConcatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{3042}+",
+ ast: genConcatNode(
+ newSymbolNode('\u3042'),
+ newRepeatNode(
+ newSymbolNode('\u3042'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\p{Letter}+",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\f{a2c}+",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ newRepeatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "((a+)+)+",
+ ast: genConcatNode(
+ genConcatNode(
+ genConcatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(abc)+",
+ ast: genConcatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(a|b)+",
+ ast: genConcatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "+",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "(+)",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a|+",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "+|b",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a++",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: ".",
+ ast: newRangeSymbolNode(0x00, 0x10FFFF),
+ },
+ {
+ pattern: "[a]",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "[abc]",
+ ast: genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ },
+ {
+ pattern: "[a-z]",
+ ast: newRangeSymbolNode('a', 'z'),
+ },
+ {
+ pattern: "[A-Za-z]",
+ ast: genAltNode(
+ newRangeSymbolNode('A', 'Z'),
+ newRangeSymbolNode('a', 'z'),
+ ),
+ },
+ {
+ pattern: "[\\u{004E}]",
+ ast: newSymbolNode('N'),
+ },
+ {
+ pattern: "[\\u{0061}-\\u{007A}]",
+ ast: newRangeSymbolNode('a', 'z'),
+ },
+ {
+ pattern: "[\\p{Lu}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[a-\\p{Lu}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[\\p{Lu}-z]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[\\p{Lu}-\\p{Ll}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[z-a]",
+ syntaxError: synErrRangeInvalidOrder,
+ },
+ {
+ pattern: "a[]",
+ syntaxError: synErrBExpNoElem,
+ },
+ {
+ pattern: "[]a",
+ syntaxError: synErrBExpNoElem,
+ },
+ {
+ pattern: "[]",
+ syntaxError: synErrBExpNoElem,
+ },
+ {
+ pattern: "[^\\u{004E}]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '\u004E'-1),
+ newRangeSymbolNode('\u004E'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^\\u{0061}-\\u{007A}]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '\u0061'-1),
+ newRangeSymbolNode('\u007A'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^\\p{Lu}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[^a-\\p{Lu}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\p{Lu}-z]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\p{Lu}-\\p{Ll}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\u{0000}-\\u{10FFFF}]",
+ syntaxError: synErrUnmatchablePattern,
+ },
+ {
+ pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]",
+ syntaxError: synErrUnmatchablePattern,
+ },
+ {
+ pattern: "[^]",
+ ast: newSymbolNode('^'),
+ },
+ {
+ pattern: "[",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[^",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([^",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[^a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([^a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[^a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([^a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "]",
+ ast: newSymbolNode(']'),
+ },
+ {
+ pattern: "(]",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "a]",
+ ast: genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode(']'),
+ ),
+ },
+ {
+ pattern: "(a]",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "([)",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([a)",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[a-]",
+ ast: genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('-'),
+ ),
+ },
+ {
+ pattern: "[^a-]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 0x2C),
+ newRangeSymbolNode(0x2E, 0x60),
+ newRangeSymbolNode(0x62, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[-z]",
+ ast: genAltNode(
+ newSymbolNode('-'),
+ newSymbolNode('z'),
+ ),
+ },
+ {
+ pattern: "[^-z]",
+ ast: newAltNode(
+ newRangeSymbolNode(0x00, 0x2C),
+ newAltNode(
+ newRangeSymbolNode(0x2E, 0x79),
+ newRangeSymbolNode(0x7B, 0x10FFFF),
+ ),
+ ),
+ },
+ {
+ pattern: "[-]",
+ ast: newSymbolNode('-'),
+ },
+ {
+ pattern: "[^-]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 0x2C),
+ newRangeSymbolNode(0x2E, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^01]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '0'-1),
+ newRangeSymbolNode('1'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^10]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '0'-1),
+ newRangeSymbolNode('1'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^a-z]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 'a'-1),
+ newRangeSymbolNode('z'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^az]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 'a'-1),
+ genAltNode(
+ newRangeSymbolNode('a'+1, 'z'-1),
+ newRangeSymbolNode('z'+1, 0x10FFFF),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{006E}",
+ ast: newSymbolNode('\u006E'),
+ },
+ {
+ pattern: "\\u{03BD}",
+ ast: newSymbolNode('\u03BD'),
+ },
+ {
+ pattern: "\\u{306B}",
+ ast: newSymbolNode('\u306B'),
+ },
+ {
+ pattern: "\\u{01F638}",
+ ast: newSymbolNode('\U0001F638'),
+ },
+ {
+ pattern: "\\u{0000}",
+ ast: newSymbolNode('\u0000'),
+ },
+ {
+ pattern: "\\u{10FFFF}",
+ ast: newSymbolNode('\U0010FFFF'),
+ },
+ {
+ pattern: "\\u{110000}",
+ syntaxError: synErrCPExpOutOfRange,
+ },
+ {
+ pattern: "\\u",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\u{",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\u{03BD",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\u{}",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\p{Letter}",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p{General_Category=Letter}",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p{ Letter }",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p{ General_Category = Letter }",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{Letter",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{General_Category=}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{General_Category= }",
+ syntaxError: synErrCharPropInvalidSymbol,
+ },
+ {
+ pattern: "\\p{=Letter}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{ =Letter}",
+ syntaxError: synErrCharPropInvalidSymbol,
+ },
+ {
+ pattern: "\\p{=}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\f{a2c}",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\f{ a2c }",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\f",
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "\\f{",
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "\\f{a2c",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "(a)",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "(((a)))",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "a()",
+ syntaxError: synErrGroupNoElem,
+ },
+ {
+ pattern: "()a",
+ syntaxError: synErrGroupNoElem,
+ },
+ {
+ pattern: "()",
+ syntaxError: synErrGroupNoElem,
+ },
+ {
+ pattern: "(",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "a(",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "(a",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "((",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "((a)",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: ")",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "a)",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: ")a",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "))",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "(a))",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "Mulder|Scully",
+ ast: genAltNode(
+ genConcatNode(
+ newSymbolNode('M'),
+ newSymbolNode('u'),
+ newSymbolNode('l'),
+ newSymbolNode('d'),
+ newSymbolNode('e'),
+ newSymbolNode('r'),
+ ),
+ genConcatNode(
+ newSymbolNode('S'),
+ newSymbolNode('c'),
+ newSymbolNode('u'),
+ newSymbolNode('l'),
+ newSymbolNode('l'),
+ newSymbolNode('y'),
+ ),
+ ),
+ },
+ {
+ pattern: "Langly|Frohike|Byers",
+ ast: genAltNode(
+ genConcatNode(
+ newSymbolNode('L'),
+ newSymbolNode('a'),
+ newSymbolNode('n'),
+ newSymbolNode('g'),
+ newSymbolNode('l'),
+ newSymbolNode('y'),
+ ),
+ genConcatNode(
+ newSymbolNode('F'),
+ newSymbolNode('r'),
+ newSymbolNode('o'),
+ newSymbolNode('h'),
+ newSymbolNode('i'),
+ newSymbolNode('k'),
+ newSymbolNode('e'),
+ ),
+ genConcatNode(
+ newSymbolNode('B'),
+ newSymbolNode('y'),
+ newSymbolNode('e'),
+ newSymbolNode('r'),
+ newSymbolNode('s'),
+ ),
+ ),
+ },
+ {
+ pattern: "|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "||",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Mulder|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "|Scully",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Langly|Frohike|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Langly||Byers",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "|Frohike|Byers",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "|Frohike|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Fox(|)Mulder",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "(Fox|)Mulder",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Fox(|Mulder)",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) {
+ fragmentTrees := map[spec.LexKindName]CPTree{}
+ for kind, pattern := range tt.fragments {
+ p := NewParser(kind, strings.NewReader(pattern))
+ root, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ fragmentTrees[kind] = root
+ }
+ err := CompleteFragments(fragmentTrees)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern))
+ root, err := p.Parse()
+ if tt.syntaxError != nil {
+ // printCPTree(os.Stdout, root, "", "")
+ if err != ParseErr {
+ t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
+ }
+ _, synErr := p.Error()
+ if synErr != tt.syntaxError {
+ t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr)
+ }
+ if root != nil {
+ t.Fatalf("tree must be nil")
+ }
+ } else {
+ if err != nil {
+ detail, cause := p.Error()
+ t.Fatalf("%v: %v: %v", err, cause, detail)
+ }
+ if root == nil {
+ t.Fatal("tree must be non-nil")
+ }
+
+ complete, err := ApplyFragments(root, fragmentTrees)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !complete {
+ t.Fatalf("incomplete fragments")
+ }
+
+ // printCPTree(os.Stdout, root, "", "")
+ if !tt.skipTestAST {
+ r := root.(*rootNode)
+ testAST(t, tt.ast, r.tree)
+ }
+ }
+ })
+ }
+}
+
+func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) {
+ for _, cProp := range ucd.ContributoryProperties() {
+ t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) {
+ p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp)))
+ root, err := p.Parse()
+ if err == nil {
+ t.Fatalf("expected syntax error: got: nil")
+ }
+ _, synErr := p.Error()
+ if synErr != synErrCharPropUnsupported {
+ t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr)
+ }
+ if root != nil {
+ t.Fatalf("tree is not nil")
+ }
+ })
+ }
+}
+
+func TestExclude(t *testing.T) {
+ for _, test := range []struct {
+ caption string
+ target CPTree
+ base CPTree
+ result CPTree
+ }{
+ // t.From > b.From && t.To < b.To
+
+ // |t.From - b.From| = 1
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+ +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1",
+ target: newSymbolNode('1'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newAltNode(
+ newSymbolNode('0'),
+ newSymbolNode('2'),
+ ),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+--+--+
+ // Result (b - t): +--+--+ +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1",
+ target: newSymbolNode('2'),
+ base: newRangeSymbolNode('0', '4'),
+ result: newAltNode(
+ newRangeSymbolNode('0', '1'),
+ newRangeSymbolNode('3', '4'),
+ ),
+ },
+
+ // t.From <= b.From && t.To >= b.From && t.To < b.To
+
+ // |b.From - t.From| = 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
+ target: newSymbolNode('0'),
+ base: newRangeSymbolNode('0', '1'),
+ result: newSymbolNode('1'),
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
+ target: newSymbolNode('0'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newRangeSymbolNode('1', '2'),
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newSymbolNode('2'),
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('0', '3'),
+ result: newRangeSymbolNode('2', '3'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('1', '2'),
+ result: newSymbolNode('2'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('1', '3'),
+ result: newRangeSymbolNode('2', '3'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
+ target: newRangeSymbolNode('0', '2'),
+ base: newRangeSymbolNode('1', '3'),
+ result: newSymbolNode('3'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
+ target: newRangeSymbolNode('0', '2'),
+ base: newRangeSymbolNode('1', '4'),
+ result: newRangeSymbolNode('3', '4'),
+ },
+
+ // t.From > b.From && t.From <= b.To && t.To >= b.To
+
+ // |t.From - b.From| = 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
+ target: newSymbolNode('1'),
+ base: newRangeSymbolNode('0', '1'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| = 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('1', '2'),
+ base: newRangeSymbolNode('0', '1'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| = 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
+ target: newRangeSymbolNode('1', '2'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| = 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('1', '3'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
+ target: newSymbolNode('2'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('2', '3'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
+ target: newRangeSymbolNode('2', '3'),
+ base: newRangeSymbolNode('0', '3'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('2', '4'),
+ base: newRangeSymbolNode('0', '3'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+
+ // t.From <= b.From && t.To >= b.To
+
+ // |b.From - t.From| = 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0",
+ target: newSymbolNode('0'),
+ base: newSymbolNode('0'),
+ result: nil,
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('0', '1'),
+ base: newSymbolNode('0'),
+ result: nil,
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0",
+ target: newRangeSymbolNode('0', '1'),
+ base: newSymbolNode('1'),
+ result: nil,
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('0', '2'),
+ base: newSymbolNode('1'),
+ result: nil,
+ },
+
+ // Others
+
+ // |b.From - t.From| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| = 1",
+ target: newSymbolNode('0'),
+ base: newSymbolNode('1'),
+ result: newSymbolNode('1'),
+ },
+ // |b.From - t.From| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| > 1",
+ target: newSymbolNode('0'),
+ base: newSymbolNode('2'),
+ result: newSymbolNode('2'),
+ },
+ // |t.To - b.To| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.To - b.To| = 1",
+ target: newSymbolNode('1'),
+ base: newSymbolNode('0'),
+ result: newSymbolNode('0'),
+ },
+ // |t.To - b.To| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.To - b.To| > 1",
+ target: newSymbolNode('2'),
+ base: newSymbolNode('0'),
+ result: newSymbolNode('0'),
+ },
+ } {
+ t.Run(test.caption, func(t *testing.T) {
+ r := exclude(test.target, test.base)
+ testAST(t, test.result, r)
+ })
+ }
+}
+
+func testAST(t *testing.T, expected, actual CPTree) {
+ t.Helper()
+
+ aTy := reflect.TypeOf(actual)
+ eTy := reflect.TypeOf(expected)
+ if eTy != aTy {
+ t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy)
+ }
+
+ if actual == nil {
+ return
+ }
+
+ switch e := expected.(type) {
+ case *symbolNode:
+ a := actual.(*symbolNode)
+ if a.From != e.From || a.To != e.To {
+ t.Fatalf("unexpected node: want: %+v, got: %+v", e, a)
+ }
+ }
+ eLeft, eRight := expected.children()
+ aLeft, aRight := actual.children()
+ testAST(t, eLeft, aLeft)
+ testAST(t, eRight, aRight)
+}
diff --git a/tests/unit/grammar/lr0_test.go b/tests/unit/grammar/lr0_test.go
new file mode 100644
index 0000000..0a9ec24
--- /dev/null
+++ b/tests/unit/grammar/lr0_test.go
@@ -0,0 +1,448 @@
+package grammar
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar/symbol"
+ "urubu/spec/grammar/parser"
+)
+
// expectedLRState describes the expected contents of one state of an LR
// automaton: its kernel items, its outgoing transitions, and the productions
// it can reduce by (including items derived from empty productions).
type expectedLRState struct {
	kernelItems    []*lrItem                   // kernel items identifying the state
	nextStates     map[symbol.Symbol][]*lrItem // expected transitions, keyed by symbol, valued by the target state's kernel
	reducibleProds []*production               // productions reducible in this state
	emptyProdItems []*lrItem                   // items that come from empty (epsilon) productions
}
+
// TestGenLR0Automaton builds the canonical LR(0) automaton for the classic
// arithmetic-expression grammar (expr/term/factor) and checks every state's
// kernel items, transitions, and reducible productions against hand-computed
// expectations.
func TestGenLR0Automaton(t *testing.T) {
	src := `
#name test;

expr
	: expr add term
	| term
	;
term
	: term mul factor
	| factor
	;
factor
	: l_paren expr r_paren
	| id
	;
add: "\+";
mul: "\*";
l_paren: "\(";
r_paren: "\)";
id: "[A-Za-z_][0-9A-Za-z_]*";
`

	// Parse the grammar spec, build the Grammar, and generate the automaton.
	var gram *Grammar
	var automaton *lr0Automaton
	{
		ast, err := parser.Parse(strings.NewReader(src))
		if err != nil {
			t.Fatal(err)
		}
		b := GrammarBuilder{
			AST: ast,
		}
		gram, err = b.build()
		if err != nil {
			t.Fatal(err)
		}

		automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
		if err != nil {
			t.Fatalf("failed to create a LR0 automaton: %v", err)
		}
		if automaton == nil {
			t.Fatalf("genLR0Automaton returns nil without any error")
		}
	}

	initialState := automaton.states[automaton.initialState]
	if initialState == nil {
		t.Errorf("failed to get an initial status: %v", automaton.initialState)
	}

	genSym := newTestSymbolGenerator(t, gram.symbolTable)
	genProd := newTestProductionGenerator(t, genSym)
	genLR0Item := newTestLR0ItemGenerator(t, genProd)

	// Kernel item sets for each of the 12 states of the canonical LR(0)
	// collection for this grammar, keyed by state number.
	expectedKernels := map[int][]*lrItem{
		0: {
			genLR0Item("expr'", 0, "expr"),
		},
		1: {
			genLR0Item("expr'", 1, "expr"),
			genLR0Item("expr", 1, "expr", "add", "term"),
		},
		2: {
			genLR0Item("expr", 1, "term"),
			genLR0Item("term", 1, "term", "mul", "factor"),
		},
		3: {
			genLR0Item("term", 1, "factor"),
		},
		4: {
			genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
		},
		5: {
			genLR0Item("factor", 1, "id"),
		},
		6: {
			genLR0Item("expr", 2, "expr", "add", "term"),
		},
		7: {
			genLR0Item("term", 2, "term", "mul", "factor"),
		},
		8: {
			genLR0Item("expr", 1, "expr", "add", "term"),
			genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
		},
		9: {
			genLR0Item("expr", 3, "expr", "add", "term"),
			genLR0Item("term", 1, "term", "mul", "factor"),
		},
		10: {
			genLR0Item("term", 3, "term", "mul", "factor"),
		},
		11: {
			genLR0Item("factor", 3, "l_paren", "expr", "r_paren"),
		},
	}

	// Full expectation for every state: transitions plus reducible productions.
	expectedStates := []*expectedLRState{
		{
			kernelItems: expectedKernels[0],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("expr"):    expectedKernels[1],
				genSym("term"):    expectedKernels[2],
				genSym("factor"):  expectedKernels[3],
				genSym("l_paren"): expectedKernels[4],
				genSym("id"):      expectedKernels[5],
			},
			reducibleProds: []*production{},
		},
		{
			kernelItems: expectedKernels[1],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("add"): expectedKernels[6],
			},
			reducibleProds: []*production{
				genProd("expr'", "expr"),
			},
		},
		{
			kernelItems: expectedKernels[2],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("mul"): expectedKernels[7],
			},
			reducibleProds: []*production{
				genProd("expr", "term"),
			},
		},
		{
			kernelItems: expectedKernels[3],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("term", "factor"),
			},
		},
		{
			kernelItems: expectedKernels[4],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("expr"):    expectedKernels[8],
				genSym("term"):    expectedKernels[2],
				genSym("factor"):  expectedKernels[3],
				genSym("l_paren"): expectedKernels[4],
				genSym("id"):      expectedKernels[5],
			},
			reducibleProds: []*production{},
		},
		{
			kernelItems: expectedKernels[5],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("factor", "id"),
			},
		},
		{
			kernelItems: expectedKernels[6],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("term"):    expectedKernels[9],
				genSym("factor"):  expectedKernels[3],
				genSym("l_paren"): expectedKernels[4],
				genSym("id"):      expectedKernels[5],
			},
			reducibleProds: []*production{},
		},
		{
			kernelItems: expectedKernels[7],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("factor"):  expectedKernels[10],
				genSym("l_paren"): expectedKernels[4],
				genSym("id"):      expectedKernels[5],
			},
			reducibleProds: []*production{},
		},
		{
			kernelItems: expectedKernels[8],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("add"):     expectedKernels[6],
				genSym("r_paren"): expectedKernels[11],
			},
			reducibleProds: []*production{},
		},
		{
			kernelItems: expectedKernels[9],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("mul"): expectedKernels[7],
			},
			reducibleProds: []*production{
				genProd("expr", "expr", "add", "term"),
			},
		},
		{
			kernelItems: expectedKernels[10],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("term", "term", "mul", "factor"),
			},
		},
		{
			kernelItems: expectedKernels[11],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("factor", "l_paren", "expr", "r_paren"),
			},
		},
	}

	testLRAutomaton(t, expectedStates, automaton)
}
+
// TestLR0AutomatonContainingEmptyProduction checks that LR(0) automaton
// generation handles empty (epsilon) productions: states whose closure
// contains an empty production must record it as both a reducible production
// and an empty-production item.
func TestLR0AutomatonContainingEmptyProduction(t *testing.T) {
	// Both `foo` and `bar` can derive the empty string.
	src := `
#name test;

s
	: foo bar
	;
foo
	:
	;
bar
	: b
	|
	;

b: "bar";
`

	var gram *Grammar
	var automaton *lr0Automaton
	{
		ast, err := parser.Parse(strings.NewReader(src))
		if err != nil {
			t.Fatal(err)
		}

		b := GrammarBuilder{
			AST: ast,
		}
		gram, err = b.build()
		if err != nil {
			t.Fatal(err)
		}

		automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
		if err != nil {
			t.Fatalf("failed to create a LR0 automaton: %v", err)
		}
		if automaton == nil {
			t.Fatalf("genLR0Automaton returns nil without any error")
		}
	}

	initialState := automaton.states[automaton.initialState]
	if initialState == nil {
		t.Errorf("failed to get an initial status: %v", automaton.initialState)
	}

	genSym := newTestSymbolGenerator(t, gram.symbolTable)
	genProd := newTestProductionGenerator(t, genSym)
	genLR0Item := newTestLR0ItemGenerator(t, genProd)

	expectedKernels := map[int][]*lrItem{
		0: {
			genLR0Item("s'", 0, "s"),
		},
		1: {
			genLR0Item("s'", 1, "s"),
		},
		2: {
			genLR0Item("s", 1, "foo", "bar"),
		},
		3: {
			genLR0Item("s", 2, "foo", "bar"),
		},
		4: {
			genLR0Item("bar", 1, "b"),
		},
	}

	expectedStates := []*expectedLRState{
		{
			kernelItems: expectedKernels[0],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("s"):   expectedKernels[1],
				genSym("foo"): expectedKernels[2],
			},
			// The empty production for `foo` is reducible right away.
			reducibleProds: []*production{
				genProd("foo"),
			},
			emptyProdItems: []*lrItem{
				genLR0Item("foo", 0),
			},
		},
		{
			kernelItems: expectedKernels[1],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("s'", "s"),
			},
		},
		{
			kernelItems: expectedKernels[2],
			nextStates: map[symbol.Symbol][]*lrItem{
				genSym("bar"): expectedKernels[3],
				genSym("b"):   expectedKernels[4],
			},
			// The empty alternative of `bar` is reducible here.
			reducibleProds: []*production{
				genProd("bar"),
			},
			emptyProdItems: []*lrItem{
				genLR0Item("bar", 0),
			},
		},
		{
			kernelItems: expectedKernels[3],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("s", "foo", "bar"),
			},
		},
		{
			kernelItems: expectedKernels[4],
			nextStates:  map[symbol.Symbol][]*lrItem{},
			reducibleProds: []*production{
				genProd("bar", "b"),
			},
		},
	}

	testLRAutomaton(t, expectedStates, automaton)
}
+
+func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) {
+ if len(automaton.states) != len(expected) {
+ t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states))
+ }
+
+ for i, eState := range expected {
+ t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("a kernel was not found: %v", k.id)
+ }
+
+ // test look-ahead symbols
+ {
+ if len(state.kernel.items) != len(eState.kernelItems) {
+ t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items))
+ }
+ for _, eKItem := range eState.kernelItems {
+ var kItem *lrItem
+ for _, it := range state.kernel.items {
+ if it.id != eKItem.id {
+ continue
+ }
+ kItem = it
+ break
+ }
+ if kItem == nil {
+ t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id)
+ }
+
+ if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
+ t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
+ }
+
+ for eSym := range eKItem.lookAhead.symbols {
+ if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
+ t.Errorf("look-ahead symbol not found: %v", eSym)
+ }
+ }
+ }
+ }
+
+ // test next states
+ {
+ if len(state.next) != len(eState.nextStates) {
+ t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next))
+ }
+ for eSym, eKItems := range eState.nextStates {
+ nextStateKernel, err := newKernel(eKItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ nextState, ok := state.next[eSym]
+ if !ok {
+ t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
+ }
+ if nextState != nextStateKernel.id {
+ t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
+ }
+ }
+ }
+
+ // test reducible productions
+ {
+ if len(state.reducible) != len(eState.reducibleProds) {
+ t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
+ }
+ for _, eProd := range eState.reducibleProds {
+ if _, ok := state.reducible[eProd.id]; !ok {
+ t.Errorf("reducible production was not found: %v", eProd.id)
+ }
+ }
+
+ if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+ t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+ }
+ for _, eItem := range eState.emptyProdItems {
+ found := false
+ for _, item := range state.emptyProdItems {
+ if item.id != eItem.id {
+ continue
+ }
+ found = true
+ break
+ }
+ if !found {
+ t.Errorf("empty production item not found: %v", eItem.id)
+ }
+ }
+ }
+ })
+ }
+}
diff --git a/tests/unit/grammar/parsing_table_test.go b/tests/unit/grammar/parsing_table_test.go
new file mode 100644
index 0000000..342e187
--- /dev/null
+++ b/tests/unit/grammar/parsing_table_test.go
@@ -0,0 +1,387 @@
+package grammar
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar/symbol"
+ "urubu/spec/grammar/parser"
+)
+
// expectedState describes the expected contents of one row of a parsing
// table: the kernel items identifying the state, its ACTION entries, and its
// GOTO entries.
type expectedState struct {
	kernelItems []*lrItem                          // kernel items identifying the state
	acts        map[symbol.Symbol]testActionEntry  // expected ACTION entries, keyed by terminal
	goTos       map[symbol.Symbol][]*lrItem        // expected GOTO entries, keyed by non-terminal, valued by the target state's kernel
}
+
+func TestGenLALRParsingTable(t *testing.T) {
+ src := `
+#name test;
+
+s: l eq r | r;
+l: ref r | id;
+r: l;
+eq: '=';
+ref: '*';
+id: "[A-Za-z0-9_]+";
+`
+
+ var ptab *ParsingTable
+ var automaton *lalr1Automaton
+ var gram *Grammar
+ var nonTermCount int
+ var termCount int
+ {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err = b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+ first, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
+ if err != nil {
+ t.Fatal(err)
+ }
+ automaton, err = genLALR1Automaton(lr0, gram.productionSet, first)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ nonTermTexts, err := gram.symbolTable.NonTerminalTexts()
+ if err != nil {
+ t.Fatal(err)
+ }
+ termTexts, err := gram.symbolTable.TerminalTexts()
+ if err != nil {
+ t.Fatal(err)
+ }
+ nonTermCount = len(nonTermTexts)
+ termCount = len(termTexts)
+
+ lalr := &lrTableBuilder{
+ automaton: automaton.lr0Automaton,
+ prods: gram.productionSet,
+ termCount: termCount,
+ nonTermCount: nonTermCount,
+ symTab: gram.symbolTable,
+ }
+ ptab, err = lalr.build()
+ if err != nil {
+ t.Fatalf("failed to create a LALR parsing table: %v", err)
+ }
+ if ptab == nil {
+ t.Fatal("genLALRParsingTable returns nil without any error")
+ }
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
+ },
+ 1: {
+ withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
+ },
+ 2: {
+ withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
+ },
+ 3: {
+ withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
+ },
+ 4: {
+ withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 5: {
+ withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 6: {
+ withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ 7: {
+ withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 8: {
+ withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 9: {
+ withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ }
+
+ expectedStates := []expectedState{
+ {
+ kernelItems: expectedKernels[0],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("ref"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol.Symbol][]*lrItem{
+ genSym("s"): expectedKernels[1],
+ genSym("l"): expectedKernels[2],
+ genSym("r"): expectedKernels[3],
+ },
+ },
+ {
+ kernelItems: expectedKernels[1],
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("s'", "s"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[6],
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("r", "l"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("s", "r"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("ref"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol.Symbol][]*lrItem{
+ genSym("r"): expectedKernels[7],
+ genSym("l"): expectedKernels[8],
+ },
+ },
+ {
+ kernelItems: expectedKernels[5],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeReduce,
+ production: genProd("l", "id"),
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("l", "id"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("ref"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol.Symbol][]*lrItem{
+ genSym("l"): expectedKernels[8],
+ genSym("r"): expectedKernels[9],
+ },
+ },
+ {
+ kernelItems: expectedKernels[7],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeReduce,
+ production: genProd("l", "ref", "r"),
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("l", "ref", "r"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[8],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeReduce,
+ production: genProd("r", "l"),
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("r", "l"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[9],
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("s", "l", "eq", "r"),
+ },
+ },
+ },
+ }
+
+ t.Run("initial state", func(t *testing.T) {
+ iniState := findStateByNum(automaton.states, ptab.InitialState)
+ if iniState == nil {
+ t.Fatalf("the initial state was not found: #%v", ptab.InitialState)
+ }
+ eIniState, err := newKernel(expectedKernels[0])
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ if iniState.id != eIniState.id {
+ t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id)
+ }
+ })
+
+ for i, eState := range expectedStates {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("state was not found: #%v", 0)
+ }
+
+ testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount)
+ testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount)
+ })
+ }
+}
+
+func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) {
+ nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
+ for eSym, eAct := range expectedState.acts {
+ nonEmptyEntries[eSym.Num()] = struct{}{}
+
+ ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num())
+ if ty != eAct.ty {
+ t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
+ }
+ switch eAct.ty {
+ case ActionTypeShift:
+ eNextState, err := newKernel(eAct.nextState)
+ if err != nil {
+ t.Fatal(err)
+ }
+ nextState := findStateByNum(automaton.states, stateNum)
+ if nextState == nil {
+ t.Fatalf("state was not found; state: #%v", stateNum)
+ }
+ if nextState.id != eNextState.id {
+ t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+ }
+ case ActionTypeReduce:
+ prod := findProductionByNum(gram.productionSet, prodNum)
+ if prod == nil {
+ t.Fatalf("production was not found: #%v", prodNum)
+ }
+ if prod.id != eAct.production.id {
+ t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
+ }
+ }
+ }
+ for symNum := 0; symNum < termCount; symNum++ {
+ if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
+ continue
+ }
+ ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum))
+ if ty != ActionTypeError {
+ t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum)
+ }
+ }
+}
+
+func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) {
+ nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
+ for eSym, eGoTo := range expectedState.goTos {
+ nonEmptyEntries[eSym.Num()] = struct{}{}
+
+ eNextState, err := newKernel(eGoTo)
+ if err != nil {
+ t.Fatal(err)
+ }
+ ty, stateNum := ptab.getGoTo(state.num, eSym.Num())
+ if ty != GoToTypeRegistered {
+ t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
+ }
+ nextState := findStateByNum(automaton.states, stateNum)
+ if nextState == nil {
+ t.Fatalf("state was not found: #%v", stateNum)
+ }
+ if nextState.id != eNextState.id {
+ t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+ }
+ }
+ for symNum := 0; symNum < nonTermCount; symNum++ {
+ if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
+ continue
+ }
+ ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum))
+ if ty != GoToTypeError {
+ t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
+ }
+ }
+}
+
// testActionEntry describes one expected ACTION-table entry. For a shift
// action nextState identifies the target state by its kernel items; for a
// reduce action production identifies the production to reduce by.
type testActionEntry struct {
	ty         ActionType   // expected action type (shift, reduce, or error)
	nextState  []*lrItem    // kernel of the shift target; nil for reduce entries
	production *production  // production to reduce by; nil for shift entries
}
+
+func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState {
+ for _, state := range states {
+ if state.num == num {
+ return state
+ }
+ }
+ return nil
+}
+
+func findProductionByNum(prods *productionSet, num productionNum) *production {
+ for _, prod := range prods.getAllProductions() {
+ if prod.num == num {
+ return prod
+ }
+ }
+ return nil
+}
diff --git a/tests/unit/grammar/symbol/symbol_test.go b/tests/unit/grammar/symbol/symbol_test.go
new file mode 100644
index 0000000..31c3edd
--- /dev/null
+++ b/tests/unit/grammar/symbol/symbol_test.go
@@ -0,0 +1,159 @@
+package symbol
+
+import "testing"
+
// TestSymbol registers a start symbol, non-terminals, and terminals in a
// symbol table, then verifies symbol<->text round-tripping, the
// classification predicates of every symbol (plus EOF and Nil), and the
// ordered text listings of non-terminals and terminals.
func TestSymbol(t *testing.T) {
	tab := NewSymbolTable()
	w := tab.Writer()
	// Registration errors are ignored here; the lookups below fail loudly if
	// any registration did not take effect.
	_, _ = w.RegisterStartSymbol("expr'")
	_, _ = w.RegisterNonTerminalSymbol("expr")
	_, _ = w.RegisterNonTerminalSymbol("term")
	_, _ = w.RegisterNonTerminalSymbol("factor")
	_, _ = w.RegisterTerminalSymbol("id")
	_, _ = w.RegisterTerminalSymbol("add")
	_, _ = w.RegisterTerminalSymbol("mul")
	_, _ = w.RegisterTerminalSymbol("l_paren")
	_, _ = w.RegisterTerminalSymbol("r_paren")

	// Expected NonTerminalTexts() result, in registration order after the
	// reserved Nil slot.
	nonTermTexts := []string{
		"", // Nil
		"expr'",
		"expr",
		"term",
		"factor",
	}

	// Expected TerminalTexts() result; Nil and EOF occupy the first slots.
	termTexts := []string{
		"",            // Nil
		symbolNameEOF, // EOF
		"id",
		"add",
		"mul",
		"l_paren",
		"r_paren",
	}

	tests := []struct {
		text          string
		isNil         bool
		isStart       bool
		isEOF         bool
		isNonTerminal bool
		isTerminal    bool
	}{
		{
			text:          "expr'",
			isStart:       true,
			isNonTerminal: true,
		},
		{
			text:          "expr",
			isNonTerminal: true,
		},
		{
			text:          "term",
			isNonTerminal: true,
		},
		{
			text:          "factor",
			isNonTerminal: true,
		},
		{
			text:       "id",
			isTerminal: true,
		},
		{
			text:       "add",
			isTerminal: true,
		},
		{
			text:       "mul",
			isTerminal: true,
		},
		{
			text:       "l_paren",
			isTerminal: true,
		},
		{
			text:       "r_paren",
			isTerminal: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.text, func(t *testing.T) {
			r := tab.Reader()
			// text -> symbol -> text must round-trip.
			sym, ok := r.ToSymbol(tt.text)
			if !ok {
				t.Fatalf("symbol was not found")
			}
			testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal)
			text, ok := r.ToText(sym)
			if !ok {
				t.Fatalf("text was not found")
			}
			if text != tt.text {
				t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text)
			}
		})
	}

	t.Run("EOF", func(t *testing.T) {
		testSymbolProperty(t, SymbolEOF, false, false, true, false, true)
	})

	t.Run("Nil", func(t *testing.T) {
		testSymbolProperty(t, SymbolNil, true, false, false, false, false)
	})

	t.Run("texts of non-terminals", func(t *testing.T) {
		r := tab.Reader()
		ts, err := r.NonTerminalTexts()
		if err != nil {
			t.Fatal(err)
		}
		if len(ts) != len(nonTermTexts) {
			t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts)
		}
		for i, text := range ts {
			if text != nonTermTexts[i] {
				t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text)
			}
		}
	})

	t.Run("texts of terminals", func(t *testing.T) {
		r := tab.Reader()
		ts, err := r.TerminalTexts()
		if err != nil {
			t.Fatal(err)
		}
		if len(ts) != len(termTexts) {
			t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts)
		}
		for i, text := range ts {
			if text != termTexts[i] {
				t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text)
			}
		}
	})
}
+
+func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) {
+ t.Helper()
+
+ if v := sym.IsNil(); v != isNil {
+ t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v)
+ }
+ if v := sym.IsStart(); v != isStart {
+ t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v)
+ }
+ if v := sym.isEOF(); v != isEOF {
+ t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v)
+ }
+ if v := sym.isNonTerminal(); v != isNonTerminal {
+ t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v)
+ }
+ if v := sym.IsTerminal(); v != isTerminal {
+ t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v)
+ }
+}
diff --git a/tests/unit/grammar/test_helper_test.go b/tests/unit/grammar/test_helper_test.go
new file mode 100644
index 0000000..546d2c1
--- /dev/null
+++ b/tests/unit/grammar/test_helper_test.go
@@ -0,0 +1,68 @@
+package grammar
+
+import (
+ "testing"
+
+ "urubu/grammar/symbol"
+)
+
+type testSymbolGenerator func(text string) symbol.Symbol
+
+func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
+ return func(text string) symbol.Symbol {
+ t.Helper()
+
+ sym, ok := symTab.ToSymbol(text)
+ if !ok {
+ t.Fatalf("symbol was not found: %v", text)
+ }
+ return sym
+ }
+}
+
+type testProductionGenerator func(lhs string, rhs ...string) *production
+
+func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
+ return func(lhs string, rhs ...string) *production {
+ t.Helper()
+
+ rhsSym := []symbol.Symbol{}
+ for _, text := range rhs {
+ rhsSym = append(rhsSym, genSym(text))
+ }
+ prod, err := newProduction(genSym(lhs), rhsSym)
+ if err != nil {
+ t.Fatalf("failed to create a production: %v", err)
+ }
+
+ return prod
+ }
+}
+
+type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem
+
+func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator {
+ return func(lhs string, dot int, rhs ...string) *lrItem {
+ t.Helper()
+
+ prod := genProd(lhs, rhs...)
+ item, err := newLR0Item(prod, dot)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 item: %v", err)
+ }
+
+ return item
+ }
+}
+
+func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem {
+ if item.lookAhead.symbols == nil {
+ item.lookAhead.symbols = map[symbol.Symbol]struct{}{}
+ }
+
+ for _, a := range lookAhead {
+ item.lookAhead.symbols[a] = struct{}{}
+ }
+
+ return item
+}
diff --git a/tests/unit/spec/grammar/parser/lexer_test.go b/tests/unit/spec/grammar/parser/lexer_test.go
new file mode 100644
index 0000000..c402b42
--- /dev/null
+++ b/tests/unit/spec/grammar/parser/lexer_test.go
@@ -0,0 +1,236 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+
+ verr "urubu/error"
+)
+
// TestLexer_Run feeds grammar-spec source snippets to the lexer and checks
// either the exact token sequence produced or the syntax error raised.
// Token positions are fixed at (1, 0) by the helpers below and are not
// compared by testToken.
func TestLexer_Run(t *testing.T) {
	// Shorthand constructors for expected tokens.
	idTok := func(text string) *token {
		return newIDToken(text, newPosition(1, 0))
	}

	termPatTok := func(text string) *token {
		return newTerminalPatternToken(text, newPosition(1, 0))
	}

	strTok := func(text string) *token {
		return newStringLiteralToken(text, newPosition(1, 0))
	}

	symTok := func(kind tokenKind) *token {
		return newSymbolToken(kind, newPosition(1, 0))
	}

	invalidTok := func(text string) *token {
		return newInvalidToken(text, newPosition(1, 0))
	}

	tests := []struct {
		caption string
		src     string
		tokens  []*token // expected token sequence, ending with EOF; nil when err is set
		err     error    // expected syntax error cause; nil when tokens is set
	}{
		{
			caption: "the lexer can recognize all kinds of tokens",
			src:     `id"terminal"'string':|;@...#$()`,
			tokens: []*token{
				idTok("id"),
				termPatTok("terminal"),
				strTok(`string`),
				symTok(tokenKindColon),
				symTok(tokenKindOr),
				symTok(tokenKindSemicolon),
				symTok(tokenKindLabelMarker),
				symTok(tokenKindExpantion),
				symTok(tokenKindDirectiveMarker),
				symTok(tokenKindOrderedSymbolMarker),
				symTok(tokenKindLParen),
				symTok(tokenKindRParen),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer can recognize keywords",
			src:     `fragment`,
			tokens: []*token{
				symTok(tokenKindKWFragment),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer can recognize character sequences and escape sequences in a terminal",
			src:     `"abc\"\\"`,
			tokens: []*token{
				termPatTok(`abc"\\`),
				newEOFToken(),
			},
		},
		{
			caption: "backslashes are recognized as they are because escape sequences are not allowed in strings",
			src:     `'\\\'`,
			tokens: []*token{
				strTok(`\\\`),
				newEOFToken(),
			},
		},
		{
			caption: "a pattern must include at least one character",
			src:     `""`,
			err:     synErrEmptyPattern,
		},
		{
			caption: "a string must include at least one character",
			src:     `''`,
			err:     synErrEmptyString,
		},
		{
			caption: "the lexer can recognize newlines and combine consecutive newlines into one",
			src:     "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A",
			tokens: []*token{
				symTok(tokenKindNewline),
				symTok(tokenKindOr),
				symTok(tokenKindNewline),
				symTok(tokenKindOr),
				symTok(tokenKindNewline),
				symTok(tokenKindOr),
				symTok(tokenKindNewline),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer ignores line comments",
			src: `
// This is the first comment.
foo
// This is the second comment.
// This is the third comment.
bar // This is the fourth comment.
`,
			tokens: []*token{
				symTok(tokenKindNewline),
				idTok("foo"),
				symTok(tokenKindNewline),
				idTok("bar"),
				symTok(tokenKindNewline),
				newEOFToken(),
			},
		},
		{
			caption: "an identifier cannot contain the capital-case letters",
			src:     `Abc`,
			err:     synErrIDInvalidChar,
		},
		{
			caption: "an identifier cannot contain the capital-case letters",
			src:     `Zyx`,
			err:     synErrIDInvalidChar,
		},
		{
			caption: "the underscore cannot be placed at the beginning of an identifier",
			src:     `_abc`,
			err:     synErrIDInvalidUnderscorePos,
		},
		{
			caption: "the underscore cannot be placed at the end of an identifier",
			src:     `abc_`,
			err:     synErrIDInvalidUnderscorePos,
		},
		{
			caption: "the underscore cannot be placed consecutively",
			src:     `a__b`,
			err:     synErrIDConsecutiveUnderscores,
		},
		{
			caption: "the digits cannot be placed at the biginning of an identifier",
			src:     `0abc`,
			err:     synErrIDInvalidDigitsPos,
		},
		{
			caption: "the digits cannot be placed at the biginning of an identifier",
			src:     `9abc`,
			err:     synErrIDInvalidDigitsPos,
		},
		{
			caption: "an unclosed terminal is not a valid token",
			src:     `"abc`,
			err:     synErrUnclosedTerminal,
		},
		{
			caption: "an incompleted escape sequence in a pattern is not a valid token",
			src:     `"\`,
			err:     synErrIncompletedEscSeq,
		},
		{
			caption: "an unclosed string is not a valid token",
			src:     `'abc`,
			err:     synErrUnclosedString,
		},
		{
			caption: "the lexer can recognize valid tokens following an invalid token",
			src:     `abc!!!def`,
			tokens: []*token{
				idTok("abc"),
				invalidTok("!!!"),
				idTok("def"),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer skips white spaces",
			// \u0009: HT
			// \u0020: SP
			src: "a\u0009b\u0020c",
			tokens: []*token{
				idTok("a"),
				idTok("b"),
				idTok("c"),
				newEOFToken(),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			l, err := newLexer(strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}
			// Drain tokens until EOF or the first lexing error.
			n := 0
			for {
				var tok *token
				tok, err = l.next()
				if err != nil {
					break
				}
				testToken(t, tok, tt.tokens[n])
				n++
				if tok.kind == tokenKindEOF {
					break
				}
			}
			if tt.err != nil {
				// The error must be a *verr.SpecError wrapping the expected cause.
				synErr, ok := err.(*verr.SpecError)
				if !ok {
					t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
				}
				if tt.err != synErr.Cause {
					t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause)
				}
			} else {
				if err != nil {
					t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
				}
			}
		})
	}
}
+
+func testToken(t *testing.T, tok, expected *token) {
+ t.Helper()
+ if tok.kind != expected.kind || tok.text != expected.text {
+ t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok)
+ }
+}
diff --git a/tests/unit/spec/grammar/parser/parser_test.go b/tests/unit/spec/grammar/parser/parser_test.go
new file mode 100644
index 0000000..4161f6b
--- /dev/null
+++ b/tests/unit/spec/grammar/parser/parser_test.go
@@ -0,0 +1,1211 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+
+ verr "urubu/error"
+)
+
+func TestParse(t *testing.T) {
+ name := func(param *ParameterNode) *DirectiveNode {
+ return &DirectiveNode{
+ Name: "name",
+ Parameters: []*ParameterNode{param},
+ }
+ }
+ prec := func(param *ParameterNode) *DirectiveNode {
+ return &DirectiveNode{
+ Name: "prec",
+ Parameters: []*ParameterNode{param},
+ }
+ }
+ leftAssoc := func(params ...*ParameterNode) *DirectiveNode {
+ return &DirectiveNode{
+ Name: "left",
+ Parameters: params,
+ }
+ }
+ rightAssoc := func(params ...*ParameterNode) *DirectiveNode {
+ return &DirectiveNode{
+ Name: "right",
+ Parameters: params,
+ }
+ }
+ assign := func(params ...*ParameterNode) *DirectiveNode {
+ return &DirectiveNode{
+ Name: "assign",
+ Parameters: params,
+ }
+ }
+ prod := func(lhs string, alts ...*AlternativeNode) *ProductionNode {
+ return &ProductionNode{
+ LHS: lhs,
+ RHS: alts,
+ }
+ }
+ withProdPos := func(prod *ProductionNode, pos Position) *ProductionNode {
+ prod.Pos = pos
+ return prod
+ }
+ withProdDir := func(prod *ProductionNode, dirs ...*DirectiveNode) *ProductionNode {
+ prod.Directives = dirs
+ return prod
+ }
+ alt := func(elems ...*ElementNode) *AlternativeNode {
+ return &AlternativeNode{
+ Elements: elems,
+ }
+ }
+ withAltPos := func(alt *AlternativeNode, pos Position) *AlternativeNode {
+ alt.Pos = pos
+ return alt
+ }
+ withAltDir := func(alt *AlternativeNode, dirs ...*DirectiveNode) *AlternativeNode {
+ alt.Directives = dirs
+ return alt
+ }
+ dir := func(name string, params ...*ParameterNode) *DirectiveNode {
+ return &DirectiveNode{
+ Name: name,
+ Parameters: params,
+ }
+ }
+ withDirPos := func(dir *DirectiveNode, pos Position) *DirectiveNode {
+ dir.Pos = pos
+ return dir
+ }
+ idParam := func(id string) *ParameterNode {
+ return &ParameterNode{
+ ID: id,
+ }
+ }
+ ordSymParam := func(id string) *ParameterNode {
+ return &ParameterNode{
+ OrderedSymbol: id,
+ }
+ }
+ exp := func(param *ParameterNode) *ParameterNode {
+ param.Expansion = true
+ return param
+ }
+ group := func(dirs ...*DirectiveNode) *ParameterNode {
+ return &ParameterNode{
+ Group: dirs,
+ }
+ }
+ withParamPos := func(param *ParameterNode, pos Position) *ParameterNode {
+ param.Pos = pos
+ return param
+ }
+ id := func(id string) *ElementNode {
+ return &ElementNode{
+ ID: id,
+ }
+ }
+ pat := func(p string) *ElementNode {
+ return &ElementNode{
+ Pattern: p,
+ }
+ }
+ label := func(name string) *LabelNode {
+ return &LabelNode{
+ Name: name,
+ }
+ }
+ withLabelPos := func(label *LabelNode, pos Position) *LabelNode {
+ label.Pos = pos
+ return label
+ }
+ withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode {
+ elem.Label = label
+ return elem
+ }
+ withElemPos := func(elem *ElementNode, pos Position) *ElementNode {
+ elem.Pos = pos
+ return elem
+ }
+ frag := func(lhs string, rhs string) *FragmentNode {
+ return &FragmentNode{
+ LHS: lhs,
+ RHS: rhs,
+ }
+ }
+ withFragmentPos := func(frag *FragmentNode, pos Position) *FragmentNode {
+ frag.Pos = pos
+ return frag
+ }
+ newPos := func(row int) Position {
+ return Position{
+ Row: row,
+ Col: 0,
+ }
+ }
+
+ tests := []struct {
+ caption string
+ src string
+ checkPosition bool
+ ast *RootNode
+ synErr *SyntaxError
+ }{
+ {
+ caption: "a grammar can contain top-level directives",
+ src: `
+#name test;
+
+#prec (
+ #left a b $x1
+ #right c d $x2
+ #assign e f $x3
+);
+`,
+ ast: &RootNode{
+ Directives: []*DirectiveNode{
+ withDirPos(
+ name(
+ withParamPos(
+ idParam("test"),
+ newPos(2),
+ ),
+ ),
+ newPos(2),
+ ),
+ withDirPos(
+ prec(
+ withParamPos(
+ group(
+ withDirPos(
+ leftAssoc(
+ withParamPos(
+ idParam("a"),
+ newPos(5),
+ ),
+ withParamPos(
+ idParam("b"),
+ newPos(5),
+ ),
+ withParamPos(
+ ordSymParam("x1"),
+ newPos(5),
+ ),
+ ),
+ newPos(5),
+ ),
+ withDirPos(
+ rightAssoc(
+ withParamPos(
+ idParam("c"),
+ newPos(6),
+ ),
+ withParamPos(
+ idParam("d"),
+ newPos(6),
+ ),
+ withParamPos(
+ ordSymParam("x2"),
+ newPos(6),
+ ),
+ ),
+ newPos(6),
+ ),
+ withDirPos(
+ assign(
+ withParamPos(
+ idParam("e"),
+ newPos(7),
+ ),
+ withParamPos(
+ idParam("f"),
+ newPos(7),
+ ),
+ withParamPos(
+ ordSymParam("x3"),
+ newPos(7),
+ ),
+ ),
+ newPos(7),
+ ),
+ ),
+ newPos(4),
+ ),
+ ),
+ newPos(4),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a top-level directive must be followed by ';'",
+ src: `
+#name test
+`,
+ synErr: synErrTopLevelDirNoSemicolon,
+ },
+ {
+ caption: "a directive group must be closed by ')'",
+ src: `
+#prec (
+ #left a b
+;
+`,
+ synErr: synErrUnclosedDirGroup,
+ },
+ {
+ caption: "an ordered symbol marker '$' must be followed by an ID",
+ src: `
+#prec (
+ #assign $
+);
+`,
+ synErr: synErrNoOrderedSymbolName,
+ },
+ {
+ caption: "single production is a valid grammar",
+ src: `a: "a";`,
+ ast: &RootNode{
+ LexProductions: []*ProductionNode{
+ prod("a", alt(pat("a"))),
+ },
+ },
+ },
+ {
+ caption: "multiple productions are a valid grammar",
+ src: `
+e
+ : e add t
+ | e sub t
+ | t
+ ;
+t
+ : t mul f
+ | t div f
+ | f
+ ;
+f
+ : l_paren e r_paren
+ | id
+ ;
+
+add
+ : '+';
+sub
+ : '-';
+mul
+ : '*';
+div
+ : '/';
+l_paren
+ : '(';
+r_paren
+ : ')';
+id
+ : "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("e",
+ alt(id("e"), id("add"), id("t")),
+ alt(id("e"), id("sub"), id("t")),
+ alt(id("t")),
+ ),
+ prod("t",
+ alt(id("t"), id("mul"), id("f")),
+ alt(id("t"), id("div"), id("f")),
+ alt(id("f")),
+ ),
+ prod("f",
+ alt(id("l_paren"), id("e"), id("r_paren")),
+ alt(id("id")),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ prod("add", alt(pat(`+`))),
+ prod("sub", alt(pat(`-`))),
+ prod("mul", alt(pat(`*`))),
+ prod("div", alt(pat(`/`))),
+ prod("l_paren", alt(pat(`(`))),
+ prod("r_paren", alt(pat(`)`))),
+ prod("id", alt(pat(`[A-Za-z_][0-9A-Za-z_]*`))),
+ },
+ },
+ },
+ {
+ caption: "productions can contain the empty alternative",
+ src: `
+a
+ : foo
+ |
+ ;
+b
+ :
+ | bar
+ ;
+c
+ :
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("a",
+ alt(id("foo")),
+ alt(),
+ ),
+ prod("b",
+ alt(),
+ alt(id("bar")),
+ ),
+ prod("c",
+ alt(),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ prod("foo", alt(pat(`foo`))),
+ prod("bar", alt(pat(`bar`))),
+ },
+ },
+ },
+ {
+ caption: "a production cannot contain an ordered symbol",
+ src: `
+a: $x;
+`,
+ synErr: synErrNoSemicolon,
+ },
+ {
+ caption: "an alternative cannot contain a pattern directly",
+ src: `
+s
+ : "foo" bar
+ ;
+
+bar
+ : "bar";
+`,
+ synErr: synErrPatternInAlt,
+ },
+ {
+ caption: "an alternative cannot contain a string directly",
+ src: `
+s
+ : 'foo' bar
+ ;
+bar
+ : "bar";
+`,
+ synErr: synErrPatternInAlt,
+ },
+ {
+ caption: "a terminal symbol can be defined using a string literal",
+ src: `
+foo
+ : 'foo';
+`,
+ ast: &RootNode{
+ LexProductions: []*ProductionNode{
+ prod("foo",
+ alt(pat(`foo`)),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a terminal symbol can be defined using a pattern",
+ src: `
+foo
+ : "foo";
+`,
+ ast: &RootNode{
+ LexProductions: []*ProductionNode{
+ prod("foo",
+ alt(pat(`foo`)),
+ ),
+ },
+ },
+ },
+ {
+ caption: "`fragment` is a reserved word",
+ src: `fragment: 'fragment';`,
+ synErr: synErrNoProductionName,
+ },
+ {
+ caption: "when a source contains an unknown token, the parser raises a syntax error",
+ src: `a: !;`,
+ synErr: synErrInvalidToken,
+ },
+ {
+ caption: "a production must have its name as the first element",
+ src: `: "a";`,
+ synErr: synErrNoProductionName,
+ },
+ {
+ caption: "':' must precede an alternative",
+ src: `a "a";`,
+ synErr: synErrNoColon,
+ },
+ {
+ caption: "';' must follow a production",
+ src: `a: "a"`,
+ synErr: synErrNoSemicolon,
+ },
+ {
+ caption: "';' can only appear at the end of a production",
+ src: `;`,
+ synErr: synErrNoProductionName,
+ },
+ {
+ caption: "a grammar can contain fragments",
+ src: `
+s
+ : tagline
+ ;
+tagline: "\f{words} IS OUT THERE.";
+fragment words: "[A-Za-z\u{0020}]+";
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("s",
+ alt(id("tagline")),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ prod("tagline",
+ alt(pat(`\f{words} IS OUT THERE.`)),
+ ),
+ },
+ Fragments: []*FragmentNode{
+ frag("words", `[A-Za-z\u{0020}]+`),
+ },
+ },
+ },
+ {
+ caption: "the lexer treats consecutive lines as a single token but can count lines correctly",
+ src: `// This line precedes line comments and blank lines.
+// This is a line comment.
+
+
+s
+ : foo
+ ;
+
+
+// This line is sandwiched between blank lines.
+
+
+foo: 'foo';
+`,
+ checkPosition: true,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ withProdPos(
+ prod("s",
+ withAltPos(
+ alt(
+ withElemPos(
+ id("foo"),
+ newPos(6),
+ ),
+ ),
+ newPos(6),
+ ),
+ ),
+ newPos(5),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ withProdPos(
+ prod("foo",
+ withAltPos(
+ alt(
+ withElemPos(
+ pat(`foo`),
+ newPos(13),
+ ),
+ ),
+ newPos(13),
+ ),
+ ),
+ newPos(13),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a grammar can contain production directives and alternative directives",
+ src: `
+mode_tran_seq
+ : mode_tran_seq mode_tran
+ | mode_tran
+ ;
+mode_tran
+ : push_m1
+ | push_m2
+ | pop_m1
+ | pop_m2
+ ;
+
+push_m1 #push m1
+ : "->";
+push_m2 #mode m1 #push m2
+ : "-->";
+pop_m1 #mode m1 #pop
+ : "<-";
+pop_m2 #mode m2 #pop
+ : "<--";
+whitespace #mode default m1 m2 #skip
+ : "\u{0020}+";
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("mode_tran_seq",
+ alt(id("mode_tran_seq"), id("mode_tran")),
+ alt(id("mode_tran")),
+ ),
+ prod("mode_tran",
+ alt(id("push_m1")),
+ alt(id("push_m2")),
+ alt(id("pop_m1")),
+ alt(id("pop_m2")),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ withProdDir(
+ prod("push_m1",
+ alt(pat(`->`)),
+ ),
+ dir("push", idParam("m1")),
+ ),
+ withProdDir(
+ prod("push_m2",
+ alt(pat(`-->`)),
+ ),
+ dir("mode", idParam("m1")),
+ dir("push", idParam("m2")),
+ ),
+ withProdDir(
+ prod("pop_m1",
+ alt(pat(`<-`)),
+ ),
+ dir("mode", idParam("m1")),
+ dir("pop"),
+ ),
+ withProdDir(
+ prod("pop_m2",
+ alt(pat(`<--`)),
+ ),
+ dir("mode", idParam("m2")),
+ dir("pop"),
+ ),
+ withProdDir(
+ prod("whitespace",
+ alt(pat(`\u{0020}+`)),
+ ),
+ dir("mode", idParam("default"), idParam("m1"), idParam("m2")),
+ dir("skip"),
+ ),
+ },
+ },
+ },
+ {
+ caption: "an alternative of a production can have multiple alternative directives",
+ src: `
+s
+ : foo bar #prec baz #ast foo bar
+ ;
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("s",
+ withAltDir(
+ alt(id("foo"), id("bar")),
+ dir("prec", idParam("baz")),
+ dir("ast", idParam("foo"), idParam("bar")),
+ ),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a lexical production can have multiple production directives",
+ src: `
+foo #mode a #push b
+ : 'foo';
+`,
+ ast: &RootNode{
+ LexProductions: []*ProductionNode{
+ withProdDir(
+ prod("foo",
+ alt(pat("foo")),
+ ),
+ dir("mode", idParam("a")),
+ dir("push", idParam("b")),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a production must be followed by a newline",
+ src: `
+s: foo; foo: "foo";
+`,
+ synErr: synErrSemicolonNoNewline,
+ },
+ {
+ caption: "a grammar can contain 'ast' directives and expansion operator",
+ src: `
+s
+ : foo bar_list #ast foo bar_list
+ ;
+bar_list
+ : bar_list bar #ast bar_list... bar
+ | bar #ast bar
+ ;
+foo: "foo";
+bar: "bar";
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("s",
+ withAltDir(
+ alt(id("foo"), id("bar_list")),
+ dir("ast", idParam("foo"), idParam("bar_list")),
+ ),
+ ),
+ prod("bar_list",
+ withAltDir(
+ alt(id("bar_list"), id("bar")),
+ dir("ast", exp(idParam("bar_list")), idParam("bar")),
+ ),
+ withAltDir(
+ alt(id("bar")),
+ dir("ast", idParam("bar")),
+ ),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ prod("foo",
+ alt(pat("foo")),
+ ),
+ prod("bar",
+ alt(pat("bar")),
+ ),
+ },
+ },
+ },
+ {
+ caption: "an expansion operator must be preceded by an identifier",
+ src: `
+s
+ : foo #ast ...
+ ;
+`,
+ synErr: synErrStrayExpOp,
+ },
+ {
+ caption: "an expansion operator must be preceded by an identifier",
+ src: `
+a
+ : foo #ast ... foo
+ ;
+`,
+ synErr: synErrStrayExpOp,
+ },
+ {
+ caption: "an expansion operator cannot be applied to a pattern",
+ src: `
+a
+ : "foo" #ast "foo"...
+ ;
+`,
+ synErr: synErrInvalidExpOperand,
+ },
+ {
+ caption: "an expansion operator cannot be applied to a string",
+ src: `
+a
+ : 'foo' #ast 'foo'...
+ ;
+`,
+ synErr: synErrInvalidExpOperand,
+ },
+ {
+ caption: "an expansion operator cannot be applied to an ordered symbol",
+ src: `
+a
+ : foo #ast $foo...
+ ;
+`,
+ synErr: synErrInvalidExpOperand,
+ },
+ {
+ caption: "an expansion operator cannot be applied to a directive group",
+ src: `
+a
+ : foo #ast ()...
+ ;
+`,
+ synErr: synErrInvalidExpOperand,
+ },
+ {
+ caption: "an AST has node positions",
+ src: `
+exp
+ : exp add id #ast exp id
+ | id
+ ;
+
+whitespace #skip
+ : "\u{0020}+";
+add
+ : '+';
+id
+ : "\f{letter}(\f{letter}|\f{number})*";
+fragment letter
+ : "[A-Za-z_]";
+fragment number
+ : "[0-9]";
+`,
+ checkPosition: true,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ withProdPos(
+ prod("exp",
+ withAltPos(
+ withAltDir(
+ alt(
+ withElemPos(id("exp"), newPos(3)),
+ withElemPos(id("add"), newPos(3)),
+ withElemPos(id("id"), newPos(3)),
+ ),
+ withDirPos(
+ dir("ast",
+ withParamPos(idParam("exp"), newPos(3)),
+ withParamPos(idParam("id"), newPos(3)),
+ ),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ withAltPos(
+ alt(
+ withElemPos(id("id"), newPos(4)),
+ ),
+ newPos(4),
+ ),
+ ),
+ newPos(2),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ withProdPos(
+ withProdDir(
+ prod("whitespace",
+ withAltPos(
+ alt(
+ withElemPos(
+ pat(`\u{0020}+`),
+ newPos(8),
+ ),
+ ),
+ newPos(8),
+ ),
+ ),
+ withDirPos(
+ dir("skip"),
+ newPos(7),
+ ),
+ ),
+ newPos(7),
+ ),
+ withProdPos(
+ prod("add",
+ withAltPos(
+ alt(
+ withElemPos(
+ pat(`+`),
+ newPos(10),
+ ),
+ ),
+ newPos(10),
+ ),
+ ),
+ newPos(9),
+ ),
+ withProdPos(
+ prod("id",
+ withAltPos(
+ alt(
+ withElemPos(
+ pat(`\f{letter}(\f{letter}|\f{number})*`),
+ newPos(12),
+ ),
+ ),
+ newPos(12),
+ ),
+ ),
+ newPos(11),
+ ),
+ },
+ Fragments: []*FragmentNode{
+ withFragmentPos(
+ frag("letter", "[A-Za-z_]"),
+ newPos(13),
+ ),
+ withFragmentPos(
+ frag("number", "[0-9]"),
+ newPos(15),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a symbol can have a label",
+ src: `
+expr
+ : term@lhs add term@rhs
+ ;
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ withProdPos(
+ prod("expr",
+ withAltPos(
+ alt(
+ withElemPos(
+ withLabel(
+ id("term"),
+ withLabelPos(
+ label("lhs"),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ withElemPos(
+ id("add"),
+ newPos(3),
+ ),
+ withElemPos(
+ withLabel(
+ id("term"),
+ withLabelPos(
+ label("rhs"),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ ),
+ newPos(2),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a label must be an identifier, not a string",
+ src: `
+foo
+ : bar@'baz'
+ ;
+`,
+ synErr: synErrNoLabel,
+ },
+ {
+ caption: "a label must be an identifier, not a pattern",
+ src: `
+foo
+ : bar@"baz"
+ ;
+`,
+ synErr: synErrNoLabel,
+ },
+ {
+ caption: "the symbol marker @ must be followed by an identifier",
+ src: `
+foo
+ : bar@
+ ;
+`,
+ synErr: synErrNoLabel,
+ },
+ {
+ caption: "a symbol cannot have more than or equal to two labels",
+ src: `
+foo
+ : bar@baz@bra
+ ;
+`,
+ synErr: synErrLabelWithNoSymbol,
+ },
+ {
+ caption: "a label must follow a symbol",
+ src: `
+foo
+ : @baz
+ ;
+`,
+ synErr: synErrLabelWithNoSymbol,
+ },
+ {
+ caption: "a grammar can contain left and right associativities",
+ src: `
+#prec (
+ #left l1 l2
+ #left l3
+ #right r1 r2
+ #right r3
+);
+
+s
+ : id l1 id l2 id l3 id
+ | id r1 id r2 id r3 id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+l1
+ : 'l1';
+l2
+ : 'l2';
+l3
+ : 'l3';
+r1
+ : 'r1';
+r2
+ : 'r2';
+r3
+ : 'r3';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ ast: &RootNode{
+ Directives: []*DirectiveNode{
+ withDirPos(
+ prec(
+ withParamPos(
+ group(
+ withDirPos(
+ leftAssoc(
+ withParamPos(idParam("l1"), newPos(3)),
+ withParamPos(idParam("l2"), newPos(3)),
+ ),
+ newPos(3),
+ ),
+ withDirPos(
+ leftAssoc(
+ withParamPos(idParam("l3"), newPos(4)),
+ ),
+ newPos(4),
+ ),
+ withDirPos(
+ rightAssoc(
+ withParamPos(idParam("r1"), newPos(5)),
+ withParamPos(idParam("r2"), newPos(5)),
+ ),
+ newPos(5),
+ ),
+ withDirPos(
+ rightAssoc(
+ withParamPos(idParam("r3"), newPos(6)),
+ ),
+ newPos(6),
+ ),
+ ),
+ newPos(2),
+ ),
+ ),
+ newPos(2),
+ ),
+ },
+ Productions: []*ProductionNode{
+ prod("s",
+ alt(id(`id`), id(`l1`), id(`id`), id(`l2`), id(`id`), id(`l3`), id(`id`)),
+ alt(id(`id`), id(`r1`), id(`id`), id(`r2`), id(`id`), id(`r3`), id(`id`)),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ withProdDir(
+ prod("whitespaces",
+ alt(pat(`[\u{0009}\u{0020}]+`)),
+ ),
+ dir("skip"),
+ ),
+ prod("l1", alt(pat(`l1`))),
+ prod("l2", alt(pat(`l2`))),
+ prod("l3", alt(pat(`l3`))),
+ prod("r1", alt(pat(`r1`))),
+ prod("r2", alt(pat(`r2`))),
+ prod("r3", alt(pat(`r3`))),
+ prod("id", alt(pat(`[A-Za-z0-9_]+`))),
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ ast, err := Parse(strings.NewReader(tt.src))
+ if tt.synErr != nil {
+ synErrs, ok := err.(verr.SpecErrors)
+ if !ok {
+ t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err)
+ }
+ synErr := synErrs[0]
+ if tt.synErr != synErr.Cause {
+ t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause)
+ }
+ if ast != nil {
+ t.Fatalf("AST must be nil")
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if ast == nil {
+ t.Fatalf("AST must be non-nil")
+ }
+ testRootNode(t, ast, tt.ast, tt.checkPosition)
+ }
+ })
+ }
+}
+
+func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) {
+ t.Helper()
+ if len(root.Productions) != len(expected.Productions) {
+ t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions))
+ }
+ if len(root.Directives) != len(expected.Directives) {
+ t.Fatalf("unexpected length of top-level directives; want: %v, got: %v", len(expected.Directives), len(root.Directives))
+ }
+ for i, dir := range root.Directives {
+ testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true)
+ }
+ for i, prod := range root.Productions {
+ testProductionNode(t, prod, expected.Productions[i], checkPosition)
+ }
+ for i, prod := range root.LexProductions {
+ testProductionNode(t, prod, expected.LexProductions[i], checkPosition)
+ }
+ for i, frag := range root.Fragments {
+ testFragmentNode(t, frag, expected.Fragments[i], checkPosition)
+ }
+}
+
+func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) {
+ t.Helper()
+ if len(expected.Directives) != len(prod.Directives) {
+ t.Fatalf("unexpected directive count; want: %v directives, got: %v directives", len(expected.Directives), len(prod.Directives))
+ }
+ if len(expected.Directives) > 0 {
+ testDirectives(t, prod.Directives, expected.Directives, checkPosition)
+ }
+ if prod.LHS != expected.LHS {
+ t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS)
+ }
+ if len(prod.RHS) != len(expected.RHS) {
+ t.Fatalf("unexpected length of an RHS; want: %v, got: %v", len(expected.RHS), len(prod.RHS))
+ }
+ for i, alt := range prod.RHS {
+ testAlternativeNode(t, alt, expected.RHS[i], checkPosition)
+ }
+ if checkPosition {
+ testPosition(t, prod.Pos, expected.Pos)
+ }
+}
+
+func testFragmentNode(t *testing.T, frag, expected *FragmentNode, checkPosition bool) {
+ t.Helper()
+ if frag.LHS != expected.LHS {
+ t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, frag.LHS)
+ }
+ if frag.RHS != expected.RHS {
+ t.Fatalf("unexpected RHS; want: %v, got: %v", expected.RHS, frag.RHS)
+ }
+ if checkPosition {
+ testPosition(t, frag.Pos, expected.Pos)
+ }
+}
+
+func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosition bool) {
+ t.Helper()
+ if len(alt.Elements) != len(expected.Elements) {
+ t.Fatalf("unexpected length of elements; want: %v, got: %v", len(expected.Elements), len(alt.Elements))
+ }
+ for i, elem := range alt.Elements {
+ testElementNode(t, elem, expected.Elements[i], checkPosition)
+ }
+ if len(alt.Directives) != len(expected.Directives) {
+ t.Fatalf("unexpected alternative directive count; want: %v directive, got: %v directive", len(expected.Directives), len(alt.Directives))
+ }
+ if len(alt.Directives) > 0 {
+ testDirectives(t, alt.Directives, expected.Directives, checkPosition)
+ }
+ if checkPosition {
+ testPosition(t, alt.Pos, expected.Pos)
+ }
+}
+
+func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bool) {
+ t.Helper()
+ if elem.ID != expected.ID {
+ t.Fatalf("unexpected ID; want: %v, got: %v", expected.ID, elem.ID)
+ }
+ if elem.Pattern != expected.Pattern {
+ t.Fatalf("unexpected pattern; want: %v, got: %v", expected.Pattern, elem.Pattern)
+ }
+ if checkPosition {
+ testPosition(t, elem.Pos, expected.Pos)
+ }
+}
+
+func testDirectives(t *testing.T, dirs, expected []*DirectiveNode, checkPosition bool) {
+ t.Helper()
+ for i, exp := range expected {
+ dir := dirs[i]
+
+ if exp.Name != dir.Name {
+ t.Fatalf("unexpected directive name; want: %+v, got: %+v", exp.Name, dir.Name)
+ }
+ if len(exp.Parameters) != len(dir.Parameters) {
+ t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", exp.Parameters, dir.Parameters)
+ }
+ for j, expParam := range exp.Parameters {
+ testParameter(t, dir.Parameters[j], expParam, checkPosition)
+ }
+ if checkPosition {
+ testPosition(t, dir.Pos, exp.Pos)
+ }
+ }
+}
+
+func testParameter(t *testing.T, param, expected *ParameterNode, checkPosition bool) {
+ t.Helper()
+ if param.ID != expected.ID {
+ t.Fatalf("unexpected ID parameter; want: %v, got: %v", expected.ID, param.ID)
+ }
+ if param.String != expected.String {
+ t.Fatalf("unexpected string parameter; want: %v, got: %v", expected.String, param.String)
+ }
+ if param.Expansion != expected.Expansion {
+ t.Fatalf("unexpected expansion; want: %v, got: %v", expected.Expansion, param.Expansion)
+ }
+ if checkPosition {
+ testPosition(t, param.Pos, expected.Pos)
+ }
+}
+
+func testPosition(t *testing.T, pos, expected Position) {
+ t.Helper()
+ if pos.Row != expected.Row {
+ t.Fatalf("unexpected position; want: %+v, got: %+v", expected, pos)
+ }
+}
diff --git a/tests/unit/spec/test/parser_test.go b/tests/unit/spec/test/parser_test.go
new file mode 100644
index 0000000..eddba92
--- /dev/null
+++ b/tests/unit/spec/test/parser_test.go
@@ -0,0 +1,411 @@
+package test
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestTree_Format(t *testing.T) {
+ expected := `(a
+ (b
+ (c))
+ (d)
+ (e))`
+ tree := NewNonTerminalTree("a",
+ NewNonTerminalTree("b",
+ NewNonTerminalTree("c"),
+ ),
+ NewNonTerminalTree("d"),
+ NewNonTerminalTree("e"),
+ )
+ actual := string(tree.Format())
+ if actual != expected {
+ t.Fatalf("unexpected format:\n%v", actual)
+ }
+}
+
+func TestDiffTree(t *testing.T) {
+ tests := []struct {
+ t1 *Tree
+ t2 *Tree
+ different bool
+ }{
+ {
+ t1: NewTerminalNode("a", "a"),
+ t2: NewTerminalNode("a", "a"),
+ },
+ {
+ t1: NewTerminalNode("a", "a"),
+ t2: NewTerminalNode("a", "A"),
+ different: true,
+ },
+ {
+ t1: NewTerminalNode("a", "a"),
+ t2: NewTerminalNode("A", "a"),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a"),
+ t2: NewNonTerminalTree("a"),
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ ),
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ NewNonTerminalTree("c"),
+ NewNonTerminalTree("d"),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ NewNonTerminalTree("c"),
+ NewNonTerminalTree("d"),
+ ),
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b",
+ NewNonTerminalTree("c"),
+ ),
+ NewNonTerminalTree("d",
+ NewNonTerminalTree("d"),
+ ),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b",
+ NewNonTerminalTree("c"),
+ ),
+ NewNonTerminalTree("d",
+ NewNonTerminalTree("d"),
+ ),
+ ),
+ },
+ {
+ t1: NewNonTerminalTree("a"),
+ t2: NewNonTerminalTree("b"),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ ),
+ t2: NewNonTerminalTree("a"),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a"),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("c"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ NewNonTerminalTree("c"),
+ NewNonTerminalTree("d"),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ NewNonTerminalTree("c"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ NewNonTerminalTree("c"),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b"),
+ NewNonTerminalTree("c"),
+ NewNonTerminalTree("d"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewNonTerminalTree("a",
+ NewNonTerminalTree("b",
+ NewNonTerminalTree("c"),
+ ),
+ ),
+ t2: NewNonTerminalTree("a",
+ NewNonTerminalTree("b",
+ NewNonTerminalTree("d"),
+ ),
+ ),
+ different: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ diffs := DiffTree(tt.t1, tt.t2)
+ if tt.different && len(diffs) == 0 {
+ t.Fatalf("unexpected result")
+ } else if !tt.different && len(diffs) > 0 {
+ t.Fatalf("unexpected result")
+ }
+ })
+ }
+}
+
+func TestParseTestCase(t *testing.T) {
+ tests := []struct {
+ src string
+ tc *TestCase
+ parseErr bool
+ }{
+ {
+ src: `test
+---
+foo
+---
+(foo)
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo"),
+ Output: NewNonTerminalTree("foo").Fill(),
+ },
+ },
+ {
+ src: `
+test
+
+---
+
+foo
+
+---
+
+(foo)
+
+`,
+ tc: &TestCase{
+ Description: "\ntest\n",
+ Source: []byte("\nfoo\n"),
+ Output: NewNonTerminalTree("foo").Fill(),
+ },
+ },
+ // The length of a part delimiter may be greater than 3.
+ {
+ src: `
+test
+----
+foo
+----
+(foo)
+`,
+ tc: &TestCase{
+ Description: "\ntest",
+ Source: []byte("foo"),
+ Output: NewNonTerminalTree("foo").Fill(),
+ },
+ },
+ // The description part may be empty.
+ {
+ src: `----
+foo
+----
+(foo)
+`,
+ tc: &TestCase{
+ Description: "",
+ Source: []byte("foo"),
+ Output: NewNonTerminalTree("foo").Fill(),
+ },
+ },
+ // The source part may be empty.
+ {
+ src: `test
+---
+---
+(foo)
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte{},
+ Output: NewNonTerminalTree("foo").Fill(),
+ },
+ },
+ // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error,
+ // but we allow it to simplify the implementation of the parser.
+ {
+ src: `test
+----
+foo
+----
+(foo)
+---
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo"),
+ Output: NewNonTerminalTree("foo").Fill(),
+ },
+ },
+ {
+ src: ``,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+foo
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+foo
+---
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+--
+foo
+--
+(foo)
+`,
+ parseErr: true,
+ },
+ // A node may have just one string node.
+ {
+ src: `test
+----
+foo bar
+----
+(foo (bar 'bar'))
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo bar"),
+ Output: NewNonTerminalTree("foo",
+ NewTerminalNode("bar", "bar"),
+ ).Fill(),
+ },
+ },
+ // A node may have just one pattern node.
+ {
+ src: `test
+----
+foo bar
+----
+(foo (bar "bar"))
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo bar"),
+ Output: NewNonTerminalTree("foo",
+ NewTerminalNode("bar", "bar"),
+ ).Fill(),
+ },
+ },
+ // A node may be the error node.
+ {
+ src: `test
+----
+foo x
+----
+(foo (error))
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo x"),
+ Output: NewNonTerminalTree("foo",
+ NewTerminalNode("error", ""),
+ ).Fill(),
+ },
+ },
+ // The error node cannot have a string node.
+ {
+ src: `test
+----
+foo x
+----
+(foo (error 'x'))
+`,
+ parseErr: true,
+ },
+ // The error node cannot have a pattern node.
+ {
+ src: `test
+----
+foo x
+----
+(foo (error "x"))
+`,
+ parseErr: true,
+ },
+ // The error node cannot have another node.
+ {
+ src: `test
+----
+foo x
+----
+(foo (error (a)))
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+foo
+---
+?
+`,
+ parseErr: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ tc, err := ParseTestCase(strings.NewReader(tt.src))
+ if tt.parseErr {
+ if err == nil {
+ t.Fatalf("an expected error didn't occur")
+ }
+ } else {
+ if err != nil {
+ t.Fatal(err)
+ }
+ testTestCase(t, tt.tc, tc)
+ }
+ })
+ }
+}
+
+func testTestCase(t *testing.T, expected, actual *TestCase) {
+ t.Helper()
+
+ if expected.Description != actual.Description ||
+ !reflect.DeepEqual(expected.Source, actual.Source) ||
+ len(DiffTree(expected.Output, actual.Output)) > 0 {
+ t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual)
+ }
+}
diff --git a/tests/unit/tester/tester_test.go b/tests/unit/tester/tester_test.go
new file mode 100644
index 0000000..3c6b1db
--- /dev/null
+++ b/tests/unit/tester/tester_test.go
@@ -0,0 +1,169 @@
+package tester
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ "urubu/spec/grammar/parser"
+ tspec "urubu/spec/test"
+)
+
+func TestTester_Run(t *testing.T) {
+ grammarSrc1 := `
+#name test;
+
+s
+ : foo bar baz
+ | foo error baz #recover
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+foo
+ : 'foo';
+bar
+ : 'bar';
+baz
+ : 'baz';
+`
+
+ grammarSrc2 := `
+#name test;
+
+s
+ : foos
+ ;
+foos
+ : foos foo #ast foos... foo
+ | foo
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+foo
+ : 'foo';
+`
+
+ tests := []struct {
+ grammarSrc string
+ testSrc string
+ error bool
+ }{
+ {
+ grammarSrc: grammarSrc1,
+ testSrc: `
+Test
+---
+foo bar baz
+---
+(s
+ (foo 'foo') (bar 'bar') (baz 'baz'))
+`,
+ },
+ {
+ grammarSrc: grammarSrc1,
+ testSrc: `
+Test
+---
+foo ? baz
+---
+(s
+ (foo 'foo') (error) (baz 'baz'))
+`,
+ },
+ {
+ grammarSrc: grammarSrc1,
+ testSrc: `
+Test
+---
+foo bar baz
+---
+(s)
+`,
+ error: true,
+ },
+ {
+ grammarSrc: grammarSrc1,
+ testSrc: `
+Test
+---
+foo bar baz
+---
+(s
+ (foo) (bar))
+`,
+ error: true,
+ },
+ {
+ grammarSrc: grammarSrc1,
+ testSrc: `
+Test
+---
+foo bar baz
+---
+(s
+ (foo) (bar) (xxx))
+`,
+ error: true,
+ },
+ {
+ grammarSrc: grammarSrc2,
+ testSrc: `
+Test
+---
+foo foo foo
+---
+(s
+ (foos
+ (foo 'foo') (foo 'foo') (foo 'foo')))
+`,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.grammarSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ cg, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+ c, err := tspec.ParseTestCase(strings.NewReader(tt.testSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+ tester := &Tester{
+ Grammar: cg,
+ Cases: []*TestCaseWithMetadata{
+ {
+ TestCase: c,
+ },
+ },
+ }
+ rs := tester.Run()
+ if tt.error {
+ errOccurred := false
+ for _, r := range rs {
+ if r.Error != nil {
+ errOccurred = true
+ }
+ }
+ if !errOccurred {
+ t.Fatal("this test must fail, but it passed")
+ }
+ } else {
+ for _, r := range rs {
+ if r.Error != nil {
+ t.Fatalf("unexpected error occurred: %v", r.Error)
+ }
+ }
+ }
+ })
+ }
+}
diff --git a/tests/unit/utf8/utf8_test.go b/tests/unit/utf8/utf8_test.go
new file mode 100644
index 0000000..2dc8093
--- /dev/null
+++ b/tests/unit/utf8/utf8_test.go
@@ -0,0 +1,181 @@
+package utf8
+
+import (
+ "fmt"
+ "testing"
+)
+
+// TestGenCharBlocks_WellFormed checks that GenCharBlocks splits a rune range
+// into the expected UTF-8 byte-sequence blocks, one block per UTF-8 encoding
+// length/boundary region (1-byte ASCII, 2-byte, the E0/ED special cases,
+// 3-byte, the F0/F4 special cases, and 4-byte).
+func TestGenCharBlocks_WellFormed(t *testing.T) {
+	blk := func(from []byte, to []byte) *CharBlock {
+		return &CharBlock{
+			From: from,
+			To:   to,
+		}
+	}
+
+	bs := func(b ...byte) []byte {
+		return b
+	}
+
+	tests := []struct {
+		from   rune
+		to     rune
+		blocks []*CharBlock
+	}{
+		{
+			from: '\u0000',
+			to:   '\u007f',
+			blocks: []*CharBlock{
+				blk(bs(0x00), bs(0x7f)),
+			},
+		},
+		{
+			from: '\u0080',
+			to:   '\u07ff',
+			blocks: []*CharBlock{
+				blk(bs(0xc2, 0x80), bs(0xdf, 0xbf)),
+			},
+		},
+		{
+			from: '\u0800',
+			to:   '\u0fff',
+			blocks: []*CharBlock{
+				blk(bs(0xe0, 0xa0, 0x80), bs(0xe0, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\u1000',
+			to:   '\ucfff',
+			blocks: []*CharBlock{
+				blk(bs(0xe1, 0x80, 0x80), bs(0xec, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\ud000',
+			to:   '\ud7ff',
+			blocks: []*CharBlock{
+				blk(bs(0xed, 0x80, 0x80), bs(0xed, 0x9f, 0xbf)),
+			},
+		},
+		{
+			from: '\ue000',
+			to:   '\uffff',
+			blocks: []*CharBlock{
+				blk(bs(0xee, 0x80, 0x80), bs(0xef, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\U00010000',
+			to:   '\U0003ffff',
+			blocks: []*CharBlock{
+				blk(bs(0xf0, 0x90, 0x80, 0x80), bs(0xf0, 0xbf, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\U00040000',
+			to:   '\U000fffff',
+			blocks: []*CharBlock{
+				blk(bs(0xf1, 0x80, 0x80, 0x80), bs(0xf3, 0xbf, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\U00100000',
+			to:   '\U0010ffff',
+			blocks: []*CharBlock{
+				blk(bs(0xf4, 0x80, 0x80, 0x80), bs(0xf4, 0x8f, 0xbf, 0xbf)),
+			},
+		},
+		{
+			// The full Unicode range must yield every block above, in order.
+			from: '\u0000',
+			to:   '\U0010ffff',
+			blocks: []*CharBlock{
+				blk(bs(0x00), bs(0x7f)),
+				blk(bs(0xc2, 0x80), bs(0xdf, 0xbf)),
+				blk(bs(0xe0, 0xa0, 0x80), bs(0xe0, 0xbf, 0xbf)),
+				blk(bs(0xe1, 0x80, 0x80), bs(0xec, 0xbf, 0xbf)),
+				blk(bs(0xed, 0x80, 0x80), bs(0xed, 0x9f, 0xbf)),
+				blk(bs(0xee, 0x80, 0x80), bs(0xef, 0xbf, 0xbf)),
+				blk(bs(0xf0, 0x90, 0x80, 0x80), bs(0xf0, 0xbf, 0xbf, 0xbf)),
+				blk(bs(0xf1, 0x80, 0x80, 0x80), bs(0xf3, 0xbf, 0xbf, 0xbf)),
+				blk(bs(0xf4, 0x80, 0x80, 0x80), bs(0xf4, 0x8f, 0xbf, 0xbf)),
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(fmt.Sprintf("%v..%v", tt.from, tt.to), func(t *testing.T) {
+			blks, err := GenCharBlocks(tt.from, tt.to)
+			if err != nil {
+				t.Fatal(err)
+			}
+			// Any mismatch — block count, sequence lengths, or bytes —
+			// reports the same want/got message.
+			mismatch := func() {
+				t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
+			}
+			if len(blks) != len(tt.blocks) {
+				mismatch()
+			}
+			for i, got := range blks {
+				want := tt.blocks[i]
+				if len(got.From) != len(want.From) || len(got.To) != len(want.To) {
+					mismatch()
+				}
+				for j := range got.From {
+					if got.From[j] != want.From[j] || got.To[j] != want.To[j] {
+						mismatch()
+					}
+				}
+			}
+		})
+	}
+}
+
+// TestGenCharBlocks_IllFormed checks that GenCharBlocks rejects invalid rune
+// ranges — reversed bounds, code points outside U+0000..U+10FFFF, and bounds
+// inside the surrogate area — returning an error and a nil block slice.
+func TestGenCharBlocks_IllFormed(t *testing.T) {
+	tests := []struct {
+		from rune
+		to   rune
+	}{
+		{
+			// reversed range: from > to
+			from: '\u0001',
+			to:   '\u0000',
+		},
+		{
+			from: -1, // below U+0000
+			to:   '\u0000',
+		},
+		{
+			from: '\u0000',
+			to:   -1, // below U+0000
+		},
+		{
+			from: 0x110000, // above U+10FFFF
+			to:   '\u0000',
+		},
+		{
+			from: '\u0000',
+			to:   0x110000, // above U+10FFFF
+		},
+		{
+			from: 0xd800, // U+D800, first surrogate code point
+			to:   '\ue000',
+		},
+		{
+			from: 0xdfff, // U+DFFF, last surrogate code point
+			to:   '\ue000',
+		},
+		{
+			from: '\ucfff',
+			to:   0xd800, // U+D800, first surrogate code point
+		},
+		{
+			from: '\ucfff',
+			to:   0xdfff, // U+DFFF, last surrogate code point
+		},
+	}
+	for _, tc := range tests {
+		t.Run(fmt.Sprintf("%v..%v", tc.from, tc.to), func(t *testing.T) {
+			blks, err := GenCharBlocks(tc.from, tc.to)
+			if err == nil {
+				t.Fatal("expected error didn't occur")
+			}
+			if blks != nil {
+				t.Fatal("character blocks must be nil")
+			}
+		})
+	}
+}