diff options
Diffstat (limited to 'tests/unit/spec/grammar/parser/lexer_test.go')
-rw-r--r-- | tests/unit/spec/grammar/parser/lexer_test.go | 236 |
1 files changed, 0 insertions, 236 deletions
diff --git a/tests/unit/spec/grammar/parser/lexer_test.go b/tests/unit/spec/grammar/parser/lexer_test.go deleted file mode 100644 index c402b42..0000000 --- a/tests/unit/spec/grammar/parser/lexer_test.go +++ /dev/null @@ -1,236 +0,0 @@ -package parser - -import ( - "strings" - "testing" - - verr "urubu/error" -) - -func TestLexer_Run(t *testing.T) { - idTok := func(text string) *token { - return newIDToken(text, newPosition(1, 0)) - } - - termPatTok := func(text string) *token { - return newTerminalPatternToken(text, newPosition(1, 0)) - } - - strTok := func(text string) *token { - return newStringLiteralToken(text, newPosition(1, 0)) - } - - symTok := func(kind tokenKind) *token { - return newSymbolToken(kind, newPosition(1, 0)) - } - - invalidTok := func(text string) *token { - return newInvalidToken(text, newPosition(1, 0)) - } - - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "the lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;@...#$()`, - tokens: []*token{ - idTok("id"), - termPatTok("terminal"), - strTok(`string`), - symTok(tokenKindColon), - symTok(tokenKindOr), - symTok(tokenKindSemicolon), - symTok(tokenKindLabelMarker), - symTok(tokenKindExpantion), - symTok(tokenKindDirectiveMarker), - symTok(tokenKindOrderedSymbolMarker), - symTok(tokenKindLParen), - symTok(tokenKindRParen), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize keywords", - src: `fragment`, - tokens: []*token{ - symTok(tokenKindKWFragment), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize character sequences and escape sequences in a terminal", - src: `"abc\"\\"`, - tokens: []*token{ - termPatTok(`abc"\\`), - newEOFToken(), - }, - }, - { - caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", - src: `'\\\'`, - tokens: []*token{ - strTok(`\\\`), - newEOFToken(), - }, - }, - { - caption: "a pattern must include at least one character", - src: `""`, - err: synErrEmptyPattern, - }, - { - caption: "a string must include at least one character", - src: `''`, - err: synErrEmptyString, - }, - { - caption: "the lexer can recognize newlines and combine consecutive newlines into one", - src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", - tokens: []*token{ - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "the lexer ignores line comments", - src: ` -// This is the first comment. -foo -// This is the second comment. -// This is the third comment. -bar // This is the fourth comment. -`, - tokens: []*token{ - symTok(tokenKindNewline), - idTok("foo"), - symTok(tokenKindNewline), - idTok("bar"), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "an identifier cannot contain the capital-case letters", - src: `Abc`, - err: synErrIDInvalidChar, - }, - { - caption: "an identifier cannot contain the capital-case letters", - src: `Zyx`, - err: synErrIDInvalidChar, - }, - { - caption: "the underscore cannot be placed at the beginning of an identifier", - src: `_abc`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed at the end of an identifier", - src: `abc_`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed consecutively", - src: `a__b`, - err: synErrIDConsecutiveUnderscores, - }, - { - caption: "the digits cannot be placed at the biginning of an identifier", - src: `0abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "the digits cannot be placed at the biginning of an identifier", - src: `9abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "an unclosed terminal is not a valid token", - src: `"abc`, - err: synErrUnclosedTerminal, - }, - { - caption: "an incompleted escape sequence in a pattern is not a valid token", - src: `"\`, - err: synErrIncompletedEscSeq, - }, - { - caption: "an unclosed string is not a valid token", - src: `'abc`, - err: synErrUnclosedString, - }, - { - caption: "the lexer can recognize valid tokens following an invalid token", - src: `abc!!!def`, - tokens: []*token{ - idTok("abc"), - invalidTok("!!!"), - idTok("def"), - newEOFToken(), - }, - }, - { - caption: "the lexer skips white spaces", - // \u0009: HT - // \u0020: SP - src: "a\u0009b\u0020c", - tokens: []*token{ - idTok("a"), - idTok("b"), - idTok("c"), - newEOFToken(), - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - l, err := newLexer(strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - n := 0 - for { - var tok *token - tok, err = l.next() - if err != nil { - break - } - testToken(t, tok, tt.tokens[n]) - n++ - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - synErr, ok := err.(*verr.SpecError) - if !ok { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - if tt.err != synErr.Cause { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) - } - } else { - if err != nil { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - } - }) - } -} - -func testToken(t *testing.T, tok, expected *token) { - t.Helper() - if tok.kind != expected.kind || tok.text != expected.text { - t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) - } -} |