diff options
Diffstat (limited to 'spec/grammar/parser/lexer_test.go')
-rw-r--r-- | spec/grammar/parser/lexer_test.go | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/spec/grammar/parser/lexer_test.go b/spec/grammar/parser/lexer_test.go new file mode 100644 index 0000000..64c2aa2 --- /dev/null +++ b/spec/grammar/parser/lexer_test.go @@ -0,0 +1,236 @@ +package parser + +import ( + "strings" + "testing" + + verr "github.com/nihei9/vartan/error" +) + +func TestLexer_Run(t *testing.T) { + idTok := func(text string) *token { + return newIDToken(text, newPosition(1, 0)) + } + + termPatTok := func(text string) *token { + return newTerminalPatternToken(text, newPosition(1, 0)) + } + + strTok := func(text string) *token { + return newStringLiteralToken(text, newPosition(1, 0)) + } + + symTok := func(kind tokenKind) *token { + return newSymbolToken(kind, newPosition(1, 0)) + } + + invalidTok := func(text string) *token { + return newInvalidToken(text, newPosition(1, 0)) + } + + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "the lexer can recognize all kinds of tokens", + src: `id"terminal"'string':|;@...#$()`, + tokens: []*token{ + idTok("id"), + termPatTok("terminal"), + strTok(`string`), + symTok(tokenKindColon), + symTok(tokenKindOr), + symTok(tokenKindSemicolon), + symTok(tokenKindLabelMarker), + symTok(tokenKindExpantion), + symTok(tokenKindDirectiveMarker), + symTok(tokenKindOrderedSymbolMarker), + symTok(tokenKindLParen), + symTok(tokenKindRParen), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize keywords", + src: `fragment`, + tokens: []*token{ + symTok(tokenKindKWFragment), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize character sequences and escape sequences in a terminal", + src: `"abc\"\\"`, + tokens: []*token{ + termPatTok(`abc"\\`), + newEOFToken(), + }, + }, + { + caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", + src: `'\\\'`, + tokens: []*token{ + strTok(`\\\`), + newEOFToken(), + }, + }, + { + caption: "a pattern must include at least one character", + src: `""`, + err: synErrEmptyPattern, + }, + { + caption: "a string must include at least one character", + src: `''`, + err: synErrEmptyString, + }, + { + caption: "the lexer can recognize newlines and combine consecutive newlines into one", + src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", + tokens: []*token{ + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "the lexer ignores line comments", + src: ` +// This is the first comment. +foo +// This is the second comment. +// This is the third comment. +bar // This is the fourth comment. +`, + tokens: []*token{ + symTok(tokenKindNewline), + idTok("foo"), + symTok(tokenKindNewline), + idTok("bar"), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "an identifier cannot contain the capital-case letters", + src: `Abc`, + err: synErrIDInvalidChar, + }, + { + caption: "an identifier cannot contain the capital-case letters", + src: `Zyx`, + err: synErrIDInvalidChar, + }, + { + caption: "the underscore cannot be placed at the beginning of an identifier", + src: `_abc`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed at the end of an identifier", + src: `abc_`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed consecutively", + src: `a__b`, + err: synErrIDConsecutiveUnderscores, + }, + { + caption: "the digits cannot be placed at the biginning of an identifier", + src: `0abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "the digits cannot be placed at the biginning of an identifier", + src: `9abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "an unclosed terminal is not a valid token", + src: `"abc`, + err: synErrUnclosedTerminal, + }, + { + caption: "an incompleted escape sequence in a pattern is not a valid token", + src: `"\`, + err: synErrIncompletedEscSeq, + }, + { + caption: "an unclosed string is not a valid token", + src: `'abc`, + err: synErrUnclosedString, + }, + { + caption: "the lexer can recognize valid tokens following an invalid token", + src: `abc!!!def`, + tokens: []*token{ + idTok("abc"), + invalidTok("!!!"), + idTok("def"), + newEOFToken(), + }, + }, + { + caption: "the lexer skips white spaces", + // \u0009: HT + // \u0020: SP + src: "a\u0009b\u0020c", + tokens: []*token{ + idTok("a"), + idTok("b"), + idTok("c"), + newEOFToken(), + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + l, err := newLexer(strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + n := 0 + for { + var tok *token + tok, err = l.next() + if err != nil { + break + } + testToken(t, tok, tt.tokens[n]) + n++ + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + synErr, ok := err.(*verr.SpecError) + if !ok { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + if tt.err != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) + } + } else { + if err != nil { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + } + }) + } +} + +func testToken(t *testing.T, tok, expected *token) { + t.Helper() + if tok.kind != expected.kind || tok.text != expected.text { + t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) + } +} |