aboutsummaryrefslogtreecommitdiff
path: root/compiler/lexer_test.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-04-11 16:41:05 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-04-11 16:41:05 +0900
commita13655e3936ba9166051914832dedcbdb28b056c (patch)
treec6fcd27c4769aecf16d4f8d2a43f1297f62b6caa /compiler/lexer_test.go
parentAdd logging to compile command (diff)
downloadtre-a13655e3936ba9166051914832dedcbdb28b056c.tar.gz
tre-a13655e3936ba9166051914832dedcbdb28b056c.tar.xz
Fix grammar the parser accepts
* Add cases test the parse method. * Fix the parser to pass the cases.
Diffstat (limited to 'compiler/lexer_test.go')
-rw-r--r--compiler/lexer_test.go73
1 files changed, 72 insertions, 1 deletions
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index 0dfe2f7..451c3c7 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -31,7 +31,7 @@ func TestLexer(t *testing.T) {
},
{
caption: "lexer can recognize the special characters",
- src: ".*+?|()[-][^^]",
+ src: ".*+?|()[a-z][^^]",
tokens: []*token{
newToken(tokenKindAnyChar, nullChar),
newToken(tokenKindRepeat, nullChar),
@@ -41,7 +41,9 @@ func TestLexer(t *testing.T) {
newToken(tokenKindGroupOpen, nullChar),
newToken(tokenKindGroupClose, nullChar),
newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
newToken(tokenKindBExpClose, nullChar),
newToken(tokenKindInverseBExpOpen, nullChar),
newToken(tokenKindChar, '^'),
@@ -98,6 +100,75 @@ func TestLexer(t *testing.T) {
},
},
{
+ caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
+ // [...-...][...-][-...][-]
+ // ~~~~~~~ ~ ~ ~
+ // ^ ^ ^ ^
+ // | | | `-- Ordinary Character (b)
+ // | | `-- Ordinary Character (b)
+ // | `-- Ordinary Character (b)
+ // `-- Character Range (a)
+ //
+ // a. *-* is handled as a character range expression.
+ // b. *-, -*, or - are handled as ordinary characters.
+ src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
// [^...^...][^]
// ~~ ~ ~~