From 210a76a5aa0e62f8ab48a94e3c5b5212b5da08fa Mon Sep 17 00:00:00 2001
From: Ryo Nihei
Date: Sat, 17 Apr 2021 16:14:58 +0900
Subject: Change the lexical specs of regexp and define concrete syntax error values

* Make the lexer treat ']' as an ordinary character in default mode

* Define values of the syntax error type that represents error information concretely
---
 compiler/lexer_test.go | 87 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 29 deletions(-)

(limited to 'compiler/lexer_test.go')

diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index 451c3c7..c77d7c7 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -1,7 +1,6 @@
 package compiler
 
 import (
-	"reflect"
 	"strings"
 	"testing"
 )
@@ -30,8 +29,8 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			caption: "lexer can recognize the special characters",
-			src:     ".*+?|()[a-z][^^]",
+			caption: "lexer can recognize the special characters in default mode",
+			src:     ".*+?|()[",
 			tokens: []*token{
 				newToken(tokenKindAnyChar, nullChar),
 				newToken(tokenKindRepeat, nullChar),
@@ -41,19 +40,12 @@ func TestLexer(t *testing.T) {
 				newToken(tokenKindGroupOpen, nullChar),
 				newToken(tokenKindGroupClose, nullChar),
 				newToken(tokenKindBExpOpen, nullChar),
-				newToken(tokenKindChar, 'a'),
-				newToken(tokenKindCharRange, nullChar),
-				newToken(tokenKindChar, 'z'),
-				newToken(tokenKindBExpClose, nullChar),
-				newToken(tokenKindInverseBExpOpen, nullChar),
-				newToken(tokenKindChar, '^'),
-				newToken(tokenKindBExpClose, nullChar),
 				newToken(tokenKindEOF, nullChar),
 			},
 		},
 		{
-			caption: "lexer can recognize the escape sequences",
-			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[\\][\\^\\-]",
+			caption: "lexer can recognize the escape sequences in default mode",
+			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
 			tokens: []*token{
 				newToken(tokenKindChar, '\\'),
 				newToken(tokenKindChar, '.'),
@@ -64,17 +56,50 @@ func TestLexer(t *testing.T) {
 				newToken(tokenKindChar, '('),
 				newToken(tokenKindChar, ')'),
 				newToken(tokenKindChar, '['),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "] is treated as an ordinary character in default mode",
+			src:     "]",
+			tokens: []*token{
 				newToken(tokenKindChar, ']'),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the special characters in bracket expression mode",
+			src:     "[a-z][^a-z]",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindChar, 'a'),
+				newToken(tokenKindCharRange, nullChar),
+				newToken(tokenKindChar, 'z'),
+				newToken(tokenKindBExpClose, nullChar),
+				newToken(tokenKindInverseBExpOpen, nullChar),
+				newToken(tokenKindChar, 'a'),
+				newToken(tokenKindCharRange, nullChar),
+				newToken(tokenKindChar, 'z'),
+				newToken(tokenKindBExpClose, nullChar),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the escape sequences in bracket expression mode",
+			src:     "[\\^a\\-z]",
+			tokens: []*token{
 				newToken(tokenKindBExpOpen, nullChar),
 				newToken(tokenKindChar, '^'),
+				newToken(tokenKindChar, 'a'),
 				newToken(tokenKindChar, '-'),
+				newToken(tokenKindChar, 'z'),
 				newToken(tokenKindBExpClose, nullChar),
 				newToken(tokenKindEOF, nullChar),
 			},
 		},
 		{
 			caption: "in a bracket expression, the special characters are also handled as normal characters",
-			src:     "[\\\\.*+?|()[\\]].*|()-][",
+			src:     "[\\\\.*+?|()[",
 			tokens: []*token{
 				newToken(tokenKindBExpOpen, nullChar),
 				newToken(tokenKindChar, '\\'),
@@ -86,16 +111,6 @@ func TestLexer(t *testing.T) {
 				newToken(tokenKindChar, '('),
 				newToken(tokenKindChar, ')'),
 				newToken(tokenKindChar, '['),
-				newToken(tokenKindChar, ']'),
-				newToken(tokenKindBExpClose, nullChar),
-				newToken(tokenKindAnyChar, nullChar),
-				newToken(tokenKindRepeat, nullChar),
-				newToken(tokenKindAlt, nullChar),
-				newToken(tokenKindGroupOpen, nullChar),
-				newToken(tokenKindGroupClose, nullChar),
-				newToken(tokenKindChar, '-'),
-				newToken(tokenKindBExpClose, nullChar),
-				newToken(tokenKindBExpOpen, nullChar),
 				newToken(tokenKindEOF, nullChar),
 			},
 		},
@@ -195,12 +210,28 @@ func TestLexer(t *testing.T) {
 		{
 			caption: "lexer raises an error when an invalid escape sequence appears",
 			src:     "\\@",
-			err:     &SyntaxError{},
+			err:     synErrInvalidEscSeq,
 		},
 		{
 			caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
 			src:     "\\",
-			err:     &SyntaxError{},
+			err:     synErrIncompletedEscSeq,
+		},
+		{
+			caption: "lexer raises an error when an invalid escape sequence appears",
+			src:     "[\\@",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+			},
+			err: synErrInvalidEscSeq,
+		},
+		{
+			caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+			src:     "[\\",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+			},
+			err: synErrIncompletedEscSeq,
 		},
 	}
 	for _, tt := range tests {
@@ -225,10 +256,8 @@ func TestLexer(t *testing.T) {
 				break
 			}
 		}
-		ty := reflect.TypeOf(err)
-		eTy := reflect.TypeOf(tt.err)
-		if ty != eTy {
-			t.Fatalf("unexpected error type; want: %v, got: %v", eTy, ty)
+		if err != tt.err {
+			t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
 		}
 		if i < len(tt.tokens) {
 			t.Fatalf("expecte more tokens")
--
cgit v1.2.3
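
Note on the error comparison above: the test now checks err != tt.err directly instead of comparing types via reflect, which only works if each syntax error condition is exposed as a single shared, comparable value. The following is a minimal sketch of what such concrete definitions could look like. The names synErrInvalidEscSeq and synErrIncompletedEscSeq come from the patch itself; the SyntaxError field, the messages, and the newSyntaxError helper are assumptions made for illustration, not the actual definitions in the compiler package.

package compiler

import "fmt"

// SyntaxError represents a syntax error detected while lexing a regular
// expression. The field set here is an assumption for this sketch.
type SyntaxError struct {
	message string
}

func (e *SyntaxError) Error() string {
	return fmt.Sprintf("syntax error: %s", e.message)
}

// newSyntaxError is a hypothetical constructor used only in this sketch.
func newSyntaxError(message string) *SyntaxError {
	return &SyntaxError{message: message}
}

// Each error condition is a single package-level value, so a caller (or the
// test above) can compare a returned error with == / != instead of using
// reflect.TypeOf.
var (
	synErrInvalidEscSeq     = newSyntaxError("invalid escape sequence")
	synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following \\")
)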