diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-04-30 01:54:02 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-04-30 01:54:02 +0900 |
commit | 43fdbf94ad87ea91a173c72688cad70a0a5f1ab4 (patch) | |
tree | 655f651e39f13b5e415445d1ef24f4ecb7511041 /compiler/lexer_test.go | |
parent | Add code point expression (Meet RL1.1 of UTS #18) (diff) | |
download | tre-43fdbf94ad87ea91a173c72688cad70a0a5f1ab4.tar.gz tre-43fdbf94ad87ea91a173c72688cad70a0a5f1ab4.tar.xz |
Add character property expression (Meet RL1.2 of UTS #18 partially)
\p{property name=property value} matches a character that has the property.
When the property name is General_Category, it can be omitted.
That is, \p{Letter} equals \p{General_Category=Letter}.
Currently, only General_Category is supported.
This feature partially meets RL1.2 of UTS #18.
RL1.2 Properties: https://unicode.org/reports/tr18/#RL1.2
Diffstat (limited to 'compiler/lexer_test.go')
-rw-r--r-- | compiler/lexer_test.go | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go index 87e3a81..089253b 100644 --- a/compiler/lexer_test.go +++ b/compiler/lexer_test.go @@ -401,6 +401,50 @@ func TestLexer(t *testing.T) { }, err: synErrInvalidCodePoint, }, + { + caption: "lexer can recognize the special characters and symbols in character property expression mode", + src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", + tokens: []*token{ + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, } for _, tt := range tests { t.Run(tt.caption, func(t 
*testing.T) { @@ -425,7 +469,7 @@ func TestLexer(t *testing.T) { } } if err != tt.err { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + t.Fatalf("unexpected error; want: %v, got: %v (%v)", tt.err, err, lex.errMsgDetails) } if i < len(tt.tokens) { t.Fatalf("expecte more tokens") |