From a1d1cfe08ae809d454ac6f1ce80a19395e7940e5 Mon Sep 17 00:00:00 2001 From: Ryo Nihei Date: Sun, 14 Feb 2021 17:38:46 +0900 Subject: Add dot symbol matching any single character The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences --- compiler/ast.go | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'compiler/ast.go') diff --git a/compiler/ast.go b/compiler/ast.go index d31c92b..d4b8956 100644 --- a/compiler/ast.go +++ b/compiler/ast.go @@ -119,6 +119,15 @@ func (s symbolPositionSet) sort() []symbolPosition { return sorted } +type byteRange struct { + from byte + to byte +} + +func (r byteRange) String() string { + return fmt.Sprintf("%v - %v", r.from, r.to) +} + type astNode interface { fmt.Stringer children() (astNode, astNode) @@ -128,21 +137,35 @@ type astNode interface { } type symbolNode struct { + byteRange token *token - value byte pos symbolPosition } func newSymbolNode(tok *token, value byte, pos symbolPosition) *symbolNode { return &symbolNode{ + byteRange: byteRange{ + from: value, + to: value, + }, + token: tok, + pos: pos, + } +} + +func newRangeSymbolNode(tok *token, from, to byte, pos symbolPosition) *symbolNode { + return &symbolNode{ + byteRange: byteRange{ + from: from, + to: to, + }, token: tok, - value: value, pos: pos, } } func (n *symbolNode) String() string { - return fmt.Sprintf("{type: char, char: %v, int: %v, pos: %v}", string(n.token.char), n.token.char, n.pos) + return fmt.Sprintf("{type: symbol, value: %v - %v, token char: %v, pos: %v}", n.from, n.to, string(n.token.char), n.pos) } func (n *symbolNode) children() (astNode, astNode) { -- cgit v1.2.3