about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ryo Nihei <nihei.dev@gmail.com> 2022-05-13 22:40:50 +0900
committer Ryo Nihei <nihei.dev@gmail.com> 2022-05-15 20:42:28 +0900
commit 15ea142b25927d6f103ee6ddde4fe8a5e2324831 (patch)
tree 8c83fc31ac7d5712374ee82681bea87e071c97fd
parent Stop handling panic to print a stack trace (diff)
download urubu-15ea142b25927d6f103ee6ddde4fe8a5e2324831.tar.gz
urubu-15ea142b25927d6f103ee6ddde4fe8a5e2324831.tar.xz
Prohibit using escape sequences in string literals
-rw-r--r-- driver/parser_test.go 24
-rw-r--r-- spec/lexer.go 12
-rw-r--r-- spec/lexer_test.go 13
-rw-r--r-- spec/lexspec.json 22
-rw-r--r-- spec/vartan_lexer.go 73
5 files changed, 58 insertions, 86 deletions
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 60dd3f4..dc1c141 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -114,6 +114,30 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
),
),
},
+ // Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
+ // not allowed in string literals.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a b c
+ ;
+
+a
+ : '\f{foo}';
+b
+ : '\u{0000}';
+c
+ : '\p{gc=Letter}';
+`,
+ src: `\f{foo}\u{0000}\p{gc=Letter}`,
+ cst: nonTermNode("s",
+ termNode("a", `\f{foo}`),
+ termNode("b", `\u{0000}`),
+ termNode("c", `\p{gc=Letter}`),
+ ),
+ },
// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
{
specSrc: `
diff --git a/spec/lexer.go b/spec/lexer.go
index d1f3ae7..03ba72b 100644
--- a/spec/lexer.go
+++ b/spec/lexer.go
@@ -261,18 +261,6 @@ func (l *lexer) lexAndSkipWSs() (*token, error) {
switch tok.KindID {
case KindIDCharSeq:
fmt.Fprint(&b, string(tok.Lexeme))
- case KindIDEscapedQuot:
- // Remove '\' character.
- fmt.Fprint(&b, `'`)
- case KindIDEscapedBackSlash:
- // Remove '\' character.
- fmt.Fprint(&b, `\`)
- case KindIDEscapeSymbol:
- return nil, &verr.SpecError{
- Cause: synErrIncompletedEscSeq,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
case KindIDStringLiteralClose:
str := b.String()
if str == "" {
diff --git a/spec/lexer_test.go b/spec/lexer_test.go
index 5f555ec..d822de2 100644
--- a/spec/lexer_test.go
+++ b/spec/lexer_test.go
@@ -70,10 +70,10 @@ func TestLexer_Run(t *testing.T) {
},
},
{
- caption: "the lexer can recognize character sequences and escape sequences in a string literal",
- src: `'.*+?|()[\'\\'`,
+ caption: "backslashes are recognized as they are because escape sequences are not allowed in strings",
+ src: `'\\\'`,
tokens: []*token{
- strTok(`.*+?|()['\`),
+ strTok(`\\\`),
newEOFToken(),
},
},
@@ -160,7 +160,7 @@ bar // This is the fourth comment.
err: synErrUnclosedTerminal,
},
{
- caption: "an incompleted terminal in a pattern is not a valid token",
+ caption: "an incompleted escape sequence in a pattern is not a valid token",
src: `"\`,
err: synErrIncompletedEscSeq,
},
@@ -170,11 +170,6 @@ bar // This is the fourth comment.
err: synErrUnclosedString,
},
{
- caption: "an incompleted terminal in a string is not a valid token",
- src: `'\`,
- err: synErrIncompletedEscSeq,
- },
- {
caption: "the lexer can recognize valid tokens following an invalid token",
src: `abc!!!def`,
tokens: []*token{
diff --git a/spec/lexspec.json b/spec/lexspec.json
index b8b67f5..caf1f0e 100644
--- a/spec/lexspec.json
+++ b/spec/lexspec.json
@@ -58,6 +58,11 @@
},
{
"modes": ["terminal"],
+ "kind": "escape_symbol",
+ "pattern": "\\\\"
+ },
+ {
+ "modes": ["terminal"],
"kind": "terminal_close",
"pattern": "\"",
"pop": true
@@ -70,17 +75,7 @@
{
"modes": ["string_literal"],
"kind": "char_seq",
- "pattern": "[^'\\\\]+"
- },
- {
- "modes": ["string_literal"],
- "kind": "escaped_quot",
- "pattern": "\\\\'"
- },
- {
- "modes": ["string_literal"],
- "kind": "escaped_back_slash",
- "pattern": "\\\\\\\\"
+ "pattern": "[^']+"
},
{
"modes": ["string_literal"],
@@ -89,11 +84,6 @@
"pop": true
},
{
- "modes": ["terminal", "string_literal"],
- "kind": "escape_symbol",
- "pattern": "\\\\"
- },
- {
"kind": "colon",
"pattern": ":"
},
diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go
index f0a8b3e..b892c43 100644
--- a/spec/vartan_lexer.go
+++ b/spec/vartan_lexer.go
@@ -360,12 +360,10 @@ const (
KindIDLParen KindID = 15
KindIDRParen KindID = 16
KindIDPattern KindID = 17
- KindIDTerminalClose KindID = 18
- KindIDEscapeSymbol KindID = 19
+ KindIDEscapeSymbol KindID = 18
+ KindIDTerminalClose KindID = 19
KindIDCharSeq KindID = 20
- KindIDEscapedQuot KindID = 21
- KindIDEscapedBackSlash KindID = 22
- KindIDStringLiteralClose KindID = 23
+ KindIDStringLiteralClose KindID = 21
)
const (
@@ -387,11 +385,9 @@ const (
KindNameLParen = "l_paren"
KindNameRParen = "r_paren"
KindNamePattern = "pattern"
- KindNameTerminalClose = "terminal_close"
KindNameEscapeSymbol = "escape_symbol"
+ KindNameTerminalClose = "terminal_close"
KindNameCharSeq = "char_seq"
- KindNameEscapedQuot = "escaped_quot"
- KindNameEscapedBackSlash = "escaped_back_slash"
KindNameStringLiteralClose = "string_literal_close"
)
@@ -434,16 +430,12 @@ func KindIDToName(id KindID) string {
return KindNameRParen
case KindIDPattern:
return KindNamePattern
- case KindIDTerminalClose:
- return KindNameTerminalClose
case KindIDEscapeSymbol:
return KindNameEscapeSymbol
+ case KindIDTerminalClose:
+ return KindNameTerminalClose
case KindIDCharSeq:
return KindNameCharSeq
- case KindIDEscapedQuot:
- return KindNameEscapedQuot
- case KindIDEscapedBackSlash:
- return KindNameEscapedBackSlash
case KindIDStringLiteralClose:
return KindNameStringLiteralClose
}
@@ -478,10 +470,10 @@ func NewLexSpec() *lexSpec {
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
},
{
- false, false, true, false,
+ false, false, false, true,
},
{
- false, false, false, false, true, false,
+ false, false, true,
},
},
push: [][]ModeID{
@@ -493,7 +485,7 @@ func NewLexSpec() *lexSpec {
0, 0, 0, 0,
},
{
- 0, 0, 0, 0, 0, 0,
+ 0, 0, 0,
},
},
modeNames: []string{
@@ -517,14 +509,13 @@ func NewLexSpec() *lexSpec {
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
- 2, 3, 4,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
},
},
kindIDs: [][]KindID{
@@ -551,16 +542,13 @@ func NewLexSpec() *lexSpec {
{
KindIDNil,
KindIDPattern,
- KindIDTerminalClose,
KindIDEscapeSymbol,
+ KindIDTerminalClose,
},
{
KindIDNil,
KindIDCharSeq,
- KindIDEscapedQuot,
- KindIDEscapedBackSlash,
KindIDStringLiteralClose,
- KindIDEscapeSymbol,
},
},
kindNames: []string{
@@ -582,11 +570,9 @@ func NewLexSpec() *lexSpec {
KindNameLParen,
KindNameRParen,
KindNamePattern,
- KindNameTerminalClose,
KindNameEscapeSymbol,
+ KindNameTerminalClose,
KindNameCharSeq,
- KindNameEscapedQuot,
- KindNameEscapedBackSlash,
KindNameStringLiteralClose,
},
initialModeID: ModeIDDefault,
@@ -609,8 +595,7 @@ func NewLexSpec() *lexSpec {
},
{
0, 1, 2, 3, 2, 4, 2, 5, 2, 6, 2, 7, 8, 2, 9, 10, 2, 11, 12, 2,
- 13, 2, 14, 2, 15, 2, 16, 2, 17, 2, 18, 19, 2, 20, 21, 2, 22, 23, 2, 24,
- 0, 0, 0,
+ 13, 2, 14, 2, 15, 2, 16, 2, 17, 2, 18, 19, 2, 20, 21, 2, 22, 23, 2, 0,
},
},
rowDisplacements: [][]int{
@@ -626,7 +611,7 @@ func NewLexSpec() *lexSpec {
},
{
0, 0, 246, 1194, 362, 1258, 426, 1114, 490, 554, 618, 1355, 682, 245, 1259, 746, 1323, 810, 1162, 874,
- 938, 1002, 1371, 1066, 1476,
+ 938, 1002, 1371, 1066,
},
},
bounds: [][]int{
@@ -901,7 +886,7 @@ func NewLexSpec() *lexSpec {
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, -1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -1, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -960,18 +945,13 @@ func NewLexSpec() *lexSpec {
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 24, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, 24, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1,
},
},
entries: [][]StateID{
@@ -1231,10 +1211,10 @@ func NewLexSpec() *lexSpec {
},
{
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 42,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 39,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 39, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1246,7 +1226,7 @@ func NewLexSpec() *lexSpec {
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -1305,18 +1285,13 @@ func NewLexSpec() *lexSpec {
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 37,
- 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 40, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
},
},
originalColCounts: nil,