1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
package main
import (
"encoding/json"
"fmt"
"io"
"os"
"strings"
driver "urubu/driver/parser"
spec "urubu/spec/grammar"
)
// runParse loads the compiled grammar stored at args[0], parses the text read
// from standard input with it, and writes the resulting syntax tree to
// standard output as a single line of JSON.
//
// It returns an error when no grammar path is given, when the grammar cannot
// be read, when the token stream or the parser cannot be constructed, when
// Parse itself fails, or when the parser recorded syntax errors. In the last
// case the error text holds one line per syntax error, and the tree (if any)
// has already been printed.
func runParse(args []string) error {
	// Guard the index below; an empty argument list would otherwise panic.
	if len(args) == 0 {
		return fmt.Errorf("a path to a compiled grammar is required")
	}

	cg, err := readCompiledGrammar(args[0])
	if err != nil {
		return fmt.Errorf("Cannot read a compiled grammar: %w", err)
	}

	src := os.Stdin
	gram := driver.NewGrammar(cg)

	// The tree builder is driven by the parser through the CST action set and
	// is queried for the finished tree after parsing.
	tb := driver.NewDefaultSyntaxTreeBuilder()
	treeAct := driver.NewCSTActionSet(gram, tb)

	toks, err := driver.NewTokenStream(cg, src)
	if err != nil {
		return err
	}

	p, err := driver.NewParser(toks, gram, driver.SemanticAction(treeAct))
	if err != nil {
		return err
	}

	if err := p.Parse(); err != nil {
		return err
	}

	// A parser can construct a parse tree even if syntax errors occur.
	// When there is a parse tree, print it.
	if tree := tb.Tree(); tree != nil {
		b, err := json.Marshal(tree)
		if err != nil {
			return err
		}
		fmt.Fprintln(os.Stdout, string(b))
	}

	if synErrs := p.SyntaxErrors(); len(synErrs) > 0 {
		var b strings.Builder
		for i, synErr := range synErrs {
			if i > 0 {
				b.WriteByte('\n')
			}
			writeSyntaxErrorMessage(&b, cg, synErr)
		}
		if b.Len() > 0 {
			// The message is data, not a format string: lexemes may contain
			// '%', which must be reported verbatim.
			return fmt.Errorf("%s", b.String())
		}
	}

	return nil
}
// readCompiledGrammar opens the file at path, reads it completely, and decodes
// its JSON content into a spec.CompiledGrammar.
//
// It returns a nil grammar together with the underlying error when the file
// cannot be opened or read, or when its content is not valid JSON for the
// target type.
func readCompiledGrammar(path string) (*spec.CompiledGrammar, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Release the file descriptor on every return path. The file is only read,
	// so a Close error carries no information worth reporting.
	defer f.Close()

	data, err := io.ReadAll(f)
	if err != nil {
		return nil, err
	}

	cg := &spec.CompiledGrammar{}
	if err := json.Unmarshal(data, cg); err != nil {
		return nil, err
	}
	return cg, nil
}
// writeSyntaxErrorMessage appends a one-line description of synErr to b in the
// form
//
//	<row>:<col>: <message>: <token>: expected: <t1>, <t2>, ...
//
// Row and column are written as synErr.Row+1 and synErr.Col+1 (presumably the
// stored values are zero-based; confirm against the driver). The token part is
// "<eof>" for an EOF token, the quoted lexeme followed by "(<invalid>)" for an
// invalid token, and otherwise the quoted lexeme followed by the terminal name
// looked up in cgram.Syntactic.Terminals when that name is non-empty.
//
// The ": expected: ..." suffix is omitted when synErr.ExpectedTerminals is
// empty.
func writeSyntaxErrorMessage(b *strings.Builder, cgram *spec.CompiledGrammar, synErr *driver.SyntaxError) {
	fmt.Fprintf(b, "%v:%v: %v: ", synErr.Row+1, synErr.Col+1, synErr.Message)

	tok := synErr.Token
	switch {
	case tok.EOF():
		fmt.Fprintf(b, "<eof>")
	case tok.Invalid():
		fmt.Fprintf(b, "'%v' (<invalid>)", string(tok.Lexeme()))
	default:
		if kind := cgram.Syntactic.Terminals[tok.TerminalID()]; kind != "" {
			fmt.Fprintf(b, "'%v' (%v)", string(tok.Lexeme()), kind)
		} else {
			fmt.Fprintf(b, "'%v'", string(tok.Lexeme()))
		}
	}

	// Indexing element 0 unconditionally would panic on an error that carries
	// no expected terminals, so the suffix is only written when there is one.
	if len(synErr.ExpectedTerminals) == 0 {
		return
	}
	fmt.Fprintf(b, ": expected: %v", synErr.ExpectedTerminals[0])
	for _, t := range synErr.ExpectedTerminals[1:] {
		fmt.Fprintf(b, ", %v", t)
	}
}
|