aboutsummaryrefslogtreecommitdiff
path: root/src/urubu/cmd/vartan/parse.go
blob: 9c5fd9c97f4526160b48f80c4bd8e732636f03dc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"strings"

	driver "urubu/driver/parser"
	spec "urubu/spec/grammar"
)



// runParse loads the compiled grammar named by args[0], parses the text read
// from standard input with it, and prints the resulting syntax tree to
// standard output as one line of JSON.
//
// An error is returned when the grammar path is missing, when the grammar
// cannot be read, when the token stream or the parser cannot be created, when
// Parse fails, or when the tree cannot be marshaled. When the parser recorded
// syntax errors they are returned as a single error holding one message per
// line; the tree, if there is one, has already been printed at that point.
func runParse(args []string) error {
	// Guard the index below so that a missing argument is reported as an
	// error instead of an index-out-of-range panic.
	if len(args) == 0 {
		return fmt.Errorf("a path to a compiled grammar is required")
	}

	cg, err := readCompiledGrammar(args[0])
	if err != nil {
		return fmt.Errorf("Cannot read a compiled grammar: %w", err)
	}

	gram := driver.NewGrammar(cg)

	// The CST action set feeds the tree builder while the parser runs; the
	// finished tree is fetched from the builder after parsing.
	tb := driver.NewDefaultSyntaxTreeBuilder()
	treeAct := driver.NewCSTActionSet(gram, tb)

	toks, err := driver.NewTokenStream(cg, os.Stdin)
	if err != nil {
		return err
	}

	p, err := driver.NewParser(toks, gram, driver.SemanticAction(treeAct))
	if err != nil {
		return err
	}

	if err := p.Parse(); err != nil {
		return err
	}

	// A parser can construct a parse tree even if syntax errors occur.
	// When there is a parse tree, print it.
	if tree := tb.Tree(); tree != nil {
		out, err := json.Marshal(tree)
		if err != nil {
			return err
		}
		fmt.Fprintln(os.Stdout, string(out))
	}

	synErrs := p.SyntaxErrors()
	if len(synErrs) == 0 {
		return nil
	}

	var b strings.Builder
	for i, synErr := range synErrs {
		if i > 0 {
			b.WriteByte('\n')
		}
		writeSyntaxErrorMessage(&b, cg, synErr)
	}

	// The message embeds lexemes taken from the input, so it must never be
	// used as the format string itself: a '%' in the input would otherwise be
	// interpreted as a formatting verb.
	return fmt.Errorf("%s", b.String())
}

// readCompiledGrammar opens the file at path, reads it completely, and decodes
// its JSON content into a spec.CompiledGrammar.
//
// It returns the error from opening, reading, or decoding the file; the
// returned grammar is nil whenever the error is non-nil.
func readCompiledGrammar(path string) (*spec.CompiledGrammar, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Release the file descriptor on every return path. The file is only
	// read, so the error returned by Close carries no useful information.
	defer f.Close()

	data, err := io.ReadAll(f)
	if err != nil {
		return nil, err
	}

	cg := &spec.CompiledGrammar{}
	if err := json.Unmarshal(data, cg); err != nil {
		return nil, err
	}
	return cg, nil
}

// writeSyntaxErrorMessage appends a one-line description of synErr to b.
//
// The line has the form
//
//	<row>:<col>: <message>: <token>: expected: <t1>, <t2>, ...
//
// where row and col are synErr.Row and synErr.Col incremented by one
// (presumably converting zero-based positions to one-based ones; confirm
// against the driver). <token> is "<eof>" for an EOF token, the quoted lexeme
// followed by "(<invalid>)" for an invalid token, and otherwise the quoted
// lexeme followed by the terminal name looked up in cgram when that name is
// not empty. The ": expected: ..." suffix is omitted when synErr lists no
// expected terminals.
func writeSyntaxErrorMessage(b *strings.Builder, cgram *spec.CompiledGrammar, synErr *driver.SyntaxError) {
	fmt.Fprintf(b, "%v:%v: %v: ", synErr.Row+1, synErr.Col+1, synErr.Message)

	tok := synErr.Token
	lexeme := func() string { return string(tok.Lexeme()) }
	switch {
	case tok.EOF():
		b.WriteString("<eof>")
	case tok.Invalid():
		fmt.Fprintf(b, "'%v' (<invalid>)", lexeme())
	default:
		if kind := cgram.Syntactic.Terminals[tok.TerminalID()]; kind != "" {
			fmt.Fprintf(b, "'%v' (%v)", lexeme(), kind)
		} else {
			fmt.Fprintf(b, "'%v'", lexeme())
		}
	}

	// Indexing element 0 of an empty slice would panic, so only emit the
	// list when there is at least one expected terminal.
	if len(synErr.ExpectedTerminals) == 0 {
		return
	}
	for i, t := range synErr.ExpectedTerminals {
		if i == 0 {
			fmt.Fprintf(b, ": expected: %v", t)
			continue
		}
		fmt.Fprintf(b, ", %v", t)
	}
}