1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"github.com/nihei9/maleeni/driver"
"github.com/nihei9/maleeni/spec"
"github.com/spf13/cobra"
)
// lexFlags holds the command-line flag values of the `lex` subcommand.
// Every field is nil until init registers the corresponding flag.
var lexFlags struct {
	// source is the path given with --source/-s.
	source *string
	// output is the path given with --output/-o.
	output *string
	// breakOnError is the value of --break-on-error/-b.
	breakOnError *bool
}
// init builds the `lex` subcommand, binds its flags to lexFlags, and
// registers the command on rootCmd.
func init() {
	const longDesc = `lex takes a text stream and tokenizes it according to a compiled lexical specification.
As use ` + "`maleeni compile`" + `, you can generate the specification.
Note that passive mode transitions are not performed. Thus, if there is a mode in
your lexical specification that is set passively, lexemes in that mode will not be recognized.`

	lexCmd := &cobra.Command{
		Use:     "lex clexspec",
		Short:   "Tokenize a text stream",
		Long:    longDesc,
		Example: ` cat src | maleeni lex clexspec.json`,
		// Exactly one positional argument: the compiled lexical specification.
		Args: cobra.ExactArgs(1),
		RunE: runLex,
	}

	flagSet := lexCmd.Flags()
	lexFlags.source = flagSet.StringP("source", "s", "", "source file path (default stdin)")
	lexFlags.output = flagSet.StringP("output", "o", "", "output file path (default stdout)")
	lexFlags.breakOnError = flagSet.BoolP("break-on-error", "b", false, "break lexical analysis with exit status 1 immediately when an error token appears.")

	rootCmd.AddCommand(lexCmd)
}
// runLex is the RunE handler of the `lex` subcommand.
//
// args[0] is the path of a compiled lexical specification. The text to
// tokenize is read from the file named by the --source flag, or from stdin
// when that flag is empty. Each token is written as one JSON document per
// line to the file named by the --output flag, or to stdout when that flag
// is empty. Tokenization stops after the token whose EOF field is set.
//
// When --break-on-error is set, the first token whose Invalid field is set
// aborts the run with an error, and that token is not written to the output.
//
// The returned error is non-nil when the specification or either file cannot
// be opened, the lexer fails, a token cannot be marshaled or written, or the
// output file cannot be closed.
func runLex(cmd *cobra.Command, args []string) (retErr error) {
	clspec, err := readCompiledLexSpec(args[0])
	if err != nil {
		return fmt.Errorf("Cannot read a compiled lexical specification: %w", err)
	}

	var lex *driver.Lexer
	{
		src := os.Stdin
		if *lexFlags.source != "" {
			f, err := os.Open(*lexFlags.source)
			if err != nil {
				return fmt.Errorf("Cannot open the source file %s: %w", *lexFlags.source, err)
			}
			// The source is only read, so a Close error carries no data-loss risk.
			defer f.Close()
			src = f
		}
		lex, err = driver.NewLexer(driver.NewLexSpec(clspec), src)
		if err != nil {
			return err
		}
	}

	w := os.Stdout
	if *lexFlags.output != "" {
		f, err := os.OpenFile(*lexFlags.output, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
		if err != nil {
			return fmt.Errorf("Cannot open the output file %s: %w", *lexFlags.output, err)
		}
		// A failed Close on a written file can mean lost data, so report it
		// through the named result unless an earlier error is already set.
		defer func() {
			if cerr := f.Close(); cerr != nil && retErr == nil {
				retErr = fmt.Errorf("cannot close the output file %s: %w", *lexFlags.output, cerr)
			}
		}()
		w = f
	}

	for {
		tok, err := lex.Next()
		if err != nil {
			return err
		}
		data, err := json.Marshal(tok)
		if err != nil {
			return fmt.Errorf("failed to marshal a token; token: %v, error: %w", tok, err)
		}
		if tok.Invalid && *lexFlags.breakOnError {
			return fmt.Errorf("detected an error token: %v", string(data))
		}
		// Surface write failures (full disk, closed pipe) instead of
		// silently emitting a truncated token stream.
		if _, err := fmt.Fprintf(w, "%v\n", string(data)); err != nil {
			return fmt.Errorf("cannot write a token: %w", err)
		}
		if tok.EOF {
			break
		}
	}

	return nil
}
// readCompiledLexSpec reads the file at path and decodes its JSON content
// into a spec.CompiledLexSpec.
//
// It returns a nil specification together with the underlying error when the
// file cannot be opened or read, or when its content cannot be unmarshaled.
func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Release the file descriptor on every return path. The file is only
	// read, so the Close error is not significant.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, err
	}

	clspec := &spec.CompiledLexSpec{}
	if err := json.Unmarshal(data, clspec); err != nil {
		return nil, err
	}
	return clspec, nil
}
|