aboutsummaryrefslogtreecommitdiff
path: root/cmd/maleeni/lex.go
blob: 37d7d71e532c360e29e1fd2fb3cc4e25c4b57244 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"

	"github.com/nihei9/maleeni/driver"
	"github.com/nihei9/maleeni/spec"
	"github.com/spf13/cobra"
)

// lexFlags holds the command-line flag values of the `lex` subcommand.
// Every field is a nil pointer until init registers the matching flag.
var lexFlags struct {
	// source is the path of the text to tokenize; empty selects stdin.
	source *string
	// output is the path the tokens are written to; empty selects stdout.
	output *string
	// breakOnError stops the analysis at the first invalid token.
	breakOnError *bool
}

// init builds the `lex` subcommand, binds its flags to lexFlags, and attaches
// the command to rootCmd.
func init() {
	// The help text is kept in one constant so the command literal stays short.
	const longDesc = `lex takes a text stream and tokenizes it according to a compiled lexical specification.
As use ` + "`maleeni compile`" + `, you can generate the specification.

Note that passive mode transitions are not performed. Thus, if there is a mode in
your lexical specification that is set passively, lexemes in that mode will not be recognized.`

	lexCmd := &cobra.Command{
		Use:     "lex clexspec",
		Short:   "Tokenize a text stream",
		Long:    longDesc,
		Example: `  cat src | maleeni lex clexspec.json`,
		// Exactly one positional argument is accepted; runLex reads it as the
		// path of the compiled lexical specification.
		Args: cobra.ExactArgs(1),
		RunE: runLex,
	}

	fs := lexCmd.Flags()
	lexFlags.source = fs.StringP("source", "s", "", "source file path (default stdin)")
	lexFlags.output = fs.StringP("output", "o", "", "output file path (default stdout)")
	lexFlags.breakOnError = fs.BoolP("break-on-error", "b", false, "break lexical analysis with exit status 1 immediately when an error token appears.")

	rootCmd.AddCommand(lexCmd)
}

// runLex implements the `lex` subcommand.
//
// args[0] is the path of a compiled lexical specification. The text to
// tokenize is read from the file named by the --source flag, or from stdin
// when the flag is empty. Each token is written as one JSON object per line
// to the file named by the --output flag, or to stdout when the flag is
// empty. The loop ends after the EOF token has been written.
//
// An error is returned when the specification or either file cannot be
// opened, when the lexer fails, when a token cannot be marshaled or written,
// when closing the output file fails, or when an invalid token appears and
// --break-on-error is set.
func runLex(cmd *cobra.Command, args []string) (retErr error) {
	clspec, err := readCompiledLexSpec(args[0])
	if err != nil {
		return fmt.Errorf("Cannot read a compiled lexical specification: %w", err)
	}

	var lex *driver.Lexer
	{
		src := os.Stdin
		if *lexFlags.source != "" {
			f, err := os.Open(*lexFlags.source)
			if err != nil {
				return fmt.Errorf("Cannot open the source file %s: %w", *lexFlags.source, err)
			}
			// The source is only read, so a failed close cannot lose data.
			defer f.Close()
			src = f
		}
		lex, err = driver.NewLexer(driver.NewLexSpec(clspec), src)
		if err != nil {
			return err
		}
	}

	w := os.Stdout
	if *lexFlags.output != "" {
		f, err := os.OpenFile(*lexFlags.output, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
		if err != nil {
			return fmt.Errorf("Cannot open the output file %s: %w", *lexFlags.output, err)
		}
		// A failed close of a written file can mean lost output, so report it
		// through the named result unless an earlier error is already set.
		defer func() {
			if cerr := f.Close(); cerr != nil && retErr == nil {
				retErr = fmt.Errorf("Cannot close the output file %s: %w", *lexFlags.output, cerr)
			}
		}()
		w = f
	}

	tok2JSON := genTokenJSONMarshaler(clspec)
	for {
		tok, err := lex.Next()
		if err != nil {
			return err
		}
		data, err := tok2JSON(tok)
		if err != nil {
			return fmt.Errorf("failed to marshal a token; token: %v, error: %w", tok, err)
		}
		// The invalid token is reported through the error instead of being
		// written to the output.
		if tok.Invalid && *lexFlags.breakOnError {
			return fmt.Errorf("detected an error token: %v", string(data))
		}
		if _, err := fmt.Fprintf(w, "%s\n", data); err != nil {
			return fmt.Errorf("Cannot write a token: %w", err)
		}
		if tok.EOF {
			break
		}
	}

	return nil
}

// readCompiledLexSpec reads the file at path and decodes its JSON content
// into a spec.CompiledLexSpec.
//
// The file is read with ioutil.ReadFile, which opens and closes it itself, so
// no file handle outlives the call. The error from reading or from JSON
// decoding is returned unchanged, and the returned specification is nil
// whenever the error is non-nil.
func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	clspec := &spec.CompiledLexSpec{}
	if err := json.Unmarshal(data, clspec); err != nil {
		return nil, err
	}
	return clspec, nil
}

// genTokenJSONMarshaler returns a function that encodes a token as a JSON
// object. The returned function looks up the token's mode and kind names in
// clspec.ModeNames and clspec.KindNames, so each encoded token carries both
// the numeric IDs and their names.
func genTokenJSONMarshaler(clspec *spec.CompiledLexSpec) func(tok *driver.Token) ([]byte, error) {
	// tokenJSON fixes the key names and the key order of the encoded object.
	type tokenJSON struct {
		ModeID     int    `json:"mode_id"`
		ModeName   string `json:"mode_name"`
		KindID     int    `json:"kind_id"`
		ModeKindID int    `json:"mode_kind_id"`
		KindName   string `json:"kind_name"`
		Row        int    `json:"row"`
		Col        int    `json:"col"`
		Lexeme     string `json:"lexeme"`
		EOF        bool   `json:"eof"`
		Invalid    bool   `json:"invalid"`
	}

	marshal := func(tok *driver.Token) ([]byte, error) {
		out := tokenJSON{
			ModeID:     tok.ModeID.Int(),
			ModeName:   clspec.ModeNames[tok.ModeID].String(),
			KindID:     tok.KindID.Int(),
			ModeKindID: tok.ModeKindID.Int(),
			KindName:   clspec.KindNames[tok.KindID].String(),
			Row:        tok.Row,
			Col:        tok.Col,
			Lexeme:     string(tok.Lexeme),
			EOF:        tok.EOF,
			Invalid:    tok.Invalid,
		}
		return json.Marshal(out)
	}
	return marshal
}