 README.md                  | 26
 cmd/maleeni-go/generate.go | 29
 compiler/compiler.go       |  1
 compiler/compiler_test.go  |  3
 driver/lexer_test.go       | 27
 driver/template.go         | 26
 example/go.json            |  1
 spec/spec.go               |  7
 8 files changed, 97 insertions(+), 23 deletions(-)
diff --git a/README.md b/README.md
index e73d865..e736091 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,7 @@ First, define your lexical specification in JSON format. As an example, let's wr
```json
{
+ "name": "statement",
"entries": [
{
"kind": "whitespace",
@@ -43,14 +44,14 @@ First, define your lexical specification in JSON format. As an example, let's wr
}
```
-Save the above specification to a file in UTF-8. In this explanation, the file name is lexspec.json.
+Save the above specification to a file in UTF-8. In this explanation, the file name is `statement.json`.
### 2. Compile the lexical specification
Next, generate a DFA from the lexical specification using the `maleeni compile` command.
```sh
-$ maleeni compile -l lexspec.json -o clexspec.json
+$ maleeni compile -l statement.json -o statementc.json
```
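
For readers who want to drive this step from Go instead of the CLI, the pipeline is roughly: load the JSON spec, validate it, compile it, and marshal the result. A minimal sketch, assuming the module path `github.com/nihei9/maleeni` and eliding the CLI's options (such as the compression level):

```go
package main

import (
	"encoding/json"
	"log"
	"os"

	"github.com/nihei9/maleeni/compiler"
	"github.com/nihei9/maleeni/spec"
)

func main() {
	// Load and validate the lexical specification.
	src, err := os.ReadFile("statement.json")
	if err != nil {
		log.Fatal(err)
	}
	lspec := &spec.LexSpec{}
	if err := json.Unmarshal(src, lspec); err != nil {
		log.Fatal(err)
	}
	if err := lspec.Validate(); err != nil {
		log.Fatal(err)
	}

	// Compile the spec and save the compiled form.
	clspec, err := compiler.Compile(lspec)
	if err != nil {
		log.Fatal(err)
	}
	out, err := json.Marshal(clspec)
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile("statementc.json", out, 0644); err != nil {
		log.Fatal(err)
	}
}
```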
### 3. Debug (Optional)
@@ -60,7 +61,7 @@ If you want to make sure that the lexical specification behaves as expected, you
⚠️ The only encoding that `maleeni lex` and the driver can handle is UTF-8.
```sh
-$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_name, .lexeme, .eof] | @csv'
+$ echo -n 'The truth is out there.' | maleeni lex statementc.json | jq -r '[.kind_name, .lexeme, .eof] | @csv'
"word","The",false
"whitespace"," ",false
"word","truth",false
@@ -94,10 +95,10 @@ The JSON format of tokens that `maleeni lex` command prints is as follows:
Using the `maleeni-go` command, you can generate the source code of a lexer that recognizes your lexical specification.
```sh
-$ maleeni-go clexspec.json > lexer.go
+$ maleeni-go statementc.json
```
-The above command generates the lexer and saves it to `lexer.go` file. To use the lexer, you need to call `NewLexer` function defined in `lexer.go`. The following code is a simple example. In this example, the lexer reads a source code from stdin and writes the result, tokens, to stdout.
+The above command generates the lexer and saves it to the `statement_lexer.go` file. By default, the file name is `{spec name}_lexer.go`. To use the lexer, you need to call the `NewLexer` function defined in `statement_lexer.go`. The following code is a simple example. In this example, the lexer reads source code from stdin and writes the resulting tokens to stdout.
```go
package main
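
The rest of the example is elided by the hunk. The following is a sketch of a complete caller that matches the output shown later in this README, with the caveat that the `Next` method and the `EOF`, `Invalid`, `KindName`, and `Lexeme` token fields are inferred from that output rather than from the generated source; build it together with `statement_lexer.go`:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// NewLexer is defined in the generated statement_lexer.go.
	lex, err := NewLexer(os.Stdin)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	for {
		tok, err := lex.Next()
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		if tok.EOF {
			break
		}
		if tok.Invalid {
			fmt.Printf("invalid: '%v'\n", string(tok.Lexeme))
		} else {
			fmt.Printf("valid: %v: '%v'\n", tok.KindName, string(tok.Lexeme))
		}
	}
}
```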
@@ -136,14 +137,14 @@ Please save the above source code to `main.go` and create a directory structure
```
/project_root
-├── lexer.go ... Lexer generated from the compiled lexical specification (the result of `maleeni-go`).
-└── main.go .... Caller of the lexer.
+├── statement_lexer.go ... Lexer generated from the compiled lexical specification (the result of `maleeni-go`).
+└── main.go .............. Caller of the lexer.
```
Now, you can perform the lexical analysis.
```sh
-$ echo -n 'I want to believe.' | go run main.go lexer.go
+$ echo -n 'I want to believe.' | go run main.go statement_lexer.go
valid: word: 'I'
valid: whitespace: ' '
valid: word: 'want'
@@ -164,8 +165,9 @@ The lexical specification format to be passed to `maleeni compile` command is as
top level object:
-| Field | Type | Nullable | Description |
-|---------|------------------------|----------|-----------------------------------------------------------------------------------------------------------------------|
+| Field | Type | Nullable | Description |
+|---------|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------|
+| name | string | false | A specification name. |
| entries | array of entry objects | false | An array of entries sorted by priority. The first element has the highest priority, and the last has the lowest priority. |
entry object:
@@ -292,6 +294,7 @@ For instance, you can define [an identifier of golang](https://golang.org/ref/sp
```json
{
+ "name": "id",
"entries": [
{
"fragment": true,
@@ -326,6 +329,7 @@ For instance, you can define a subset of [the string literal of golang](https://
```json
{
+ "name": "string",
"entries": [
{
"kind": "string_open",
@@ -369,7 +373,7 @@ For instance, you can define a subset of [the string literal of golang](https://
In the above specification, when the `"` mark appears in the default mode (the initial mode), the driver transitions to the `string` mode and interprets character sequences (`char_seq`) and escape sequences (`escaped_char`). When the `"` mark appears again, the driver returns to the `default` mode.
```sh
-$ echo -n '"foo\nbar"foo' | maleeni lex go-string-cspec.json | jq -r '[.mode_name, .kind_name, .lexeme, .eof] | @csv'
+$ echo -n '"foo\nbar"foo' | maleeni lex stringc.json | jq -r '[.mode_name, .kind_name, .lexeme, .eof] | @csv'
"default","string_open","""",false
"string","char_seq","foo",false
"string","escaped_char","\n",false
diff --git a/cmd/maleeni-go/generate.go b/cmd/maleeni-go/generate.go
index d37defd..d31daed 100644
--- a/cmd/maleeni-go/generate.go
+++ b/cmd/maleeni-go/generate.go
@@ -24,13 +24,14 @@ func Execute() error {
var generateFlags = struct {
pkgName *string
+ output *string
}{}
var generateCmd = &cobra.Command{
Use: "maleeni-go",
Short: "Generate a lexer for Go",
Long: `maleeni-go generates a lexer for Go. The lexer recognizes the lexical specification specified as the argument.`,
- Example: ` maleeni-go clexspec.json > lexer.go`,
+ Example: ` maleeni-go clexspec.json`,
Args: cobra.ExactArgs(1),
RunE: runGenerate,
SilenceErrors: true,
@@ -39,6 +40,7 @@ var generateCmd = &cobra.Command{
func init() {
generateFlags.pkgName = generateCmd.Flags().StringP("package", "p", "main", "package name")
+ generateFlags.output = generateCmd.Flags().StringP("output", "o", "", "output file path")
}
func runGenerate(cmd *cobra.Command, args []string) (retErr error) {
@@ -47,7 +49,30 @@ func runGenerate(cmd *cobra.Command, args []string) (retErr error) {
return fmt.Errorf("Cannot read a compiled lexical specification: %w", err)
}
- return driver.GenLexer(clspec, *generateFlags.pkgName)
+ b, err := driver.GenLexer(clspec, *generateFlags.pkgName)
+ if err != nil {
+ return fmt.Errorf("Failed to generate a lexer: %v", err)
+ }
+
+ var filePath string
+ if *generateFlags.output != "" {
+ filePath = *generateFlags.output
+ } else {
+ filePath = fmt.Sprintf("%v_lexer.go", clspec.Name)
+ }
+
+ f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return fmt.Errorf("Failed to create an output file: %v", err)
+ }
+ defer f.Close()
+
+ _, err = f.Write(b)
+ if err != nil {
+ return fmt.Errorf("Failed to write lexer source code: %v", err)
+ }
+
+ return nil
}
func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) {
diff --git a/compiler/compiler.go b/compiler/compiler.go
index 5d1a1d5..0c89737 100644
--- a/compiler/compiler.go
+++ b/compiler/compiler.go
@@ -106,6 +106,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
}
return &spec.CompiledLexSpec{
+ Name: lexspec.Name,
InitialModeID: spec.LexModeIDDefault,
ModeNames: modeNames,
KindNames: kindNames,
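
With this change, the name set in the source specification survives compilation. A sketch of the round-trip, with the caveat that the `Kind`/`Pattern` entry field names are assumed from the JSON spec format:

```go
package main

import (
	"fmt"
	"log"

	"github.com/nihei9/maleeni/compiler"
	"github.com/nihei9/maleeni/spec"
)

func main() {
	// Entry field names (Kind/Pattern) are assumed from the JSON spec
	// format; adjust to the actual spec.LexEntry definition as needed.
	lspec := &spec.LexSpec{
		Name: "statement",
		Entries: []*spec.LexEntry{
			{Kind: "word", Pattern: `[0-9A-Za-z]+`},
		},
	}
	clspec, err := compiler.Compile(lspec)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(clspec.Name) // the name survives compilation: "statement"
}
```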
diff --git a/compiler/compiler_test.go b/compiler/compiler_test.go
index c76bb24..456920f 100644
--- a/compiler/compiler_test.go
+++ b/compiler/compiler_test.go
@@ -18,6 +18,7 @@ func TestCompile(t *testing.T) {
Caption: "allow duplicates names between fragments and non-fragments",
Spec: `
{
+ "name": "test",
"entries": [
{
"kind": "a2z",
@@ -36,6 +37,7 @@ func TestCompile(t *testing.T) {
Caption: "don't allow duplicates names in non-fragments",
Spec: `
{
+ "name": "test",
"entries": [
{
"kind": "a2z",
@@ -54,6 +56,7 @@ func TestCompile(t *testing.T) {
Caption: "don't allow duplicates names in fragments",
Spec: `
{
+ "name": "test",
"entries": [
{
"kind": "a2z",
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index a742bad..8af3817 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -103,6 +103,7 @@ func TestLexer_Next(t *testing.T) {
}{
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "(a|b)*abb"),
newLexEntryDefaultNOP("t2", " +"),
@@ -126,6 +127,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "b?a+"),
newLexEntryDefaultNOP("t2", "(ab)?(cd)+"),
@@ -154,6 +156,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "."),
},
@@ -198,6 +201,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "[ab.*+?|()[\\]]"),
},
@@ -220,6 +224,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// all 1 byte characters except null character (U+0000)
//
@@ -246,6 +251,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// all 2 byte characters
newLexEntryDefaultNOP("char2Byte", "[\xc2\x80-\xdf\xbf]"),
@@ -267,6 +273,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// All bytes are the same.
newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
@@ -282,6 +289,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// The first two bytes are the same.
newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
@@ -303,6 +311,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// The first byte are the same.
newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
@@ -324,6 +333,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// all 3 byte characters
newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
@@ -369,6 +379,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// All bytes are the same.
newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
@@ -384,6 +395,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// The first 3 bytes are the same.
newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
@@ -405,6 +417,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// The first 2 bytes are the same.
newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
@@ -426,6 +439,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// The first byte are the same.
newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
@@ -447,6 +461,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// all 4 byte characters
newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
@@ -484,6 +499,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("NonNumber", "[^0-9]+[0-9]"),
},
@@ -496,6 +512,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("char1Byte", "\\u{006E}"),
newLexEntryDefaultNOP("char2Byte", "\\u{03BD}"),
@@ -514,6 +531,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("codePointsAlt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"),
},
@@ -529,6 +547,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "\\f{a2c}\\f{d2f}+"),
newLexEntryFragment("a2c", "abc"),
@@ -544,6 +563,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "(\\f{a2c}|\\f{d2f})+"),
newLexEntryFragment("a2c", "abc"),
@@ -558,6 +578,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("t1", "\\f{a2c_or_d2f}+"),
newLexEntryFragment("a2c_or_d2f", "\\f{a2c}|\\f{d2f}"),
@@ -573,6 +594,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("white_space", ` *`),
newLexEntry([]string{"default"}, "string_open", `"`, "string", false),
@@ -598,6 +620,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
// `white_space` is enabled in multiple modes.
newLexEntry([]string{"default", "state_a", "state_b"}, "white_space", ` *`, "", false),
@@ -623,6 +646,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false),
newLexEntry([]string{"default"}, "char", `.`, "", false),
@@ -671,6 +695,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false),
newLexEntry([]string{"default"}, "char", `.`, "", false),
@@ -710,6 +735,7 @@ func TestLexer_Next(t *testing.T) {
},
{
lspec: &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("dot", spec.EscapePattern(`.`)),
newLexEntryDefaultNOP("star", spec.EscapePattern(`*`)),
@@ -778,6 +804,7 @@ func TestLexer_Next(t *testing.T) {
func TestLexer_Next_WithPosition(t *testing.T) {
lspec := &spec.LexSpec{
+ Name: "test",
Entries: []*spec.LexEntry{
newLexEntryDefaultNOP("newline", `\u{000A}+`),
newLexEntryDefaultNOP("any", `.`),
diff --git a/driver/template.go b/driver/template.go
index 2772135..d2772ae 100644
--- a/driver/template.go
+++ b/driver/template.go
@@ -1,13 +1,13 @@
package driver
import (
+ "bytes"
_ "embed"
"fmt"
"go/ast"
"go/format"
"go/parser"
"go/token"
- "os"
"strings"
"text/template"
@@ -17,19 +17,19 @@ import (
//go:embed lexer.go
var lexerCoreSrc string
-func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
+func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) ([]byte, error) {
var lexerSrc string
{
fset := token.NewFileSet()
f, err := parser.ParseFile(fset, "lexer.go", lexerCoreSrc, parser.ParseComments)
if err != nil {
- return err
+ return nil, err
}
var b strings.Builder
err = format.Node(&b, fset, f)
if err != nil {
- return err
+ return nil, err
}
lexerSrc = b.String()
@@ -100,7 +100,7 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
{
t, err := template.New("").Funcs(genTemplateFuncs(clspec)).Parse(lexSpecTemplate)
if err != nil {
- return err
+ return nil, err
}
var b strings.Builder
@@ -112,7 +112,7 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
"compressionLevel": clspec.CompressionLevel,
})
if err != nil {
- return err
+ return nil, err
}
specSrc = b.String()
@@ -136,7 +136,7 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
t, err := template.New("").Parse(tmpl)
if err != nil {
- return err
+ return nil, err
}
var b strings.Builder
@@ -149,7 +149,7 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
"specSrc": specSrc,
})
if err != nil {
- return err
+ return nil, err
}
src = b.String()
@@ -158,12 +158,18 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
fset := token.NewFileSet()
f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
if err != nil {
- return err
+ return nil, err
}
f.Name = ast.NewIdent(pkgName)
- return format.Node(os.Stdout, fset, f)
+ var b bytes.Buffer
+ err = format.Node(&b, fset, f)
+ if err != nil {
+ return nil, err
+ }
+
+ return b.Bytes(), nil
}
const lexSpecTemplate = `
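
Since `GenLexer` now returns the formatted source instead of writing to stdout, callers choose the destination. A sketch mirroring what `runGenerate` does with the result, assuming the module path `github.com/nihei9/maleeni`:

```go
package gen

import (
	"fmt"
	"os"

	"github.com/nihei9/maleeni/driver"
	"github.com/nihei9/maleeni/spec"
)

// writeLexer renders the lexer source with GenLexer and writes it to a
// file named after the spec, mirroring the CLI's default behavior.
func writeLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
	b, err := driver.GenLexer(clspec, pkgName)
	if err != nil {
		return err
	}
	path := fmt.Sprintf("%v_lexer.go", clspec.Name)
	return os.WriteFile(path, b, 0644)
}
```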
diff --git a/example/go.json b/example/go.json
index 631313d..bf92717 100644
--- a/example/go.json
+++ b/example/go.json
@@ -1,4 +1,5 @@
{
+ "name": "go",
"entries": [
{
"kind": "line_comment_open",
diff --git a/spec/spec.go b/spec/spec.go
index 62acfc4..2360201 100644
--- a/spec/spec.go
+++ b/spec/spec.go
@@ -157,10 +157,16 @@ func (e *LexEntry) validate() error {
}
type LexSpec struct {
+ Name string `json:"name"`
Entries []*LexEntry `json:"entries"`
}
func (s *LexSpec) Validate() error {
+ err := validateIdentifier(s.Name)
+ if err != nil {
+ return fmt.Errorf("invalid specification name: %v", err)
+ }
+
if len(s.Entries) <= 0 {
return fmt.Errorf("the lexical specification must have at least one entry")
}
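
The new check rejects a spec with a missing name before any entries are examined. A sketch, assuming `validateIdentifier` rejects the empty string:

```go
package spec_test

import (
	"fmt"

	"github.com/nihei9/maleeni/spec"
)

// A spec whose Name is empty should now fail validation up front.
func ExampleLexSpec_Validate() {
	lspec := &spec.LexSpec{} // Name deliberately left empty
	err := lspec.Validate()
	fmt.Println(err != nil)
	// Output: true
}
```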
@@ -364,6 +370,7 @@ type CompiledLexModeSpec struct {
}
type CompiledLexSpec struct {
+ Name string `json:"name"`
InitialModeID LexModeID `json:"initial_mode_id"`
ModeNames []LexModeName `json:"mode_names"`
KindNames []LexKindName `json:"kind_names"`