diff options
Diffstat (limited to '')
-rw-r--r-- | README.md | 108 | ||||
-rw-r--r-- | cmd/maleeni-go/generate.go | 68 | ||||
-rw-r--r-- | cmd/maleeni-go/main.go | 12 | ||||
-rw-r--r-- | driver/template.go | 517 |
4 files changed, 668 insertions, 37 deletions
@@ -1,17 +1,27 @@ # maleeni -maleeni provides a command that generates a portable DFA for lexical analysis and a driver for golang. maleeni also provides a command to perform lexical analysis to allow easy debugging of your lexical specification. +maleeni is a lexer generator for golang. maleeni also provides a command to perform lexical analysis to allow easy debugging of your lexical specification. [](https://github.com/nihei9/maleeni/actions/workflows/test.yml) ## Installation +Compiler: + ```sh $ go install github.com/nihei9/maleeni/cmd/maleeni@latest ``` +Code Generator: + +```sh +$ go install github.com/nihei9/maleeni/cmd/maleeni-go@latest +``` + ## Usage +### 1. Define your lexical specification + First, define your lexical specification in JSON format. As an example, let's write the definitions of whitespace, words, and punctuation. ```json @@ -35,14 +45,17 @@ First, define your lexical specification in JSON format. As an example, let's wr Save the above specification to a file in UTF-8. In this explanation, the file name is lexspec.json. +### 2. Compile the lexical specification + Next, generate a DFA from the lexical specification using the `maleeni compile` command. ```sh $ maleeni compile -l lexspec.json -o clexspec.json ``` -If you want to make sure that the lexical specification behaves as expected, you can use `maleeni lex` command to try lexical analysis without having to implement a driver. -`maleeni lex` command outputs tokens in JSON format. For simplicity, print significant fields of the tokens in CSV format using jq command. +### 3. Debug (Optional) + +If you want to make sure that the lexical specification behaves as expected, you can use the `maleeni lex` command to try lexical analysis without having to generate a lexer. The `maleeni lex` command outputs tokens in JSON format. For simplicity, print significant fields of the tokens in CSV format using the jq command. ⚠️ The only encoding that `maleeni lex` and the driver can handle is UTF-8. 
@@ -76,50 +89,71 @@ The JSON format of tokens that `maleeni lex` command prints is as follows: | eof | bool | When this field is `true`, it means the token is the EOF token. | | invalid | bool | When this field is `true`, it means the token is an error token. | -When using the driver, please import `github.com/nihei9/maleeni/driver` and `github.com/nihei9/maleeni/spec` package. -You can use the driver easily in the following way: +### 4. Generate the lexer + +Using the `maleeni-go` command, you can generate the source code of a lexer that recognizes your lexical specification. + +```sh +$ maleeni-go clexspec.json > lexer.go +``` + +The above command generates the lexer and saves it to the `lexer.go` file. To use the lexer, you need to call the `NewLexer` function defined in `lexer.go`. The following code is a simple example. In this example, the lexer reads source text from stdin and writes the resulting tokens to stdout. ```go -// Read your lexical specification file. -f, err := os.Open(path) -if err != nil { - // error handling -} -data, err := ioutil.ReadAll(f) -if err != nil { - // error handling -} -clexspec := &spec.CompiledLexSpec{} -err = json.Unmarshal(data, clexspec) -if err != nil { - // error handling -} +package main -// Generate a lexer. -lex, err := driver.NewLexer(clexspec, src) -if err != nil { - // error handling -} +import ( + "fmt" + "os" +) -// Perform lexical analysis. -for { - tok, err := lex.Next() +func main() { + lex, err := NewLexer(NewLexSpec(), os.Stdin) if err != nil { - // error handling - } - if tok.Invalid { - // An error token appeared. - // error handling - } - if tok.EOF { - // The EOF token appeared. - break + fmt.Fprintln(os.Stderr, err) + os.Exit(1) } - // Do something using `tok`. 
+ for { + tok, err := lex.Next() + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if tok.EOF { + break + } + if tok.Invalid { + fmt.Printf("invalid: '%v'\n", string(tok.Lexeme)) + } else { + fmt.Printf("valid: %v: '%v'\n", tok.KindName, string(tok.Lexeme)) + } + } } ``` +Please save the above source code to `main.go` and create a directory structure like the one below. + +``` +/project_root +├── lexer.go ... Lexer generated from the compiled lexical specification (the result of `maleeni-go`). +└── main.go .... Caller of the lexer. +``` + +Now, you can perform the lexical analysis. + +```sh +$ echo -n 'I want to believe.' | go run main.go lexer.go +valid: word: 'I' +valid: whitespace: ' ' +valid: word: 'want' +valid: whitespace: ' ' +valid: word: 'to' +valid: whitespace: ' ' +valid: word: 'believe' +valid: punctuation: '.' +``` + ## More Practical Usage See also [this example](example/README.md). diff --git a/cmd/maleeni-go/generate.go b/cmd/maleeni-go/generate.go new file mode 100644 index 0000000..d37defd --- /dev/null +++ b/cmd/maleeni-go/generate.go @@ -0,0 +1,68 @@ +package main + +import ( + _ "embed" + "encoding/json" + "fmt" + "io/ioutil" + "os" + + "github.com/nihei9/maleeni/driver" + "github.com/nihei9/maleeni/spec" + "github.com/spf13/cobra" +) + +func Execute() error { + err := generateCmd.Execute() + if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + return err + } + + return nil +} + +var generateFlags = struct { + pkgName *string +}{} + +var generateCmd = &cobra.Command{ + Use: "maleeni-go", + Short: "Generate a lexer for Go", + Long: `maleeni-go generates a lexer for Go. 
The lexer recognizes the lexical specification specified as the argument.`, + Example: ` maleeni-go clexspec.json > lexer.go`, + Args: cobra.ExactArgs(1), + RunE: runGenerate, + SilenceErrors: true, + SilenceUsage: true, +} + +func init() { + generateFlags.pkgName = generateCmd.Flags().StringP("package", "p", "main", "package name") +} + +func runGenerate(cmd *cobra.Command, args []string) (retErr error) { + clspec, err := readCompiledLexSpec(args[0]) + if err != nil { + return fmt.Errorf("Cannot read a compiled lexical specification: %w", err) + } + + return driver.GenLexer(clspec, *generateFlags.pkgName) +} + +func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + data, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + clspec := &spec.CompiledLexSpec{} + err = json.Unmarshal(data, clspec) + if err != nil { + return nil, err + } + return clspec, nil +} diff --git a/cmd/maleeni-go/main.go b/cmd/maleeni-go/main.go new file mode 100644 index 0000000..701f02f --- /dev/null +++ b/cmd/maleeni-go/main.go @@ -0,0 +1,12 @@ +package main + +import ( + "os" +) + +func main() { + err := Execute() + if err != nil { + os.Exit(1) + } +} diff --git a/driver/template.go b/driver/template.go new file mode 100644 index 0000000..f7caa75 --- /dev/null +++ b/driver/template.go @@ -0,0 +1,517 @@ +package driver + +import ( + _ "embed" + "fmt" + "go/ast" + "go/format" + "go/parser" + "go/token" + "os" + "strings" + "text/template" + + "github.com/nihei9/maleeni/spec" +) + +//go:embed lexer.go +var lexerCoreSrc string + +func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { + var lexerSrc string + { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "lexer.go", lexerCoreSrc, parser.ParseComments) + if err != nil { + return err + } + + var b strings.Builder + err = format.Node(&b, fset, f) + if err != nil { + return err + } + + lexerSrc = b.String() + } + + var 
specSrc string + { + t, err := template.New("").Funcs(genTemplateFuncs(clspec)).Parse(lexSpecTemplate) + if err != nil { + return err + } + + var b strings.Builder + err = t.Execute(&b, map[string]interface{}{ + "initialModeID": clspec.InitialModeID, + "modeIDNil": spec.LexModeIDNil, + "modeKindIDNil": spec.LexModeKindIDNil, + "stateIDNil": spec.StateIDNil, + "compressionLevel": clspec.CompressionLevel, + }) + if err != nil { + return err + } + + specSrc = b.String() + } + + var src string + { + tmpl := `// Code generated by maleeni-go. DO NOT EDIT. +{{ .lexerSrc }} + +{{ .specSrc }} +` + + t, err := template.New("").Parse(tmpl) + if err != nil { + return err + } + + var b strings.Builder + err = t.Execute(&b, map[string]string{ + "lexerSrc": lexerSrc, + "specSrc": specSrc, + }) + if err != nil { + return err + } + + src = b.String() + } + + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "", src, parser.ParseComments) + if err != nil { + return err + } + + f.Name = ast.NewIdent(pkgName) + + return format.Node(os.Stdout, fset, f) +} + +const lexSpecTemplate = ` +type lexSpec struct { + pop [][]bool + push [][]ModeID + modeNames []string + initialStates []StateID + acceptances [][]ModeKindID + kindIDs [][]KindID + kindNames []string + initialModeID ModeID + modeIDNil ModeID + modeKindIDNil ModeKindID + stateIDNil StateID + + rowNums [][]int + rowDisplacements [][]int + bounds [][]int + entries [][]StateID + originalColCounts []int +} + +func NewLexSpec() *lexSpec { + return &lexSpec{ + pop: {{ genPopTable }}, + push: {{ genPushTable }}, + modeNames: {{ genModeNameTable }}, + initialStates: {{ genInitialStateTable }}, + acceptances: {{ genAcceptTable }}, + kindIDs: {{ genKindIDTable }}, + kindNames: {{ genKindNameTable }}, + initialModeID: {{ .initialModeID }}, + modeIDNil: {{ .modeIDNil }}, + modeKindIDNil: {{ .modeKindIDNil }}, + stateIDNil: {{ .stateIDNil }}, + + rowNums: {{ genRowNums }}, + rowDisplacements: {{ genRowDisplacements }}, + bounds: {{ 
genBounds }}, + entries: {{ genEntries }}, + originalColCounts: {{ genOriginalColCounts }}, + } +} + +func (s *lexSpec) InitialMode() ModeID { + return s.initialModeID +} + +func (s *lexSpec) Pop(mode ModeID, modeKind ModeKindID) bool { + return s.pop[mode][modeKind] +} + +func (s *lexSpec) Push(mode ModeID, modeKind ModeKindID) (ModeID, bool) { + id := s.push[mode][modeKind] + return id, id != s.modeIDNil +} + +func (s *lexSpec) ModeName(mode ModeID) string { + return s.modeNames[mode] +} + +func (s *lexSpec) InitialState(mode ModeID) StateID { + return s.initialStates[mode] +} + +func (s *lexSpec) NextState(mode ModeID, state StateID, v int) (StateID, bool) { +{{ if eq .compressionLevel 2 -}} + rowNum := s.rowNums[mode][state] + d := s.rowDisplacements[mode][rowNum] + if s.bounds[mode][d+v] != rowNum { + return s.stateIDNil, false + } + return s.entries[mode][d+v], true +{{ else if eq .compressionLevel 1 -}} + rowNum := s.rowNums[mode][state] + colCount := s.originalColCounts[mode] + next := s.entries[mode][rowNum*colCount+v] + if next == s.stateIDNil { + return s.stateIDNil, false + } + return next, true +{{ else -}} + colCount := s.originalColCounts[mode] + next := s.entries[mode][int(state)*colCount+v] + if next == s.stateIDNil { + return s.stateIDNil, false + } + return next, true +{{ end -}} +} + +func (s *lexSpec) Accept(mode ModeID, state StateID) (ModeKindID, bool) { + id := s.acceptances[mode][state] + return id, id != s.modeKindIDNil +} + +func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string) { + id := s.kindIDs[mode][modeKind] + return id, s.kindNames[id] +} +` + +func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap { + fns := template.FuncMap{ + "genPopTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]bool{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.Pop[0] != 0) 
+ for _, v := range s.Pop[1:] { + fmt.Fprintf(&b, ", %v", v != 0) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + }, + "genPushTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]ModeID{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.Push[0]) + for _, v := range s.Push[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + }, + "genModeNameTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[]string{\n") + for i, name := range clspec.ModeNames { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "%#v,\n", "") + continue + } + + fmt.Fprintf(&b, "%#v,\n", name) + } + fmt.Fprintf(&b, "}") + return b.String() + }, + "genInitialStateTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[]StateID{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "%v,\n", spec.StateIDNil) + continue + } + + fmt.Fprintf(&b, "%v,\n", s.DFA.InitialStateID) + } + fmt.Fprintf(&b, "}") + return b.String() + }, + "genAcceptTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]ModeKindID{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.AcceptingStates[0]) + for _, v := range s.DFA.AcceptingStates[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + }, + "genKindIDTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]KindID{\n") + for i, ids := range clspec.KindIDs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", ids[0]) + for _, v := range ids[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, 
"},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + }, + "genKindNameTable": func() string { + var b strings.Builder + fmt.Fprintf(&b, "[]string{\n") + for i, name := range clspec.KindNames { + if i == spec.LexKindIDNil.Int() { + fmt.Fprintf(&b, "%#v,\n", "") + continue + } + + fmt.Fprintf(&b, "%#v,\n", name) + } + fmt.Fprintf(&b, "}") + return b.String() + }, + } + + switch clspec.CompressionLevel { + case 2: + fns["genRowNums"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]int{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.Transition.RowNums[0]) + for _, v := range s.DFA.Transition.RowNums[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genRowDisplacements"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]int{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.Transition.UniqueEntries.RowDisplacement[0]) + for _, d := range s.DFA.Transition.UniqueEntries.RowDisplacement[1:] { + fmt.Fprintf(&b, ", %v", d) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genBounds"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]int{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.Transition.UniqueEntries.Bounds[0]) + for _, v := range s.DFA.Transition.UniqueEntries.Bounds[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genEntries"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]StateID{\n") + for i, s := range clspec.Specs { + if i == 
spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.Transition.UniqueEntries.Entries[0]) + for _, v := range s.DFA.Transition.UniqueEntries.Entries[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genOriginalColCounts"] = func() string { + return "nil" + } + case 1: + fns["genRowNums"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]int{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.Transition.RowNums[0]) + for _, v := range s.DFA.Transition.RowNums[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genRowDisplacements"] = func() string { + return "nil" + } + + fns["genBounds"] = func() string { + return "nil" + } + + fns["genEntries"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]StateID{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.Transition.UncompressedUniqueEntries[0]) + for _, v := range s.DFA.Transition.UncompressedUniqueEntries[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genOriginalColCounts"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[]int{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "0,\n") + continue + } + + fmt.Fprintf(&b, "%v,\n", s.DFA.Transition.OriginalColCount) + } + fmt.Fprintf(&b, "}") + return b.String() + } + default: + fns["genRowNums"] = func() string { + return "nil" + } + + fns["genRowDisplacements"] = func() string { + return "nil" + } + + fns["genBounds"] = func() string { + return "nil" + 
} + + fns["genEntries"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[][]StateID{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "nil,\n") + continue + } + + fmt.Fprintf(&b, "{") + fmt.Fprintf(&b, "%v", s.DFA.UncompressedTransition[0]) + for _, v := range s.DFA.UncompressedTransition[1:] { + fmt.Fprintf(&b, ", %v", v) + } + fmt.Fprintf(&b, "},\n") + } + fmt.Fprintf(&b, "}") + return b.String() + } + + fns["genOriginalColCounts"] = func() string { + var b strings.Builder + fmt.Fprintf(&b, "[]int{\n") + for i, s := range clspec.Specs { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, "0,\n") + continue + } + + fmt.Fprintf(&b, "%v,\n", s.DFA.ColCount) + } + fmt.Fprintf(&b, "}") + return b.String() + } + } + + return fns +} |