about | summary | refs | log | tree | commit | diff
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/generator/main.go 70
-rw-r--r-- cmd/maleeni/compile.go 115
-rw-r--r-- cmd/maleeni/lex.go 130
-rw-r--r-- cmd/maleeni/main.go 12
-rw-r--r-- cmd/maleeni/root.go 28
5 files changed, 355 insertions, 0 deletions
diff --git a/cmd/generator/main.go b/cmd/generator/main.go
new file mode 100644
index 0000000..3edcef5
--- /dev/null
+++ b/cmd/generator/main.go
@@ -0,0 +1,70 @@
+package main
+
+import (
+ "fmt"
+ "net/http"
+ "os"
+ "strings"
+ "text/template"
+
+ "github.com/nihei9/maleeni/ucd"
+)
+
+// main runs the table generator and, if it fails, prints the error to
+// standard error and exits with status 1.
+func main() {
+	if genErr := gen(); genErr != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", genErr)
+		os.Exit(1)
+	}
+}
+
+// gen regenerates ../compiler/ucd_table.go from the Unicode Character Database.
+//
+// It downloads PropertyValueAliases.txt and UnicodeData.txt (Unicode 13.0.0)
+// from unicode.org, parses them with the ucd package, renders the template
+// ../compiler/ucd_table.go.tmpl with the parsed data, and writes the result to
+// ../compiler/ucd_table.go. Both file paths are relative, so they resolve
+// against the current working directory. The first error encountered is
+// returned.
+func gen() error {
+	var propValAliases *ucd.PropertyValueAliases
+	{
+		resp, err := fetchUCDFile("https://www.unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt")
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
+		propValAliases, err = ucd.ParsePropertyValueAliases(resp.Body)
+		if err != nil {
+			return err
+		}
+	}
+	var unicodeData *ucd.UnicodeData
+	{
+		resp, err := fetchUCDFile("https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt")
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
+		unicodeData, err = ucd.ParseUnicodeData(resp.Body, propValAliases)
+		if err != nil {
+			return err
+		}
+	}
+	tmpl, err := template.ParseFiles("../compiler/ucd_table.go.tmpl")
+	if err != nil {
+		return err
+	}
+	// Render into memory first: the output file is only opened (and truncated)
+	// after the template has executed successfully.
+	var b strings.Builder
+	err = tmpl.Execute(&b, struct {
+		GeneratorName        string
+		UnicodeData          *ucd.UnicodeData
+		PropertyValueAliases *ucd.PropertyValueAliases
+	}{
+		GeneratorName:        "generator/main.go",
+		UnicodeData:          unicodeData,
+		PropertyValueAliases: propValAliases,
+	})
+	if err != nil {
+		return err
+	}
+	f, err := os.OpenFile("../compiler/ucd_table.go", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+	if err != nil {
+		return err
+	}
+	// The rendered source is data, not a format string: write it verbatim so
+	// that a '%' in the generated code is not interpreted as a formatting verb.
+	if _, err := f.WriteString(b.String()); err != nil {
+		f.Close()
+		return err
+	}
+	// Report a failed close, since it can mean the data never reached the disk.
+	return f.Close()
+}
+
+// fetchUCDFile issues an HTTP GET for url and returns the response only when
+// the server answered 200 OK. On success the caller must close the response
+// body; on any failure the body is already closed and an error is returned.
+func fetchUCDFile(url string) (*http.Response, error) {
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		return nil, fmt.Errorf("unexpected HTTP status for %s: %s", url, resp.Status)
+	}
+	return resp, nil
+}
diff --git a/cmd/maleeni/compile.go b/cmd/maleeni/compile.go
new file mode 100644
index 0000000..6a5e303
--- /dev/null
+++ b/cmd/maleeni/compile.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/nihei9/maleeni/compiler"
+ "github.com/nihei9/maleeni/spec"
+ "github.com/spf13/cobra"
+)
+
+// compileFlags holds pointers to the values of the compile subcommand's
+// command-line flags. The fields start out nil and are assigned in init.
+var compileFlags struct {
+	debug   *bool
+	lexSpec *string
+	output  *string
+}
+
+// init builds the compile subcommand, binds its flags to compileFlags, and
+// attaches it to rootCmd.
+func init() {
+	compileCmd := &cobra.Command{
+		Use:     "compile",
+		Short:   "Compile a lexical specification into a DFA",
+		Long:    `compile takes a lexical specification and generates a DFA accepting the tokens described in the specification.`,
+		Example: ` cat lexspec.json | maleeni compile > clexspec.json`,
+		RunE:    runCompile,
+	}
+
+	flagSet := compileCmd.Flags()
+	compileFlags.debug = flagSet.BoolP("debug", "d", false, "enable logging")
+	compileFlags.lexSpec = flagSet.StringP("lex-spec", "l", "", "lexical specification file path (default: stdin)")
+	compileFlags.output = flagSet.StringP("output", "o", "", "output file path (default: stdout)")
+
+	rootCmd.AddCommand(compileCmd)
+}
+
+// runCompile implements the compile subcommand: it reads a lexical
+// specification (from the --lex-spec path, or stdin when the flag is empty),
+// compiles it, and writes the compiled specification (to the --output path, or
+// stdout when the flag is empty).
+//
+// With --debug, a log is written to maleeni-compile.log in the current
+// directory; the named result retErr lets the deferred footer record whether
+// the command succeeded or failed.
+func runCompile(cmd *cobra.Command, args []string) (retErr error) {
+	lspec, err := readLexSpec(*compileFlags.lexSpec)
+	if err != nil {
+		return fmt.Errorf("Cannot read a lexical specification: %w", err)
+	}
+
+	var opts []compiler.CompilerOption
+	if *compileFlags.debug {
+		const logPath = "maleeni-compile.log"
+		logFile, err := os.OpenFile(logPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		if err != nil {
+			return fmt.Errorf("Cannot open the log file %s: %w", logPath, err)
+		}
+		// Deferred calls run last-in-first-out, so the footer below is written
+		// before the log file is closed.
+		defer logFile.Close()
+
+		startedAt := time.Now().Format(time.RFC3339)
+		fmt.Fprintf(logFile, "maleeni compile starts.\nDate time: %v\n---\n", startedAt)
+		defer func() {
+			fmt.Fprintf(logFile, "---\n")
+			if retErr == nil {
+				fmt.Fprintf(logFile, "maleeni compile succeeded.\n")
+				return
+			}
+			fmt.Fprintf(logFile, "maleeni compile failed: %v\n", retErr)
+		}()
+
+		opts = append(opts, compiler.EnableLogging(logFile))
+	}
+
+	clspec, err := compiler.Compile(lspec, opts...)
+	if err != nil {
+		return err
+	}
+	if err := writeCompiledLexSpec(clspec, *compileFlags.output); err != nil {
+		return fmt.Errorf("Cannot write a compiled lexical specification: %w", err)
+	}
+
+	return nil
+}
+
+// readLexSpec reads a JSON-encoded lexical specification and decodes it into
+// a spec.LexSpec. The input is the file at path, or standard input when path
+// is empty.
+func readLexSpec(path string) (*spec.LexSpec, error) {
+	input := os.Stdin
+	if path != "" {
+		file, err := os.Open(path)
+		if err != nil {
+			return nil, fmt.Errorf("Cannot open the lexical specification file %s: %w", path, err)
+		}
+		defer file.Close()
+		input = file
+	}
+
+	raw, err := ioutil.ReadAll(input)
+	if err != nil {
+		return nil, err
+	}
+
+	parsed := new(spec.LexSpec)
+	if err := json.Unmarshal(raw, parsed); err != nil {
+		return nil, err
+	}
+	return parsed, nil
+}
+
+// writeCompiledLexSpec encodes clspec as JSON and writes it, followed by a
+// newline, to the file at path, or to standard output when path is empty.
+// An existing file at path is truncated.
+//
+// Errors from marshalling, opening, writing and closing the output are all
+// returned, so a short or failed write is never reported as success.
+func writeCompiledLexSpec(clspec *spec.CompiledLexSpec, path string) (retErr error) {
+	out, err := json.Marshal(clspec)
+	if err != nil {
+		return err
+	}
+	w := os.Stdout
+	if path != "" {
+		f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		if err != nil {
+			return fmt.Errorf("Cannot open the output file %s: %w", path, err)
+		}
+		defer func() {
+			// Surface a close failure unless an earlier error is already
+			// being returned.
+			if cerr := f.Close(); cerr != nil && retErr == nil {
+				retErr = cerr
+			}
+		}()
+		w = f
+	}
+	_, err = fmt.Fprintf(w, "%v\n", string(out))
+	return err
+}
diff --git a/cmd/maleeni/lex.go b/cmd/maleeni/lex.go
new file mode 100644
index 0000000..c111370
--- /dev/null
+++ b/cmd/maleeni/lex.go
@@ -0,0 +1,130 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "time"
+
+ "github.com/nihei9/maleeni/driver"
+ "github.com/nihei9/maleeni/spec"
+ "github.com/spf13/cobra"
+)
+
+// lexFlags holds pointers to the values of the lex subcommand's command-line
+// flags. The fields start out nil and are assigned in init.
+var lexFlags struct {
+	debug        *bool
+	source       *string
+	output       *string
+	breakOnError *bool
+}
+
+// init builds the lex subcommand, which requires exactly one positional
+// argument, binds its flags to lexFlags, and attaches it to rootCmd.
+func init() {
+	lexCmd := &cobra.Command{
+		Use:     "lex clexspec",
+		Short:   "Tokenize a text stream",
+		Long:    "lex takes a text stream and tokenizes it according to a compiled lexical specification.\nAs use `maleeni compile`, you can generate the specification.",
+		Example: ` cat src | maleeni lex clexspec.json`,
+		Args:    cobra.ExactArgs(1),
+		RunE:    runLex,
+	}
+
+	flagSet := lexCmd.Flags()
+	lexFlags.debug = flagSet.BoolP("debug", "d", false, "enable logging")
+	lexFlags.source = flagSet.StringP("source", "s", "", "source file path (default: stdin)")
+	lexFlags.output = flagSet.StringP("output", "o", "", "output file path (default: stdout)")
+	lexFlags.breakOnError = flagSet.BoolP("break-on-error", "b", false, "break lexical analysis with exit status 1 immediately when an error token appears.")
+
+	rootCmd.AddCommand(lexCmd)
+}
+
+// runLex implements the lex subcommand. args[0] is the path of a compiled
+// lexical specification (the command is registered with cobra.ExactArgs(1)).
+// The source text is read from the --source path, or stdin when the flag is
+// empty, and each token is written as one JSON document per line to the
+// --output path, or stdout when the flag is empty. Lexing stops after the EOF
+// token has been written.
+//
+// With --break-on-error, the first invalid token aborts the run with an error
+// instead of being written. With --debug, a log is written to maleeni-lex.log
+// in the current directory; the named result retErr lets the deferred footer
+// record whether the command succeeded or failed.
+func runLex(cmd *cobra.Command, args []string) (retErr error) {
+	clspec, err := readCompiledLexSpec(args[0])
+	if err != nil {
+		return fmt.Errorf("Cannot read a compiled lexical specification: %w", err)
+	}
+
+	var opts []driver.LexerOption
+	if *lexFlags.debug {
+		fileName := "maleeni-lex.log"
+		f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		if err != nil {
+			return fmt.Errorf("Cannot open the log file %s: %w", fileName, err)
+		}
+		// Deferred calls run last-in-first-out, so the footer below is written
+		// before the log file is closed.
+		defer f.Close()
+		fmt.Fprintf(f, "maleeni lex starts.\nDate time: %v\n---\n", time.Now().Format(time.RFC3339))
+		defer func() {
+			fmt.Fprintf(f, "---\n")
+			if retErr != nil {
+				fmt.Fprintf(f, "maleeni lex failed: %v\n", retErr)
+			} else {
+				fmt.Fprintf(f, "maleeni lex succeeded.\n")
+			}
+		}()
+
+		opts = append(opts, driver.EnableLogging(f))
+	}
+
+	var lex *driver.Lexer
+	{
+		src := os.Stdin
+		if *lexFlags.source != "" {
+			f, err := os.Open(*lexFlags.source)
+			if err != nil {
+				return fmt.Errorf("Cannot open the source file %s: %w", *lexFlags.source, err)
+			}
+			defer f.Close()
+			src = f
+		}
+		lex, err = driver.NewLexer(clspec, src, opts...)
+		if err != nil {
+			return err
+		}
+	}
+	w := os.Stdout
+	if *lexFlags.output != "" {
+		f, err := os.OpenFile(*lexFlags.output, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		if err != nil {
+			return fmt.Errorf("Cannot open the output file %s: %w", *lexFlags.output, err)
+		}
+		defer f.Close()
+		w = f
+	}
+	for {
+		tok, err := lex.Next()
+		if err != nil {
+			return err
+		}
+		data, err := json.Marshal(tok)
+		if err != nil {
+			// No trailing newline: Execute appends one when printing the error.
+			// %w keeps the underlying cause available to errors.Is/As.
+			return fmt.Errorf("failed to marshal a token; token: %v, error: %w", tok, err)
+		}
+		if tok.Invalid && *lexFlags.breakOnError {
+			return fmt.Errorf("detected an error token: %v", string(data))
+		}
+		// A failed write would otherwise silently lose tokens.
+		if _, err := fmt.Fprintf(w, "%v\n", string(data)); err != nil {
+			return fmt.Errorf("Cannot write a token: %w", err)
+		}
+		if tok.EOF {
+			break
+		}
+	}
+
+	return nil
+}
+
+// readCompiledLexSpec reads the file at path and decodes its JSON content
+// into a spec.CompiledLexSpec. Errors from opening, reading or decoding the
+// file are returned unchanged.
+func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	// Release the file descriptor on every return path.
+	defer f.Close()
+	data, err := ioutil.ReadAll(f)
+	if err != nil {
+		return nil, err
+	}
+	clspec := &spec.CompiledLexSpec{}
+	err = json.Unmarshal(data, clspec)
+	if err != nil {
+		return nil, err
+	}
+	return clspec, nil
+}
diff --git a/cmd/maleeni/main.go b/cmd/maleeni/main.go
new file mode 100644
index 0000000..701f02f
--- /dev/null
+++ b/cmd/maleeni/main.go
@@ -0,0 +1,12 @@
+package main
+
+import (
+ "os"
+)
+
+// main runs the root command and exits with status 1 when it reports an
+// error. Execute has already printed the error by then, so it is not printed
+// again here.
+func main() {
+	if execErr := Execute(); execErr != nil {
+		os.Exit(1)
+	}
+}
diff --git a/cmd/maleeni/root.go b/cmd/maleeni/root.go
new file mode 100644
index 0000000..41c3081
--- /dev/null
+++ b/cmd/maleeni/root.go
@@ -0,0 +1,28 @@
+package main
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/spf13/cobra"
+)
+
+// rootCmd is the top-level maleeni command to which the subcommands attach
+// themselves. Cobra's own error and usage output is silenced on it; Execute
+// prints the returned error instead.
+var rootCmd = &cobra.Command{
+	SilenceErrors: true,
+	SilenceUsage:  true,
+	Use:           "maleeni",
+	Short:         "Generate a portable DFA from a lexical specification",
+	Long: "maleeni provides two features:\n" +
+		"* Generates a portable DFA from a lexical specification.\n" +
+		"* Tokenizes a text stream according to the lexical specification.\n" +
+		" This feature is primarily aimed at debugging the lexical specification.",
+}
+
+// Execute runs rootCmd. If the command fails, the error is printed to
+// standard error on its own line and then returned to the caller; otherwise
+// Execute returns nil.
+func Execute() error {
+	if runErr := rootCmd.Execute(); runErr != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", runErr)
+		return runErr
+	}
+	return nil
+}