diff options
-rw-r--r-- | cli/cmd/compile.go | 42 | ||||
-rw-r--r-- | cli/cmd/lex.go | 43 | ||||
-rw-r--r-- | compiler/compiler.go | 10 | ||||
-rw-r--r-- | driver/lexer.go | 2 | ||||
-rw-r--r-- | driver/lexer_test.go | 63 | ||||
-rw-r--r-- | spec/spec.go | 89 |
6 files changed, 174 insertions, 75 deletions
diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go index 7815129..6ad64b6 100644 --- a/cli/cmd/compile.go +++ b/cli/cmd/compile.go @@ -25,23 +25,16 @@ func init() { } func runCompile(cmd *cobra.Command, args []string) (retErr error) { - var lspec *spec.LexSpec - { - data, err := ioutil.ReadAll(os.Stdin) - if err != nil { - return err - } - lspec = &spec.LexSpec{} - err = json.Unmarshal(data, lspec) - if err != nil { - return err - } + lspec, err := readLexSpec() + if err != nil { + return fmt.Errorf("Cannot read a lexical specification: %w", err) } var w io.Writer { - f, err := os.OpenFile("maleeni-compile.log", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + fileName := "maleeni-compile.log" + f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { - return err + return fmt.Errorf("Cannot open the log file %s: %w", fileName, err) } defer f.Close() w = f @@ -62,11 +55,32 @@ Date time: %v if err != nil { return err } + err = writeCompiledLexSpec(clspec) + if err != nil { + return fmt.Errorf("Cannot write a compiled lexical specification: %w", err) + } + + return nil +} + +func readLexSpec() (*spec.LexSpec, error) { + data, err := ioutil.ReadAll(os.Stdin) + if err != nil { + return nil, err + } + lspec := &spec.LexSpec{} + err = json.Unmarshal(data, lspec) + if err != nil { + return nil, err + } + return lspec, nil +} + +func writeCompiledLexSpec(clspec *spec.CompiledLexSpec) error { out, err := json.Marshal(clspec) if err != nil { return err } fmt.Fprintf(os.Stdout, "%v\n", string(out)) - return nil } diff --git a/cli/cmd/lex.go b/cli/cmd/lex.go index 14fbc01..2c0be27 100644 --- a/cli/cmd/lex.go +++ b/cli/cmd/lex.go @@ -27,28 +27,16 @@ As use ` + "`maleeni compile`" + `, you can generate the specification.`, } func runLex(cmd *cobra.Command, args []string) (retErr error) { - var clspec *spec.CompiledLexSpec - { - clspecPath := args[0] - f, err := os.Open(clspecPath) - if err != nil { - return err - } - data, err := ioutil.ReadAll(f) - if err != nil { - return err - } - clspec = &spec.CompiledLexSpec{} - err = json.Unmarshal(data, clspec) - if err != nil { - return err - } + clspec, err := readCompiledLexSpec(args[0]) + if err != nil { + return fmt.Errorf("Cannot read a compiled lexical specification: %w", err) } var w io.Writer { - f, err := os.OpenFile("maleeni-lex.log", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + fileName := "maleeni-lex.log" + f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { - return err + return fmt.Errorf("Cannot open the log file %s: %w", fileName, err) } defer f.Close() w = f @@ -76,7 +64,7 @@ Date time: %v } data, err := json.Marshal(tok) if err != nil { - fmt.Fprintf(os.Stderr, "failed to marshal a token; token: %v, error: %v\n", tok, err) + return fmt.Errorf("failed to marshal a token; token: %v, error: %v\n", tok, err) } fmt.Fprintf(os.Stdout, "%v\n", string(data)) if tok.EOF { @@ -86,3 +74,20 @@ Date time: %v return nil } + +func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + data, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + clspec := &spec.CompiledLexSpec{} + err = json.Unmarshal(data, clspec) + if err != nil { + return nil, err + } + return clspec, nil +} diff --git a/compiler/compiler.go b/compiler/compiler.go index 02cda43..15f42f3 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -1,6 +1,7 @@ package compiler import ( + "fmt" "io" "strings" @@ -26,6 +27,11 @@ type compilerConfig struct { } func Compile(lexspec *spec.LexSpec, opts ...compilerOption) (*spec.CompiledLexSpec, error) { + err := lexspec.Validate() + if err != nil { + return nil, fmt.Errorf("invalid lexical specification:\n%w", err) + } + config := &compilerConfig{ logger: log.NewNopLogger(), } @@ -36,10 +42,10 @@ func Compile(lexspec *spec.LexSpec, opts ...compilerOption) (*spec.CompiledLexSp } } - var kinds []string + var kinds []spec.LexKind var patterns map[int][]byte { - kinds = append(kinds, "") + kinds = append(kinds, spec.LexKindNil) patterns = map[int][]byte{} for i, e := range lexspec.Entries { kinds = append(kinds, e.Kind) diff --git a/driver/lexer.go b/driver/lexer.go index 4a3c3cc..356b168 100644 --- a/driver/lexer.go +++ b/driver/lexer.go @@ -245,7 +245,7 @@ func (l *lexer) next() (*Token, error) { state = nextState id, ok := l.clspec.DFA.AcceptingStates[state] if ok { - tok = newToken(id, l.clspec.Kinds[id], newByteSequence(buf)) + tok = newToken(id, l.clspec.Kinds[id].String(), newByteSequence(buf)) unfixedBufLen = 0 } } diff --git a/driver/lexer_test.go b/driver/lexer_test.go index 1f3841b..68830a5 100644 --- a/driver/lexer_test.go +++ b/driver/lexer_test.go @@ -10,6 +10,13 @@ import ( "github.com/nihei9/maleeni/spec" ) +func newLexEntry(kind string, pattern string) *spec.LexEntry { + return &spec.LexEntry{ + Kind: spec.LexKind(kind), + Pattern: spec.LexPattern(pattern), + } +} + func TestLexer_Next(t *testing.T) { test := []struct { lspec *spec.LexSpec @@ -19,8 +26,8 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - spec.NewLexEntry("t1", "(a|b)*abb"), - spec.NewLexEntry("t2", " +"), + newLexEntry("t1", "(a|b)*abb"), + newLexEntry("t2", " +"), }, }, src: "abb aabb aaabb babb bbabb abbbabb", @@ -42,9 +49,9 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - spec.NewLexEntry("t1", "b?a+"), - spec.NewLexEntry("t2", "(ab)?(cd)+"), - spec.NewLexEntry("t3", " +"), + newLexEntry("t1", "b?a+"), + newLexEntry("t2", "(ab)?(cd)+"), + newLexEntry("t3", " +"), }, }, src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd", @@ -70,7 +77,7 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - spec.NewLexEntry("t1", "."), + newLexEntry("t1", "."), }, }, src: string([]byte{ @@ -114,7 +121,7 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - spec.NewLexEntry("t1", "[ab.*+?|()[\\]]"), + newLexEntry("t1", "[ab.*+?|()[\\]]"), }, }, src: "ab.*+?|()[]", @@ -142,7 +149,7 @@ func TestLexer_Next(t *testing.T) { // maleeni cannot handle the null character in patterns because compiler.lexer, // specifically read() and restore(), recognizes the null characters as that a symbol doesn't exist. // There is room for improvement in this behavior of the lexer. - spec.NewLexEntry("1ByteChar", "[\x01-\x7f]"), + newLexEntry("1ByteChar", "[\x01-\x7f]"), }, }, src: string([]byte{ @@ -163,7 +170,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 2 byte characters - spec.NewLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"), + newLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"), }, }, src: string([]byte{ @@ -184,7 +191,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // All bytes are the same. - spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"), + newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"), }, }, src: string([]byte{ @@ -199,7 +206,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first two bytes are the same. - spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"), + newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"), }, }, src: string([]byte{ @@ -220,7 +227,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first byte are the same. - spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), + newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), }, }, src: string([]byte{ @@ -241,7 +248,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 3 byte characters - spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"), + newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"), }, }, src: string([]byte{ @@ -286,7 +293,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // All bytes are the same. - spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), + newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), }, }, src: string([]byte{ @@ -301,7 +308,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first 3 bytes are the same. - spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), + newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), }, }, src: string([]byte{ @@ -322,7 +329,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first 2 bytes are the same. - spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), + newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), }, }, src: string([]byte{ @@ -343,7 +350,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first byte are the same. - spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), + newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), }, }, src: string([]byte{ @@ -364,7 +371,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 4 byte characters - spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), + newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), }, }, src: string([]byte{ @@ -400,7 +407,7 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - spec.NewLexEntry("NonNumber", "[^0-9]+[0-9]"), + newLexEntry("NonNumber", "[^0-9]+[0-9]"), }, }, src: "foo9", @@ -439,8 +446,8 @@ func TestLexer_Next(t *testing.T) { func TestLexer_PeekN(t *testing.T) { clspec, err := compiler.Compile(&spec.LexSpec{ Entries: []*spec.LexEntry{ - spec.NewLexEntry("", "foo"), - spec.NewLexEntry("", "bar"), + newLexEntry("t1", "foo"), + newLexEntry("t2", "bar"), }, }) if err != nil { @@ -452,17 +459,9 @@ func TestLexer_PeekN(t *testing.T) { } expectedTokens := []*Token{ - { - ID: 1, - Match: newByteSequence([]byte("foo")), - }, - { - ID: 2, - Match: newByteSequence([]byte("bar")), - }, - { - EOF: true, - }, + newToken(1, "t1", []byte("foo")), + newToken(2, "t2", []byte("bar")), + newEOFToken(), } tok, err := lex.Peek1() diff --git a/spec/spec.go b/spec/spec.go index d827b68..0f9b484 100644 --- a/spec/spec.go +++ b/spec/spec.go @@ -1,21 +1,96 @@ package spec +import ( + "fmt" + "regexp" + "strings" +) + +const lexKindPattern = "[A-Za-z_][0-9A-Za-z_]*" + +var lexKindRE = regexp.MustCompile(lexKindPattern) + +type LexKind string + +const LexKindNil = LexKind("") + +func (k LexKind) String() string { + return string(k) +} + +func (k LexKind) validate() error { + if k == "" { + return fmt.Errorf("kind doesn't allow to be the empty string") + } + if !lexKindRE.Match([]byte(k)) { + return fmt.Errorf("kind must be %v", lexKindPattern) + } + return nil +} + +type LexPattern string + +func (p LexPattern) validate() error { + if p == "" { + return fmt.Errorf("pattern doesn't allow to be the empty string") + } + return nil +} + type LexEntry struct { - Kind string `json:"kind"` - Pattern string `json:"pattern"` + Kind LexKind `json:"kind"` + Pattern LexPattern `json:"pattern"` } -func NewLexEntry(kind string, pattern string) *LexEntry { - return &LexEntry{ - Kind: kind, - Pattern: pattern, +func (e *LexEntry) validate() error { + err := e.Kind.validate() + if err != nil { + return err + } + err = e.Pattern.validate() + if err != nil { + return err } + return nil } type LexSpec struct { Entries []*LexEntry `json:"entries"` } +func (s *LexSpec) Validate() error { + if len(s.Entries) <= 0 { + return fmt.Errorf("the lexical specification must have at least one entry") + } + { + var errs []error + for i, e := range s.Entries { + err := e.validate() + if err != nil { + errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err)) + } + } + if len(errs) > 0 { + var b strings.Builder + fmt.Fprintf(&b, "%v", errs[0]) + for _, err := range errs[1:] { + fmt.Fprintf(&b, "\n%v", err) + } + return fmt.Errorf(b.String()) + } + } + { + ks := map[string]struct{}{} + for _, e := range s.Entries { + if _, exist := ks[e.Kind.String()]; exist { + return fmt.Errorf("kinds `%v` are duplicates", e.Kind) + } + ks[e.Kind.String()] = struct{}{} + } + } + return nil +} + type TransitionTable struct { InitialState int `json:"initial_state"` AcceptingStates map[int]int `json:"accepting_states"` @@ -23,6 +98,6 @@ type TransitionTable struct { } type CompiledLexSpec struct { - Kinds []string `json:"kinds"` + Kinds []LexKind `json:"kinds"` DFA *TransitionTable `json:"dfa"` } |