-rw-r--r--  cli/cmd/compile.go    | 42
-rw-r--r--  cli/cmd/lex.go        | 43
-rw-r--r--  compiler/compiler.go  | 10
-rw-r--r--  driver/lexer.go       |  2
-rw-r--r--  driver/lexer_test.go  | 63
-rw-r--r--  spec/spec.go          | 89
6 files changed, 174 insertions(+), 75 deletions(-)
diff --git a/cli/cmd/compile.go b/cli/cmd/compile.go
index 7815129..6ad64b6 100644
--- a/cli/cmd/compile.go
+++ b/cli/cmd/compile.go
@@ -25,23 +25,16 @@ func init() {
}
func runCompile(cmd *cobra.Command, args []string) (retErr error) {
- var lspec *spec.LexSpec
- {
- data, err := ioutil.ReadAll(os.Stdin)
- if err != nil {
- return err
- }
- lspec = &spec.LexSpec{}
- err = json.Unmarshal(data, lspec)
- if err != nil {
- return err
- }
+ lspec, err := readLexSpec()
+ if err != nil {
+ return fmt.Errorf("Cannot read a lexical specification: %w", err)
}
var w io.Writer
{
- f, err := os.OpenFile("maleeni-compile.log", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ fileName := "maleeni-compile.log"
+ f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
- return err
+ return fmt.Errorf("Cannot open the log file %s: %w", fileName, err)
}
defer f.Close()
w = f
@@ -62,11 +55,32 @@ Date time: %v
if err != nil {
return err
}
+ err = writeCompiledLexSpec(clspec)
+ if err != nil {
+ return fmt.Errorf("Cannot write a compiled lexical specification: %w", err)
+ }
+
+ return nil
+}
+
+func readLexSpec() (*spec.LexSpec, error) {
+ data, err := ioutil.ReadAll(os.Stdin)
+ if err != nil {
+ return nil, err
+ }
+ lspec := &spec.LexSpec{}
+ err = json.Unmarshal(data, lspec)
+ if err != nil {
+ return nil, err
+ }
+ return lspec, nil
+}
+
+func writeCompiledLexSpec(clspec *spec.CompiledLexSpec) error {
out, err := json.Marshal(clspec)
if err != nil {
return err
}
fmt.Fprintf(os.Stdout, "%v\n", string(out))
-
return nil
}
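
The refactor above wraps low-level failures with fmt.Errorf and the %w verb, so callers can still reach the root cause through the errors package. A minimal stdlib-only sketch of that mechanism (the file name is made up for illustration):

    package main

    import (
        "errors"
        "fmt"
        "io/fs"
        "os"
    )

    func main() {
        // Wrap a low-level error the same way runCompile now does.
        _, err := os.Open("no-such-spec.json") // hypothetical file name
        wrapped := fmt.Errorf("Cannot read a lexical specification: %w", err)

        // %w preserves the chain, so errors.Is still sees the root cause.
        fmt.Println(errors.Is(wrapped, fs.ErrNotExist)) // true
        // errors.Unwrap recovers the original *fs.PathError.
        fmt.Println(errors.Unwrap(wrapped))
    }
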
diff --git a/cli/cmd/lex.go b/cli/cmd/lex.go
index 14fbc01..2c0be27 100644
--- a/cli/cmd/lex.go
+++ b/cli/cmd/lex.go
@@ -27,28 +27,16 @@ As use ` + "`maleeni compile`" + `, you can generate the specification.`,
}
func runLex(cmd *cobra.Command, args []string) (retErr error) {
- var clspec *spec.CompiledLexSpec
- {
- clspecPath := args[0]
- f, err := os.Open(clspecPath)
- if err != nil {
- return err
- }
- data, err := ioutil.ReadAll(f)
- if err != nil {
- return err
- }
- clspec = &spec.CompiledLexSpec{}
- err = json.Unmarshal(data, clspec)
- if err != nil {
- return err
- }
+ clspec, err := readCompiledLexSpec(args[0])
+ if err != nil {
+ return fmt.Errorf("Cannot read a compiled lexical specification: %w", err)
}
var w io.Writer
{
- f, err := os.OpenFile("maleeni-lex.log", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ fileName := "maleeni-lex.log"
+ f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
- return err
+ return fmt.Errorf("Cannot open the log file %s: %w", fileName, err)
}
defer f.Close()
w = f
@@ -76,7 +64,7 @@ Date time: %v
}
data, err := json.Marshal(tok)
if err != nil {
- fmt.Fprintf(os.Stderr, "failed to marshal a token; token: %v, error: %v\n", tok, err)
+ return fmt.Errorf("failed to marshal a token; token: %v, error: %v\n", tok, err)
}
fmt.Fprintf(os.Stdout, "%v\n", string(data))
if tok.EOF {
@@ -86,3 +74,20 @@ Date time: %v
return nil
}
+
+func readCompiledLexSpec(path string) (*spec.CompiledLexSpec, error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ data, err := ioutil.ReadAll(f)
+ if err != nil {
+ return nil, err
+ }
+ clspec := &spec.CompiledLexSpec{}
+ err = json.Unmarshal(data, clspec)
+ if err != nil {
+ return nil, err
+ }
+ return clspec, nil
+}
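
Returning the marshalling error (instead of logging it and pressing on with nil data) makes the token loop fail fast. json.Marshal rarely fails for plain structs like Token, but it does fail for unsupported types; a small sketch of that failure mode:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    func main() {
        // A flat struct such as a token marshals cleanly.
        ok, _ := json.Marshal(struct {
            ID int `json:"id"`
        }{ID: 1})
        fmt.Println(string(ok)) // {"id":1}

        // Channels (and funcs) are not serializable; with the fix above,
        // this kind of error now aborts the loop instead of leaving an
        // empty line on stdout.
        _, err := json.Marshal(struct{ C chan int }{C: make(chan int)})
        fmt.Println(err) // json: unsupported type: chan int
    }
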
diff --git a/compiler/compiler.go b/compiler/compiler.go
index 02cda43..15f42f3 100644
--- a/compiler/compiler.go
+++ b/compiler/compiler.go
@@ -1,6 +1,7 @@
package compiler
import (
+ "fmt"
"io"
"strings"
@@ -26,6 +27,11 @@ type compilerConfig struct {
}
func Compile(lexspec *spec.LexSpec, opts ...compilerOption) (*spec.CompiledLexSpec, error) {
+ err := lexspec.Validate()
+ if err != nil {
+ return nil, fmt.Errorf("invalid lexical specification:\n%w", err)
+ }
+
config := &compilerConfig{
logger: log.NewNopLogger(),
}
@@ -36,10 +42,10 @@ func Compile(lexspec *spec.LexSpec, opts ...compilerOption) (*spec.CompiledLexSp
}
}
- var kinds []string
+ var kinds []spec.LexKind
var patterns map[int][]byte
{
- kinds = append(kinds, "")
+ kinds = append(kinds, spec.LexKindNil)
patterns = map[int][]byte{}
for i, e := range lexspec.Entries {
kinds = append(kinds, e.Kind)
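
Hoisting Validate to the top of Compile means malformed specifications are rejected before any DFA construction starts. A usage sketch against the package paths used in this repository:

    package main

    import (
        "fmt"

        "github.com/nihei9/maleeni/compiler"
        "github.com/nihei9/maleeni/spec"
    )

    func main() {
        // An empty kind fails LexKind validation, so Compile returns
        // "invalid lexical specification:" plus the entry error without
        // ever building a transition table.
        _, err := compiler.Compile(&spec.LexSpec{
            Entries: []*spec.LexEntry{
                {Kind: spec.LexKind(""), Pattern: spec.LexPattern("a+")},
            },
        })
        fmt.Println(err)
    }
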
diff --git a/driver/lexer.go b/driver/lexer.go
index 4a3c3cc..356b168 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -245,7 +245,7 @@ func (l *lexer) next() (*Token, error) {
state = nextState
id, ok := l.clspec.DFA.AcceptingStates[state]
if ok {
- tok = newToken(id, l.clspec.Kinds[id], newByteSequence(buf))
+ tok = newToken(id, l.clspec.Kinds[id].String(), newByteSequence(buf))
unfixedBufLen = 0
}
}
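
CompiledLexSpec.Kinds is now []LexKind rather than []string, so the driver converts explicitly with String(). Because the method has a value receiver, LexKind also satisfies fmt.Stringer, and %v formatting keeps working unchanged; a quick illustration:

    package main

    import (
        "fmt"

        "github.com/nihei9/maleeni/spec"
    )

    func main() {
        k := spec.LexKind("t1")
        var s fmt.Stringer = k  // LexKind implements fmt.Stringer
        fmt.Println(s.String()) // t1
        fmt.Printf("%v\n", k)   // t1; fmt calls String() automatically
    }
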
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 1f3841b..68830a5 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -10,6 +10,13 @@ import (
"github.com/nihei9/maleeni/spec"
)
+func newLexEntry(kind string, pattern string) *spec.LexEntry {
+ return &spec.LexEntry{
+ Kind: spec.LexKind(kind),
+ Pattern: spec.LexPattern(pattern),
+ }
+}
+
func TestLexer_Next(t *testing.T) {
test := []struct {
lspec *spec.LexSpec
@@ -19,8 +26,8 @@ func TestLexer_Next(t *testing.T) {
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- spec.NewLexEntry("t1", "(a|b)*abb"),
- spec.NewLexEntry("t2", " +"),
+ newLexEntry("t1", "(a|b)*abb"),
+ newLexEntry("t2", " +"),
},
},
src: "abb aabb aaabb babb bbabb abbbabb",
@@ -42,9 +49,9 @@ func TestLexer_Next(t *testing.T) {
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- spec.NewLexEntry("t1", "b?a+"),
- spec.NewLexEntry("t2", "(ab)?(cd)+"),
- spec.NewLexEntry("t3", " +"),
+ newLexEntry("t1", "b?a+"),
+ newLexEntry("t2", "(ab)?(cd)+"),
+ newLexEntry("t3", " +"),
},
},
src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
@@ -70,7 +77,7 @@ func TestLexer_Next(t *testing.T) {
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- spec.NewLexEntry("t1", "."),
+ newLexEntry("t1", "."),
},
},
src: string([]byte{
@@ -114,7 +121,7 @@ func TestLexer_Next(t *testing.T) {
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- spec.NewLexEntry("t1", "[ab.*+?|()[\\]]"),
+ newLexEntry("t1", "[ab.*+?|()[\\]]"),
},
},
src: "ab.*+?|()[]",
@@ -142,7 +149,7 @@ func TestLexer_Next(t *testing.T) {
// maleeni cannot handle the null character in patterns because compiler.lexer,
// specifically read() and restore(), treats a null character as meaning that no symbol exists.
// There is room for improvement in this behavior of the lexer.
- spec.NewLexEntry("1ByteChar", "[\x01-\x7f]"),
+ newLexEntry("1ByteChar", "[\x01-\x7f]"),
},
},
src: string([]byte{
@@ -163,7 +170,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// all 2 byte characters
- spec.NewLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+ newLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
},
},
src: string([]byte{
@@ -184,7 +191,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// All bytes are the same.
- spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+ newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
},
},
src: string([]byte{
@@ -199,7 +206,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first two bytes are the same.
- spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+ newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
},
},
src: string([]byte{
@@ -220,7 +227,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first byte is the same.
- spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+ newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
},
},
src: string([]byte{
@@ -241,7 +248,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// all 3 byte characters
- spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+ newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
},
},
src: string([]byte{
@@ -286,7 +293,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// All bytes are the same.
- spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+ newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
},
},
src: string([]byte{
@@ -301,7 +308,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first 3 bytes are the same.
- spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+ newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
},
},
src: string([]byte{
@@ -322,7 +329,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first 2 bytes are the same.
- spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+ newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
},
},
src: string([]byte{
@@ -343,7 +350,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first byte is the same.
- spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+ newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
},
},
src: string([]byte{
@@ -364,7 +371,7 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// all 4 byte characters
- spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+ newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
},
},
src: string([]byte{
@@ -400,7 +407,7 @@ func TestLexer_Next(t *testing.T) {
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- spec.NewLexEntry("NonNumber", "[^0-9]+[0-9]"),
+ newLexEntry("NonNumber", "[^0-9]+[0-9]"),
},
},
src: "foo9",
@@ -439,8 +446,8 @@ func TestLexer_Next(t *testing.T) {
func TestLexer_PeekN(t *testing.T) {
clspec, err := compiler.Compile(&spec.LexSpec{
Entries: []*spec.LexEntry{
- spec.NewLexEntry("", "foo"),
- spec.NewLexEntry("", "bar"),
+ newLexEntry("t1", "foo"),
+ newLexEntry("t2", "bar"),
},
})
if err != nil {
@@ -452,17 +459,9 @@ func TestLexer_PeekN(t *testing.T) {
}
expectedTokens := []*Token{
- {
- ID: 1,
- Match: newByteSequence([]byte("foo")),
- },
- {
- ID: 2,
- Match: newByteSequence([]byte("bar")),
- },
- {
- EOF: true,
- },
+ newToken(1, "t1", []byte("foo")),
+ newToken(2, "t2", []byte("bar")),
+ newEOFToken(),
}
tok, err := lex.Peek1()
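
The byte ranges in these test patterns come straight from the UTF-8 encoding layout: 2-byte characters span 0xC2 0x80 through 0xDF 0xBF, 3-byte characters 0xE0 0xA0 0x80 through 0xEF 0xBF 0xBF, and 4-byte characters 0xF0 0x90 0x80 0x80 through 0xF4 0x8F 0xBF 0xBF. A short sketch confirming those boundaries:

    package main

    import "fmt"

    func main() {
        // Boundary code points behind the byte-range patterns above.
        for _, r := range []rune{0x80, 0x7FF, 0x800, 0xFFFF, 0x10000, 0x10FFFF} {
            fmt.Printf("U+%06X -> % X\n", r, []byte(string(r)))
        }
        // U+000080 -> C2 80        (smallest 2-byte character)
        // U+0007FF -> DF BF        (largest 2-byte character)
        // U+000800 -> E0 A0 80     (smallest 3-byte character)
        // U+00FFFF -> EF BF BF     (largest 3-byte character)
        // U+010000 -> F0 90 80 80  (smallest 4-byte character)
        // U+10FFFF -> F4 8F BF BF  (largest 4-byte character)
    }
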
diff --git a/spec/spec.go b/spec/spec.go
index d827b68..0f9b484 100644
--- a/spec/spec.go
+++ b/spec/spec.go
@@ -1,21 +1,96 @@
package spec
+import (
+ "fmt"
+ "regexp"
+ "strings"
+)
+
+const lexKindPattern = "[A-Za-z_][0-9A-Za-z_]*"
+
+// Anchored so that a partial match cannot pass validation.
+var lexKindRE = regexp.MustCompile("^" + lexKindPattern + "$")
+
+type LexKind string
+
+const LexKindNil = LexKind("")
+
+func (k LexKind) String() string {
+ return string(k)
+}
+
+func (k LexKind) validate() error {
+ if k == "" {
+ return fmt.Errorf("kind doesn't allow to be the empty string")
+ }
+ if !lexKindRE.Match([]byte(k)) {
+ return fmt.Errorf("kind must be %v", lexKindPattern)
+ }
+ return nil
+}
+
+type LexPattern string
+
+func (p LexPattern) validate() error {
+ if p == "" {
+ return fmt.Errorf("pattern doesn't allow to be the empty string")
+ }
+ return nil
+}
+
type LexEntry struct {
- Kind string `json:"kind"`
- Pattern string `json:"pattern"`
+ Kind LexKind `json:"kind"`
+ Pattern LexPattern `json:"pattern"`
}
-func NewLexEntry(kind string, pattern string) *LexEntry {
- return &LexEntry{
- Kind: kind,
- Pattern: pattern,
+func (e *LexEntry) validate() error {
+ err := e.Kind.validate()
+ if err != nil {
+ return err
+ }
+ err = e.Pattern.validate()
+ if err != nil {
+ return err
}
+ return nil
}
type LexSpec struct {
Entries []*LexEntry `json:"entries"`
}
+func (s *LexSpec) Validate() error {
+ if len(s.Entries) <= 0 {
+ return fmt.Errorf("the lexical specification must have at least one entry")
+ }
+ {
+ var errs []error
+ for i, e := range s.Entries {
+ err := e.validate()
+ if err != nil {
+ errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err))
+ }
+ }
+ if len(errs) > 0 {
+ var b strings.Builder
+ fmt.Fprintf(&b, "%v", errs[0])
+ for _, err := range errs[1:] {
+ fmt.Fprintf(&b, "\n%v", err)
+ }
+ return fmt.Errorf("%v", b.String())
+ }
+ }
+ {
+ ks := map[string]struct{}{}
+ for _, e := range s.Entries {
+ if _, exist := ks[e.Kind.String()]; exist {
+ return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
+ }
+ ks[e.Kind.String()] = struct{}{}
+ }
+ }
+ return nil
+}
+
type TransitionTable struct {
InitialState int `json:"initial_state"`
AcceptingStates map[int]int `json:"accepting_states"`
@@ -23,6 +98,6 @@ type TransitionTable struct {
}
type CompiledLexSpec struct {
- Kinds []string `json:"kinds"`
+ Kinds []LexKind `json:"kinds"`
DFA *TransitionTable `json:"dfa"`
}
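
Validate layers three checks: the entry list must be non-empty, each entry's kind and pattern must pass their own validation, and kinds must be unique. A small sketch exercising the uniqueness check (the kind name is arbitrary):

    package main

    import (
        "fmt"

        "github.com/nihei9/maleeni/spec"
    )

    func main() {
        s := &spec.LexSpec{
            Entries: []*spec.LexEntry{
                {Kind: spec.LexKind("word"), Pattern: spec.LexPattern("[a-z]+")},
                {Kind: spec.LexKind("word"), Pattern: spec.LexPattern("[0-9]+")},
            },
        }
        // Both entries are individually valid, so Validate reports
        // the duplicate kind.
        fmt.Println(s.Validate())
    }
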