aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- cmd/maleeni/lex.go 5
-rw-r--r-- driver/lexer.go 48
-rw-r--r-- driver/lexer_test.go 112
3 files changed, 140 insertions, 25 deletions
diff --git a/cmd/maleeni/lex.go b/cmd/maleeni/lex.go
index e6eab18..142f996 100644
--- a/cmd/maleeni/lex.go
+++ b/cmd/maleeni/lex.go
@@ -24,7 +24,10 @@ func init() {
Use: "lex clexspec",
Short: "Tokenize a text stream",
Long: `lex takes a text stream and tokenizes it according to a compiled lexical specification.
-As use ` + "`maleeni compile`" + `, you can generate the specification.`,
+As use ` + "`maleeni compile`" + `, you can generate the specification.
+
+Note that passive mode transitions are not performed. Thus, if there is a mode in
+your lexical specification that is set passively, lexemes in that mode will not be recognized.`,
Example: ` cat src | maleeni lex clexspec.json`,
Args: cobra.ExactArgs(1),
RunE: runLex,
diff --git a/driver/lexer.go b/driver/lexer.go
index 8d4a10f..1e54fa6 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -151,6 +151,13 @@ func (t *Token) MarshalJSON() ([]byte, error) {
type LexerOption func(l *Lexer) error
+func DisableModeTransition() LexerOption {
+ return func(l *Lexer) error {
+ l.passiveModeTran = true
+ return nil
+ }
+}
+
func EnableLogging(w io.Writer) LexerOption {
return func(l *Lexer) error {
logger, err := log.NewLogger(w)
@@ -163,12 +170,13 @@ func EnableLogging(w io.Writer) LexerOption {
}
type Lexer struct {
- clspec *spec.CompiledLexSpec
- src []byte
- srcPtr int
- tokBuf []*Token
- modeStack []spec.LexModeNum
- logger log.Logger
+ clspec *spec.CompiledLexSpec
+ src []byte
+ srcPtr int
+ tokBuf []*Token
+ modeStack []spec.LexModeNum
+ passiveModeTran bool
+ logger log.Logger
}
func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) {
@@ -183,7 +191,8 @@ func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader, opts ...LexerOption)
modeStack: []spec.LexModeNum{
clspec.InitialMode,
},
- logger: log.NewNopLogger(),
+ passiveModeTran: false,
+ logger: log.NewNopLogger(),
}
for _, opt := range opts {
err := opt(l)
@@ -201,7 +210,7 @@ func (l *Lexer) Next() (*Token, error) {
State:
mode: #%v %v
pointer: %v
- token buffer: %v`, l.mode(), l.clspec.Modes[l.mode()], l.srcPtr, l.tokBuf)
+ token buffer: %v`, l.Mode(), l.clspec.Modes[l.Mode()], l.srcPtr, l.tokBuf)
if len(l.tokBuf) > 0 {
tok := l.tokBuf[0]
@@ -212,7 +221,7 @@ func (l *Lexer) Next() (*Token, error) {
return tok, nil
}
- tok, err := l.nextAndTranMode()
+ tok, err := l.nextAndTransition()
if err != nil {
l.logger.Log(" Detectes an error: %v", err)
return nil, err
@@ -226,7 +235,7 @@ func (l *Lexer) Next() (*Token, error) {
}
errTok := tok
for {
- tok, err = l.nextAndTranMode()
+ tok, err = l.nextAndTransition()
if err != nil {
l.logger.Log(" Detectes an error: %v", err)
return nil, err
@@ -246,7 +255,7 @@ func (l *Lexer) Next() (*Token, error) {
return errTok, nil
}
-func (l *Lexer) nextAndTranMode() (*Token, error) {
+func (l *Lexer) nextAndTransition() (*Token, error) {
tok, err := l.next()
if err != nil {
return nil, err
@@ -254,16 +263,19 @@ func (l *Lexer) nextAndTranMode() (*Token, error) {
if tok.EOF || tok.Invalid {
return tok, nil
}
- spec := l.clspec.Specs[l.mode()]
+ if l.passiveModeTran {
+ return tok, nil
+ }
+ spec := l.clspec.Specs[l.Mode()]
if spec.Pop[tok.Kind] == 1 {
- err := l.popMode()
+ err := l.PopMode()
if err != nil {
return nil, err
}
}
mode := spec.Push[tok.Kind]
if !mode.IsNil() {
- l.pushMode(mode)
+ l.PushMode(mode)
}
// The length of the mode stack must be checked after the pop and push operations
// because those operations can be performed at the same time.
@@ -277,7 +289,7 @@ func (l *Lexer) nextAndTranMode() (*Token, error) {
}
func (l *Lexer) next() (*Token, error) {
- mode := l.mode()
+ mode := l.Mode()
modeName := l.clspec.Modes[mode]
spec := l.clspec.Specs[mode]
state := spec.DFA.InitialState
@@ -343,15 +355,15 @@ func (l *Lexer) lookupNextState(mode spec.LexModeNum, state int, v int) (int, bo
return next, true
}
-func (l *Lexer) mode() spec.LexModeNum {
+func (l *Lexer) Mode() spec.LexModeNum {
return l.modeStack[len(l.modeStack)-1]
}
-func (l *Lexer) pushMode(mode spec.LexModeNum) {
+func (l *Lexer) PushMode(mode spec.LexModeNum) {
l.modeStack = append(l.modeStack, mode)
}
-func (l *Lexer) popMode() error {
+func (l *Lexer) PopMode() error {
sLen := len(l.modeStack)
if sLen == 0 {
return fmt.Errorf("cannot pop a lex mode from a lex mode stack any more")
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 87a381c..33edbc0 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -52,9 +52,11 @@ func newEOFTokenDefault() *Token {
func TestLexer_Next(t *testing.T) {
test := []struct {
- lspec *spec.LexSpec
- src string
- tokens []*Token
+ lspec *spec.LexSpec
+ src string
+ tokens []*Token
+ passiveModeTran bool
+ tran func(l *Lexer, tok *Token) error
}{
{
lspec: &spec.LexSpec{
@@ -576,17 +578,108 @@ func TestLexer_Next(t *testing.T) {
newEOFTokenDefault(),
},
},
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false),
+ newLexEntry([]string{"default"}, "char", `.`, "", false),
+ newLexEntry([]string{"default"}, "push_1", `-> 1`, "", false),
+ newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false),
+ newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false),
+ newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", false),
+ },
+ },
+ src: `-> 1 -> 2 <- <- a`,
+ tokens: []*Token{
+ newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
+ newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
+ newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
+ newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
+ newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+ newEOFTokenDefault(),
+ },
+ passiveModeTran: true,
+ tran: func(l *Lexer, tok *Token) error {
+ switch l.clspec.Modes[l.Mode().Int()] {
+ case "default":
+ switch tok.KindName {
+ case "push_1":
+ l.PushMode(2)
+ }
+ case "mode_1":
+ switch tok.KindName {
+ case "push_2":
+ l.PushMode(3)
+ case "pop_1":
+ return l.PopMode()
+ }
+ case "mode_2":
+ switch tok.KindName {
+ case "pop_2":
+ return l.PopMode()
+ }
+ }
+ return nil
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false),
+ newLexEntry([]string{"default"}, "char", `.`, "", false),
+ newLexEntry([]string{"default"}, "push_1", `-> 1`, "mode_1", false),
+ newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false),
+ newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false),
+ newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", true),
+ },
+ },
+ src: `-> 1 -> 2 <- <- a`,
+ tokens: []*Token{
+ newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
+ newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
+ newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
+ newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
+ newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+ newEOFTokenDefault(),
+ },
+ // Active mode transition and an external transition function can be used together.
+ passiveModeTran: false,
+ tran: func(l *Lexer, tok *Token) error {
+ switch l.clspec.Modes[l.Mode().Int()] {
+ case "mode_1":
+ switch tok.KindName {
+ case "push_2":
+ l.PushMode(3)
+ case "pop_1":
+ return l.PopMode()
+ }
+ }
+ return nil
+ },
+ },
}
for i, tt := range test {
for compLv := compiler.CompressionLevelMin; compLv <= compiler.CompressionLevelMax; compLv++ {
t.Run(fmt.Sprintf("#%v-%v", i, compLv), func(t *testing.T) {
clspec, err := compiler.Compile(tt.lspec, compiler.CompressionLevel(compLv))
if err != nil {
- t.Fatalf("unexpected error occurred: %v", err)
+ t.Fatalf("unexpected error: %v", err)
}
- lexer, err := NewLexer(clspec, strings.NewReader(tt.src))
+ opts := []LexerOption{}
+ if tt.passiveModeTran {
+ opts = append(opts, DisableModeTransition())
+ }
+ lexer, err := NewLexer(clspec, strings.NewReader(tt.src), opts...)
if err != nil {
- t.Fatalf("unexpecated error occurred; %v", err)
+ t.Fatalf("unexpected error: %v", err)
}
for _, eTok := range tt.tokens {
tok, err := lexer.Next()
@@ -599,6 +692,13 @@ func TestLexer_Next(t *testing.T) {
if tok.EOF {
break
}
+
+ if tt.tran != nil {
+ err := tt.tran(lexer, tok)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ }
}
})
}