author | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-01 17:17:53 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-01 21:18:34 +0900 |
commit | 2433c27f26bc1be2d9b33f6550482abc48fa31ef (patch) | |
tree | c7cbc22929045e2d5dccdc37cc978138b59c1bdb | |
parent | Add unique kind IDs to tokens (diff) | |
download | tre-2433c27f26bc1be2d9b33f6550482abc48fa31ef.tar.gz tre-2433c27f26bc1be2d9b33f6550482abc48fa31ef.tar.xz |
Change APIs
Change the fields of tokens (the results of lexical analysis) as follows:
- Rename: mode -> mode_id
- Rename: kind_id -> mode_kind_id
- Add: kind_id
The kind ID is unique across all modes, but the mode kind ID is unique only within a mode.
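To illustrate how the two IDs relate: the compiled spec carries a per-mode table (`KindIDs`) that maps a mode-local kind ID to the global kind ID, and the driver consults it when it builds a token. Below is a minimal sketch of that lookup; the concrete ID table is hypothetical, only the field names come from this commit.

```go
// Sketch only: resolving a mode-local kind ID (mode_kind_id) to the global
// kind ID (kind_id), mirroring the lookup the driver performs when it builds
// a token. The ID table below is made up for illustration.
package main

import (
	"fmt"

	"github.com/nihei9/maleeni/spec"
)

func globalKindID(clspec *spec.CompiledLexSpec, mode spec.LexModeID, modeKindID spec.LexModeKindID) spec.LexKindID {
	// KindIDs is indexed by mode first, then by the mode-local kind ID.
	return clspec.KindIDs[mode][modeKindID]
}

func main() {
	clspec := &spec.CompiledLexSpec{
		KindIDs: [][]spec.LexKindID{
			nil,       // index 0 is the nil mode
			{0, 1, 2}, // default mode: mode kind IDs 1, 2 -> kind IDs 1, 2
			{0, 3},    // a second mode: mode kind ID 1    -> kind ID 3
		},
	}
	fmt.Println(globalKindID(clspec, 2, 1)) // 3
}
```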
Change the fields of a transition table as follows (see the JSON sketch after this list):
- Rename: initial_mode -> initial_mode_id
- Rename: modes -> mode_names
- Rename: kinds -> kind_names
- Rename: specs[].kinds -> specs[].kind_names
- Rename: specs[].dfa.initial_state -> specs[].dfa.initial_state_id
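To make the renames concrete, here is a minimal, hypothetical sketch that marshals a toy compiled spec and prints the new JSON keys (`initial_mode_id`, `mode_names`, `kind_names`, `specs[].kind_names`, `specs[].dfa.initial_state_id`). The values are made up; only the field and key names come from the spec package as changed here.

```go
// Sketch only: a toy compiled spec marshalled to JSON to show the renamed keys.
package main

import (
	"encoding/json"
	"fmt"

	"github.com/nihei9/maleeni/spec"
)

func main() {
	clspec := &spec.CompiledLexSpec{
		InitialModeID: spec.LexModeIDDefault,
		ModeNames:     []spec.LexModeName{spec.LexModeNameNil, spec.LexModeNameDefault},
		KindNames:     []spec.LexKindName{spec.LexKindNameNil, "word"},
		Specs: []*spec.CompiledLexModeSpec{
			nil, // index 0 is the nil mode
			{
				KindNames: []spec.LexKindName{spec.LexKindNameNil, "word"},
				DFA: &spec.TransitionTable{
					InitialStateID: spec.StateIDMin,
				},
			},
		},
	}
	b, err := json.MarshalIndent(clspec, "", "  ")
	if err != nil {
		panic(err)
	}
	// Prints keys such as "initial_mode_id", "mode_names", "kind_names",
	// and, under "specs", "kind_names" and "dfa"."initial_state_id".
	fmt.Println(string(b))
}
```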
Change the public types defined in the spec package as follows (see the sketch after this list):
- Rename: LexModeNum -> LexModeID
- Rename: LexKind -> LexKindName
- Add: LexKindID
- Add: StateID
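The renamed and added types are thin defined types over int and string with small helper methods, so the different ID spaces cannot be mixed up silently. A minimal usage sketch, assuming the spec package as it stands after this commit:

```go
// Sketch only: exercising the renamed/added identifier types in the spec
// package (LexModeID, LexKindName, LexKindID, StateID).
package main

import (
	"fmt"

	"github.com/nihei9/maleeni/spec"
)

func main() {
	var modeID spec.LexModeID = spec.LexModeIDDefault // was LexModeNum
	var kindName spec.LexKindName = "word"            // was LexKind
	kindID := spec.LexKindIDMin                       // added: unique across all modes
	stateID := spec.StateIDMin                        // added: a state of a transition table

	fmt.Println(modeID.Int(), modeID.IsNil()) // 1 false
	fmt.Println(kindName.String())            // word
	fmt.Println(kindID.Int(), stateID.Int())  // 1 1
}
```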
-rw-r--r-- | README.md | 46
-rw-r--r-- | compiler/ast.go | 6
-rw-r--r-- | compiler/compiler.go | 112
-rw-r--r-- | compiler/dfa.go | 18
-rw-r--r-- | compiler/dfa_test.go | 6
-rw-r--r-- | compiler/parser.go | 12
-rw-r--r-- | compiler/parser_test.go | 8
-rw-r--r-- | compiler/test_util_test.go | 4
-rw-r--r-- | driver/lexer.go | 137
-rw-r--r-- | driver/lexer_test.go | 18
-rw-r--r-- | spec/spec.go | 153
11 files changed, 289 insertions, 231 deletions
@@ -47,33 +47,33 @@ If you want to make sure that the lexical specification behaves as expected, you
 ⚠️ An encoding that `maleeni lex` and the driver can handle is only UTF-8.
 ```sh
-$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_name, .text, .eof] | @csv'
-"word","The",false
-"whitespace"," ",false
-"word","truth",false
-"whitespace"," ",false
-"word","is",false
-"whitespace"," ",false
-"word","out",false
-"whitespace"," ",false
-"word","there",false
-"punctuation",".",false
-"","",true
+$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_id, .kind_name, .text, .eof] | @csv'
+2,"word","The",false
+1,"whitespace"," ",false
+2,"word","truth",false
+1,"whitespace"," ",false
+2,"word","is",false
+1,"whitespace"," ",false
+2,"word","out",false
+1,"whitespace"," ",false
+2,"word","there",false
+3,"punctuation",".",false
+0,"","",true
 ```
 The JSON format of tokens that `maleeni lex` command prints is as follows:
-| Field | Type | Description |
-|-----------|-------------------|-----------------------------------------------------------------------------------------|
-| mode | integer | `mode` represents a number that corresponds to a `mode_name`. |
-| mode_name | string | `mode_name` is a mode name that represents in which mode the lexer detected the token. |
-| kind_id | integer | `kind_id` represents an ID of a kind and is unique among modes. |
-| kind | integer | `kind` represents a number that corresponds to a `KindName`. |
-| kind_name | string | `kind_name` is a kind name that represents what kind the token has. |
-| match | array of integers | `match` is a byte sequence matched a pattern of a lexical specification. |
-| text | string | `text` is a string representation of `match`. |
-| eof | bool | If `eof` is true, it means the token is the EOF token. |
-| invalid | bool | If `invalid` is true, it means the token is an error token. |
+| Field | Type | Description |
+|--------------|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------|
+| mode_id | integer | An ID of a lex mode. |
+| mode_name | string | A name of a lex mode. |
+| kind_id | integer | An ID of a kind. This is unique among all modes. |
+| mode_kind_id | integer | An ID of a lexical kind. This is unique only within a mode. Note that you need to use the `kind_id` field if you want to identify a kind across all modes. |
+| kind_name | string | A name of a lexical kind. |
+| match | array of integers | A byte sequence of a lexeme. |
+| text | string | A string representation of a lexeme. |
+| eof | bool | When this field is `true`, it means the token is the EOF token. |
+| invalid | bool | When this field is `true`, it means the token is an error token. |
 When using the driver, please import `github.com/nihei9/maleeni/driver` and `github.com/nihei9/maleeni/spec` package.
You can use the driver easily in the following way: diff --git a/compiler/ast.go b/compiler/ast.go index 7d3965a..046662e 100644 --- a/compiler/ast.go +++ b/compiler/ast.go @@ -3,6 +3,8 @@ package compiler import ( "fmt" "io" + + "github.com/nihei9/maleeni/spec" ) type astNode interface { @@ -78,13 +80,13 @@ func (n *symbolNode) last() *symbolPositionSet { } type endMarkerNode struct { - id int + id spec.LexModeKindID pos symbolPosition firstMemo *symbolPositionSet lastMemo *symbolPositionSet } -func newEndMarkerNode(id int) *endMarkerNode { +func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode { return &endMarkerNode{ id: id, pos: symbolPositionNil, diff --git a/compiler/compiler.go b/compiler/compiler.go index 5d3e52f..5d1a1d5 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -54,28 +54,28 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp } } - modeEntries, modes, modeNums, fragmetns := groupEntriesByLexMode(lexspec.Entries) + modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries) modeSpecs := []*spec.CompiledLexModeSpec{ nil, } for i, es := range modeEntries[1:] { - modeName := modes[i+1] + modeName := modeNames[i+1] config.logger.Log("Compile %v mode:", modeName) - modeSpec, err := compile(es, modeNums, fragmetns, config) + modeSpec, err := compile(es, modeName2ID, fragmetns, config) if err != nil { return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err) } modeSpecs = append(modeSpecs, modeSpec) } - var kindNames []spec.LexKind - var name2ID map[spec.LexKind]spec.LexKindID + var kindNames []spec.LexKindName + var name2ID map[spec.LexKindName]spec.LexKindID { - name2ID = map[spec.LexKind]spec.LexKindID{} + name2ID = map[spec.LexKindName]spec.LexKindID{} id := spec.LexKindIDMin for _, modeSpec := range modeSpecs[1:] { - for _, name := range modeSpec.Kinds[1:] { + for _, name := range modeSpec.KindNames[1:] { if _, ok := name2ID[name]; ok { continue } @@ -84,7 +84,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp } } - kindNames = make([]spec.LexKind, len(name2ID)+1) + kindNames = make([]spec.LexKindName, len(name2ID)+1) for name, id := range name2ID { kindNames[id] = name } @@ -94,8 +94,8 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp { kindIDs = make([][]spec.LexKindID, len(modeSpecs)) for i, modeSpec := range modeSpecs[1:] { - ids := make([]spec.LexKindID, len(modeSpec.Kinds)) - for modeID, name := range modeSpec.Kinds { + ids := make([]spec.LexKindID, len(modeSpec.KindNames)) + for modeID, name := range modeSpec.KindNames { if modeID == 0 { continue } @@ -106,25 +106,25 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp } return &spec.CompiledLexSpec{ - InitialMode: spec.LexModeNumDefault, - Modes: modes, - Kinds: kindNames, + InitialModeID: spec.LexModeIDDefault, + ModeNames: modeNames, + KindNames: kindNames, KindIDs: kindIDs, CompressionLevel: config.compLv, Specs: modeSpecs, }, nil } -func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeNum, map[string]*spec.LexEntry) { - modes := []spec.LexModeName{ +func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[string]*spec.LexEntry) { + modeNames := []spec.LexModeName{ spec.LexModeNameNil, spec.LexModeNameDefault, } - modeNums := map[spec.LexModeName]spec.LexModeNum{ - 
spec.LexModeNameNil: spec.LexModeNumNil, - spec.LexModeNameDefault: spec.LexModeNumDefault, + modeName2ID := map[spec.LexModeName]spec.LexModeID{ + spec.LexModeNameNil: spec.LexModeIDNil, + spec.LexModeNameDefault: spec.LexModeIDDefault, } - lastModeNum := spec.LexModeNumDefault + lastModeID := spec.LexModeIDDefault modeEntries := [][]*spec.LexEntry{ nil, {}, @@ -141,30 +141,30 @@ func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec spec.LexModeNameDefault, } } - for _, mode := range ms { - num, ok := modeNums[mode] + for _, modeName := range ms { + modeID, ok := modeName2ID[modeName] if !ok { - num = lastModeNum.Succ() - lastModeNum = num - modeNums[mode] = num - modes = append(modes, mode) + modeID = lastModeID + 1 + lastModeID = modeID + modeName2ID[modeName] = modeID + modeNames = append(modeNames, modeName) modeEntries = append(modeEntries, []*spec.LexEntry{}) } - modeEntries[num] = append(modeEntries[num], e) + modeEntries[modeID] = append(modeEntries[modeID], e) } } - return modeEntries, modes, modeNums, fragments + return modeEntries, modeNames, modeName2ID, fragments } -func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexModeNum, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) { - var kinds []spec.LexKind - var patterns map[int][]byte +func compile(entries []*spec.LexEntry, modeName2ID map[spec.LexModeName]spec.LexModeID, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) { + var kindNames []spec.LexKindName + var patterns map[spec.LexModeKindID][]byte { - kinds = append(kinds, spec.LexKindNil) - patterns = map[int][]byte{} + kindNames = append(kindNames, spec.LexKindNameNil) + patterns = map[spec.LexModeKindID][]byte{} for i, e := range entries { - kinds = append(kinds, e.Kind) - patterns[i+1] = []byte(e.Pattern) + kindNames = append(kindNames, e.Kind) + patterns[spec.LexModeKindID(i+1)] = []byte(e.Pattern) } config.logger.Log("Patterns:") @@ -173,16 +173,16 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod } } - push := []spec.LexModeNum{ - spec.LexModeNumNil, + push := []spec.LexModeID{ + spec.LexModeIDNil, } pop := []int{ 0, } for _, e := range entries { - pushV := spec.LexModeNumNil + pushV := spec.LexModeIDNil if e.Push != "" { - pushV = modeNums[e.Push] + pushV = modeName2ID[e.Push] } push = append(push, pushV) popV := 0 @@ -222,7 +222,7 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod config.logger.Log(`DFA: States: %v states (%v entries) - Initial State: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialState) + Initial State ID: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialStateID) config.logger.Log(" Accepting States:") for state, symbol := range tranTab.AcceptingStates { config.logger.Log(" %v: %v", state, symbol) @@ -244,10 +244,10 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod } return &spec.CompiledLexModeSpec{ - Kinds: kinds, - Push: push, - Pop: pop, - DFA: tranTab, + KindNames: kindNames, + Push: push, + Pop: pop, + DFA: tranTab, }, nil } @@ -259,7 +259,7 @@ const ( func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { ueTab := compressor.NewUniqueEntriesTable() { - orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount) + orig, err := 
compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount) if err != nil { return nil, err } @@ -285,8 +285,8 @@ func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.Transition UniqueEntries: &spec.RowDisplacementTable{ OriginalRowCount: rdTab.OriginalRowCount, OriginalColCount: rdTab.OriginalColCount, - EmptyValue: rdTab.EmptyValue, - Entries: rdTab.Entries, + EmptyValue: spec.StateIDNil, + Entries: convertIntSliceToStateIDSlice(rdTab.Entries), Bounds: rdTab.Bounds, RowDisplacement: rdTab.RowDisplacement, }, @@ -302,7 +302,7 @@ func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.Transition func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { ueTab := compressor.NewUniqueEntriesTable() { - orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount) + orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount) if err != nil { return nil, err } @@ -313,7 +313,7 @@ func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.Transition } tranTab.Transition = &spec.UniqueEntriesTable{ - UncompressedUniqueEntries: ueTab.UniqueEntries, + UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries), RowNums: ueTab.RowNums, OriginalRowCount: ueTab.OriginalRowCount, OriginalColCount: ueTab.OriginalColCount, @@ -322,3 +322,19 @@ func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.Transition return tranTab, nil } + +func convertStateIDSliceToIntSlice(s []spec.StateID) []int { + is := make([]int, len(s)) + for i, v := range s { + is[i] = v.Int() + } + return is +} + +func convertIntSliceToStateIDSlice(s []int) []spec.StateID { + ss := make([]spec.StateID, len(s)) + for i, v := range s { + ss[i] = spec.StateID(v) + } + return ss +} diff --git a/compiler/dfa.go b/compiler/dfa.go index b94fca8..1d0b26a 100644 --- a/compiler/dfa.go +++ b/compiler/dfa.go @@ -9,7 +9,7 @@ import ( type DFA struct { States []string InitialState string - AcceptingStatesTable map[string]int + AcceptingStatesTable map[string]spec.LexModeKindID TransitionTable map[string][256]string } @@ -65,7 +65,7 @@ func genDFA(root astNode, symTab *symbolTable) *DFA { } } - accTab := map[string]int{} + accTab := map[string]spec.LexModeKindID{} { for h, s := range stateMap { for _, pos := range s.set() { @@ -104,33 +104,33 @@ func genDFA(root astNode, symTab *symbolTable) *DFA { } func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) { - state2Num := map[string]int{} + stateHash2ID := map[string]spec.StateID{} for i, s := range dfa.States { // Since 0 represents an invalid value in a transition table, // assign a number greater than or equal to 1 to states. 
- state2Num[s] = i + 1 + stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int()) } - acc := make([]int, len(dfa.States)+1) + acc := make([]spec.LexModeKindID, len(dfa.States)+1) for _, s := range dfa.States { id, ok := dfa.AcceptingStatesTable[s] if !ok { continue } - acc[state2Num[s]] = id + acc[stateHash2ID[s]] = id } rowCount := len(dfa.States) + 1 colCount := 256 - tran := make([]int, rowCount*colCount) + tran := make([]spec.StateID, rowCount*colCount) for s, tab := range dfa.TransitionTable { for v, to := range tab { - tran[state2Num[s]*256+v] = state2Num[to] + tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to] } } return &spec.TransitionTable{ - InitialState: state2Num[dfa.InitialState], + InitialStateID: stateHash2ID[dfa.InitialState], AcceptingStates: acc, UncompressedTransition: tran, RowCount: rowCount, diff --git a/compiler/dfa_test.go b/compiler/dfa_test.go index a797313..2683815 100644 --- a/compiler/dfa_test.go +++ b/compiler/dfa_test.go @@ -2,10 +2,12 @@ package compiler import ( "testing" + + "github.com/nihei9/maleeni/spec" ) func TestGenDFA(t *testing.T) { - root, symTab, err := parse(map[int][]byte{ + root, symTab, err := parse(map[spec.LexModeKindID][]byte{ 1: []byte("(a|b)*abb"), }, nil) if err != nil { @@ -94,7 +96,7 @@ func TestGenDFA(t *testing.T) { t.Errorf("initial state is mismatched; want: %v, got: %v", s0.hash(), dfa.InitialState) } - accTab := map[string]int{ + accTab := map[string]spec.LexModeKindID{ s3.hash(): 1, } if len(dfa.AcceptingStatesTable) != len(accTab) { diff --git a/compiler/parser.go b/compiler/parser.go index 06a762d..1c9126a 100644 --- a/compiler/parser.go +++ b/compiler/parser.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "strings" + + "github.com/nihei9/maleeni/spec" ) type ParseErrors struct { @@ -23,7 +25,7 @@ func (e *ParseErrors) Error() string { } type ParseError struct { - ID int + ID spec.LexModeKindID Pattern []byte Cause error Details string @@ -44,13 +46,13 @@ func raiseSyntaxError(synErr *SyntaxError) { type symbolTable struct { symPos2Byte map[symbolPosition]byteRange - endPos2ID map[symbolPosition]int + endPos2ID map[symbolPosition]spec.LexModeKindID } func genSymbolTable(root astNode) *symbolTable { symTab := &symbolTable{ symPos2Byte: map[symbolPosition]byteRange{}, - endPos2ID: map[symbolPosition]int{}, + endPos2ID: map[symbolPosition]spec.LexModeKindID{}, } return genSymTab(symTab, root) } @@ -76,7 +78,7 @@ func genSymTab(symTab *symbolTable, node astNode) *symbolTable { return symTab } -func parse(regexps map[int][]byte, fragments map[string][]byte) (astNode, *symbolTable, error) { +func parse(regexps map[spec.LexModeKindID][]byte, fragments map[string][]byte) (astNode, *symbolTable, error) { if len(regexps) == 0 { return nil, nil, fmt.Errorf("parse() needs at least one token entry") } @@ -159,7 +161,7 @@ func parseFragments(fragments map[string][]byte) (map[string]astNode, error) { return fragmentASTs, nil } -func parseRegexp(regexps map[int][]byte, fragmentASTs map[string]astNode) (astNode, error) { +func parseRegexp(regexps map[spec.LexModeKindID][]byte, fragmentASTs map[string]astNode) (astNode, error) { symPos := symbolPositionMin var root astNode var perrs []*ParseError diff --git a/compiler/parser_test.go b/compiler/parser_test.go index 3394845..30e6130 100644 --- a/compiler/parser_test.go +++ b/compiler/parser_test.go @@ -4,6 +4,8 @@ import ( "fmt" "reflect" "testing" + + "github.com/nihei9/maleeni/spec" ) func symPos(n uint16) symbolPosition { @@ -1197,7 +1199,7 @@ func TestParse(t *testing.T) { for kind, pattern := 
range tt.fragments { fragments[kind] = []byte(pattern) } - ast, _, err := parse(map[int][]byte{ + ast, _, err := parse(map[spec.LexModeKindID][]byte{ 1: []byte(tt.pattern), }, fragments) if tt.syntaxError != nil { @@ -1237,7 +1239,7 @@ func TestParse(t *testing.T) { } func TestParse_FollowAndSymbolTable(t *testing.T) { - root, symTab, err := parse(map[int][]byte{ + root, symTab, err := parse(map[spec.LexModeKindID][]byte{ 1: []byte("(a|b)*abb"), }, nil) if err != nil { @@ -1295,7 +1297,7 @@ func TestParse_FollowAndSymbolTable(t *testing.T) { symPos(4): entry(byte('b')), symPos(5): entry(byte('b')), }, - endPos2ID: map[symbolPosition]int{ + endPos2ID: map[symbolPosition]spec.LexModeKindID{ endPos(6): 1, }, } diff --git a/compiler/test_util_test.go b/compiler/test_util_test.go index 2ead2c9..72e150b 100644 --- a/compiler/test_util_test.go +++ b/compiler/test_util_test.go @@ -1,5 +1,7 @@ package compiler +import "github.com/nihei9/maleeni/spec" + func newRangeSymbolNodeWithPos(from, to byte, pos symbolPosition) *symbolNode { n := newRangeSymbolNode(from, to) n.pos = pos @@ -13,7 +15,7 @@ func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { } func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { - n := newEndMarkerNode(id) + n := newEndMarkerNode(spec.LexModeKindID(id)) n.pos = pos return n } diff --git a/driver/lexer.go b/driver/lexer.go index 7ad2dd0..bad7dbd 100644 --- a/driver/lexer.go +++ b/driver/lexer.go @@ -56,69 +56,70 @@ func (s byteSequence) merge(a byteSequence) byteSequence { // Token representes a token. type Token struct { - // `Mode` represents a number that corresponds to a `ModeName`. - Mode spec.LexModeNum + // ModeID is an ID of a lex mode. + ModeID spec.LexModeID - // `ModeName` is a mode name that represents in which mode the lexer detected the token. + // ModeName is a name of a lex mode. ModeName spec.LexModeName - // `KindID` is a unique ID among modes. - KindID int + // KindID is an ID of a kind. This is unique among all modes. + KindID spec.LexKindID - // `Kind` represents a number that corresponds to a `KindName`. - Kind int + // ModeKindID is an ID of a lexical kind. This is unique only within a mode. + // Note that you need to use KindID field if you want to identify a kind across all modes. + ModeKindID spec.LexModeKindID - // `KindName` is a kind name that represents what kind the token has. - KindName string + // KindName is a name of a lexical kind. + KindName spec.LexKindName - // If `EOF` is true, it means the token is the EOF token. + // When this field is true, it means the token is the EOF token. EOF bool - // If `Invalid` is true, it means the token is an error token. + // When this field is true, it means the token is an error token. Invalid bool - // `match` is a byte sequence matched a pattern of a lexical specification. + // match is a byte sequence matched a pattern of a lexical specification. 
match byteSequence } -func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kindID int, modeKindID int, kindName string, match byteSequence) *Token { +func newToken(modeID spec.LexModeID, modeName spec.LexModeName, kindID spec.LexKindID, modeKindID spec.LexModeKindID, kindName spec.LexKindName, match byteSequence) *Token { return &Token{ - Mode: mode, - ModeName: modeName, - KindID: kindID, - Kind: modeKindID, - KindName: kindName, - match: match, + ModeID: modeID, + ModeName: modeName, + KindID: kindID, + ModeKindID: modeKindID, + KindName: kindName, + match: match, } } -func newEOFToken(mode spec.LexModeNum, modeName spec.LexModeName) *Token { +func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token { return &Token{ - Mode: mode, - ModeName: modeName, - Kind: 0, - EOF: true, + ModeID: modeID, + ModeName: modeName, + ModeKindID: 0, + EOF: true, } } -func newInvalidToken(mode spec.LexModeNum, modeName spec.LexModeName, match byteSequence) *Token { +func newInvalidToken(modeID spec.LexModeID, modeName spec.LexModeName, match byteSequence) *Token { return &Token{ - Mode: mode, - ModeName: modeName, - Kind: 0, - match: match, - Invalid: true, + ModeID: modeID, + ModeName: modeName, + ModeKindID: 0, + match: match, + Invalid: true, } } func (t *Token) String() string { if t.Invalid { - return fmt.Sprintf("!{mode: %v, mode name: %v, text: %v, byte: %v}", t.Mode, t.ModeName, t.Text(), t.Match()) + return fmt.Sprintf("!{mode id: %v, mode name: %v, text: %v, byte: %v}", t.ModeID, t.ModeName, t.Text(), t.Match()) } if t.EOF { return "{eof}" } - return fmt.Sprintf("{mode: %v, mode name: %v, kind: %v, kind name: %v, text: %v, byte: %v}", t.Mode, t.ModeName, t.Kind, t.KindName, t.Text(), t.Match()) + return fmt.Sprintf("{mode id: %v, mode name: %v, kind id: %v, mode kind id: %v, kind name: %v, text: %v, byte: %v}", t.ModeID, t.ModeName, t.KindID, t.ModeKindID, t.KindName, t.Text(), t.Match()) } // Match returns a byte slice matched a pattern of a lexical specification. 
@@ -133,25 +134,25 @@ func (t *Token) Text() string { func (t *Token) MarshalJSON() ([]byte, error) { return json.Marshal(struct { - Mode int `json:"mode"` - ModeName string `json:"mode_name"` - KindID int `json:"kind_id"` - Kind int `json:"kind"` - KindName string `json:"kind_name"` - Match byteSequence `json:"match"` - Text string `json:"text"` - EOF bool `json:"eof"` - Invalid bool `json:"invalid"` + ModeID int `json:"mode_id"` + ModeName string `json:"mode_name"` + KindID int `json:"kind_id"` + ModeKindID int `json:"mode_kind_id"` + KindName string `json:"kind_name"` + Match byteSequence `json:"match"` + Text string `json:"text"` + EOF bool `json:"eof"` + Invalid bool `json:"invalid"` }{ - Mode: t.Mode.Int(), - ModeName: t.ModeName.String(), - KindID: t.KindID, - Kind: t.Kind, - KindName: t.KindName, - Match: t.match, - Text: t.Text(), - EOF: t.EOF, - Invalid: t.Invalid, + ModeID: t.ModeID.Int(), + ModeName: t.ModeName.String(), + KindID: t.KindID.Int(), + ModeKindID: t.ModeKindID.Int(), + KindName: t.KindName.String(), + Match: t.match, + Text: t.Text(), + EOF: t.EOF, + Invalid: t.Invalid, }) } @@ -180,7 +181,7 @@ type Lexer struct { src []byte srcPtr int tokBuf []*Token - modeStack []spec.LexModeNum + modeStack []spec.LexModeID passiveModeTran bool logger log.Logger } @@ -194,8 +195,8 @@ func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader, opts ...LexerOption) clspec: clspec, src: b, srcPtr: 0, - modeStack: []spec.LexModeNum{ - clspec.InitialMode, + modeStack: []spec.LexModeID{ + clspec.InitialModeID, }, passiveModeTran: false, logger: log.NewNopLogger(), @@ -216,7 +217,7 @@ func (l *Lexer) Next() (*Token, error) { State: mode: #%v %v pointer: %v - token buffer: %v`, l.Mode(), l.clspec.Modes[l.Mode()], l.srcPtr, l.tokBuf) + token buffer: %v`, l.Mode(), l.clspec.ModeNames[l.Mode()], l.srcPtr, l.tokBuf) if len(l.tokBuf) > 0 { tok := l.tokBuf[0] @@ -273,13 +274,13 @@ func (l *Lexer) nextAndTransition() (*Token, error) { return tok, nil } spec := l.clspec.Specs[l.Mode()] - if spec.Pop[tok.Kind] == 1 { + if spec.Pop[tok.ModeKindID] == 1 { err := l.PopMode() if err != nil { return nil, err } } - mode := spec.Push[tok.Kind] + mode := spec.Push[tok.ModeKindID] if !mode.IsNil() { l.PushMode(mode) } @@ -296,9 +297,9 @@ func (l *Lexer) nextAndTransition() (*Token, error) { func (l *Lexer) next() (*Token, error) { mode := l.Mode() - modeName := l.clspec.Modes[mode] + modeName := l.clspec.ModeNames[mode] spec := l.clspec.Specs[mode] - state := spec.DFA.InitialState + state := spec.DFA.InitialStateID buf := []byte{} unfixedBufLen := 0 var tok *Token @@ -330,13 +331,13 @@ func (l *Lexer) next() (*Token, error) { modeKindID := spec.DFA.AcceptingStates[state] if modeKindID != 0 { kindID := l.clspec.KindIDs[mode][modeKindID] - tok = newToken(mode, modeName, kindID.Int(), modeKindID, spec.Kinds[modeKindID].String(), newByteSequence(buf)) + tok = newToken(mode, modeName, kindID, modeKindID, spec.KindNames[modeKindID], newByteSequence(buf)) unfixedBufLen = 0 } } } -func (l *Lexer) lookupNextState(mode spec.LexModeNum, state int, v int) (int, bool) { +func (l *Lexer) lookupNextState(mode spec.LexModeID, state spec.StateID, v int) (spec.StateID, bool) { switch l.clspec.CompressionLevel { case 2: tab := l.clspec.Specs[mode].DFA.Transition @@ -349,24 +350,24 @@ func (l *Lexer) lookupNextState(mode spec.LexModeNum, state int, v int) (int, bo case 1: tab := l.clspec.Specs[mode].DFA.Transition next := tab.UncompressedUniqueEntries[tab.RowNums[state]*tab.OriginalColCount+v] - if next == 0 { - return 0, 
false + if next == spec.StateIDNil { + return spec.StateIDNil, false } return next, true } - spec := l.clspec.Specs[mode] - next := spec.DFA.UncompressedTransition[state*spec.DFA.ColCount+v] - if next == 0 { - return 0, false + modeSpec := l.clspec.Specs[mode] + next := modeSpec.DFA.UncompressedTransition[state.Int()*modeSpec.DFA.ColCount+v] + if next == spec.StateIDNil { + return spec.StateIDNil, false } return next, true } -func (l *Lexer) Mode() spec.LexModeNum { +func (l *Lexer) Mode() spec.LexModeID { return l.modeStack[len(l.modeStack)-1] } -func (l *Lexer) PushMode(mode spec.LexModeNum) { +func (l *Lexer) PushMode(mode spec.LexModeID) { l.modeStack = append(l.modeStack, mode) } diff --git a/driver/lexer_test.go b/driver/lexer_test.go index 79ee12e..5abe83c 100644 --- a/driver/lexer_test.go +++ b/driver/lexer_test.go @@ -16,7 +16,7 @@ func newLexEntry(modes []string, kind string, pattern string, push string, pop b ms = append(ms, spec.LexModeName(m)) } return &spec.LexEntry{ - Kind: spec.LexKind(kind), + Kind: spec.LexKindName(kind), Pattern: spec.LexPattern(pattern), Modes: ms, Push: spec.LexModeName(push), @@ -26,7 +26,7 @@ func newLexEntry(modes []string, kind string, pattern string, push string, pop b func newLexEntryDefaultNOP(kind string, pattern string) *spec.LexEntry { return &spec.LexEntry{ - Kind: spec.LexKind(kind), + Kind: spec.LexKindName(kind), Pattern: spec.LexPattern(pattern), Modes: []spec.LexModeName{ spec.LexModeNameDefault, @@ -36,18 +36,18 @@ func newLexEntryDefaultNOP(kind string, pattern string) *spec.LexEntry { func newLexEntryFragment(kind string, pattern string) *spec.LexEntry { return &spec.LexEntry{ - Kind: spec.LexKind(kind), + Kind: spec.LexKindName(kind), Pattern: spec.LexPattern(pattern), Fragment: true, } } func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token { - return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, kindID, modeKindID, kindName, match) + return newToken(spec.LexModeIDDefault, spec.LexModeNameDefault, spec.LexKindID(kindID), spec.LexModeKindID(modeKindID), spec.LexKindName(kindName), match) } func newEOFTokenDefault() *Token { - return newEOFToken(spec.LexModeNumDefault, spec.LexModeNameDefault) + return newEOFToken(spec.LexModeIDDefault, spec.LexModeNameDefault) } func TestLexer_Next(t *testing.T) { @@ -604,7 +604,7 @@ func TestLexer_Next(t *testing.T) { }, passiveModeTran: true, tran: func(l *Lexer, tok *Token) error { - switch l.clspec.Modes[l.Mode().Int()] { + switch l.clspec.ModeNames[l.Mode()] { case "default": switch tok.KindName { case "push_1": @@ -653,7 +653,7 @@ func TestLexer_Next(t *testing.T) { // Active mode transition and an external transition function can be used together. 
passiveModeTran: false, tran: func(l *Lexer, tok *Token) error { - switch l.clspec.Modes[l.Mode().Int()] { + switch l.clspec.ModeNames[l.Mode()] { case "mode_1": switch tok.KindName { case "push_2": @@ -736,10 +736,10 @@ func TestLexer_Next(t *testing.T) { func testToken(t *testing.T, expected, actual *Token) { t.Helper() - if actual.Mode != expected.Mode || + if actual.ModeID != expected.ModeID || actual.ModeName != expected.ModeName || actual.KindID != expected.KindID || - actual.Kind != expected.Kind || + actual.ModeKindID != expected.ModeKindID || actual.KindName != expected.KindName || !bytes.Equal(actual.Match(), expected.Match()) || actual.EOF != expected.EOF || diff --git a/spec/spec.go b/spec/spec.go index b8aae33..d4f6346 100644 --- a/spec/spec.go +++ b/spec/spec.go @@ -7,40 +7,56 @@ import ( "strings" ) -const lexKindPattern = "[A-Za-z_][0-9A-Za-z_]*" +// LexKindID represents an ID of a lexical kind and is unique across all modes. +type LexKindID int + +const ( + LexKindIDNil = LexKindID(0) + LexKindIDMin = LexKindID(1) +) + +func (id LexKindID) Int() int { + return int(id) +} + +// LexModeKindID represents an ID of a lexical kind and is unique within a mode. +// Use LexKindID to identify a kind across all modes uniquely. +type LexModeKindID int + +const ( + LexModeKindIDNil = LexKindID(0) + LexModeKindIDMin = LexKindID(1) +) -var lexKindRE = regexp.MustCompile(lexKindPattern) +func (id LexModeKindID) Int() int { + return int(id) +} -type LexKind string +// LexKindName represents a name of a lexical kind. +type LexKindName string -const LexKindNil = LexKind("") +const LexKindNameNil = LexKindName("") -func (k LexKind) String() string { +func (k LexKindName) String() string { return string(k) } -func (k LexKind) validate() error { +func (k LexKindName) validate() error { if k == "" { return fmt.Errorf("kind doesn't allow to be the empty string") } - if !lexKindRE.Match([]byte(k)) { - return fmt.Errorf("kind must be %v", lexKindPattern) + if !lexKindNameRE.Match([]byte(k)) { + return fmt.Errorf("kind must be %v", lexKindNamePattern) } return nil } -// LexKindID is a unique ID among modes. -type LexKindID int - -func (id LexKindID) Int() int { - return int(id) -} +const lexKindNamePattern = "[A-Za-z_][0-9A-Za-z_]*" -const ( - LexKindIDNil = LexKindID(0) - LexKindIDMin = LexKindID(1) -) +var lexKindNameRE = regexp.MustCompile(lexKindNamePattern) +// LexPattern represents a pattern of a lexeme. +// The pattern is written in regular expression. type LexPattern string func (p LexPattern) validate() error { @@ -50,10 +66,27 @@ func (p LexPattern) validate() error { return nil } -const lexModePattern = "[A-Za-z_][0-9A-Za-z_]*" +// LexModeID represents an ID of a lex mode. +type LexModeID int -var lexModeRE = regexp.MustCompile(lexKindPattern) +const ( + LexModeIDNil = LexModeID(0) + LexModeIDDefault = LexModeID(1) +) +func (n LexModeID) String() string { + return strconv.Itoa(int(n)) +} + +func (n LexModeID) Int() int { + return int(n) +} + +func (n LexModeID) IsNil() bool { + return n == LexModeIDNil +} + +// LexModeName represents a name of a lex mode. 
type LexModeName string const ( @@ -66,8 +99,8 @@ func (m LexModeName) String() string { } func (m LexModeName) validate() error { - if m.isNil() || !lexModeRE.Match([]byte(m)) { - return fmt.Errorf("mode must be %v", lexModePattern) + if m.isNil() || !lexModeNameRE.Match([]byte(m)) { + return fmt.Errorf("mode must be %v", lexModeNamePattern) } return nil } @@ -76,31 +109,12 @@ func (m LexModeName) isNil() bool { return m == LexModeNameNil } -type LexModeNum int +const lexModeNamePattern = "[A-Za-z_][0-9A-Za-z_]*" -const ( - LexModeNumNil = LexModeNum(0) - LexModeNumDefault = LexModeNum(1) -) - -func (n LexModeNum) String() string { - return strconv.Itoa(int(n)) -} - -func (n LexModeNum) Int() int { - return int(n) -} - -func (n LexModeNum) Succ() LexModeNum { - return n + 1 -} - -func (n LexModeNum) IsNil() bool { - return n == LexModeNumNil -} +var lexModeNameRE = regexp.MustCompile(lexModeNamePattern) type LexEntry struct { - Kind LexKind `json:"kind"` + Kind LexKindName `json:"kind"` Pattern LexPattern `json:"pattern"` Modes []LexModeName `json:"modes"` Push LexModeName `json:"push"` @@ -174,18 +188,35 @@ func (s *LexSpec) Validate() error { return nil } +// StateID represents an ID of a state of a transition table. +type StateID int + +const ( + // StateIDNil represents an empty entry of a transition table. + // When the driver reads this value, it raises an error meaning lexical analysis failed. + StateIDNil = StateID(0) + + // StateIDMin is the minimum value of the state ID. All valid state IDs are represented as + // sequential numbers starting from this value. + StateIDMin = StateID(1) +) + +func (id StateID) Int() int { + return int(id) +} + type RowDisplacementTable struct { - OriginalRowCount int `json:"original_row_count"` - OriginalColCount int `json:"original_col_count"` - EmptyValue int `json:"empty_value"` - Entries []int `json:"entries"` - Bounds []int `json:"bounds"` - RowDisplacement []int `json:"row_displacement"` + OriginalRowCount int `json:"original_row_count"` + OriginalColCount int `json:"original_col_count"` + EmptyValue StateID `json:"empty_value"` + Entries []StateID `json:"entries"` + Bounds []int `json:"bounds"` + RowDisplacement []int `json:"row_displacement"` } type UniqueEntriesTable struct { UniqueEntries *RowDisplacementTable `json:"unique_entries,omitempty"` - UncompressedUniqueEntries []int `json:"uncompressed_unique_entries,omitempty"` + UncompressedUniqueEntries []StateID `json:"uncompressed_unique_entries,omitempty"` RowNums []int `json:"row_nums"` OriginalRowCount int `json:"original_row_count"` OriginalColCount int `json:"original_col_count"` @@ -193,25 +224,25 @@ type UniqueEntriesTable struct { } type TransitionTable struct { - InitialState int `json:"initial_state"` - AcceptingStates []int `json:"accepting_states"` + InitialStateID StateID `json:"initial_state_id"` + AcceptingStates []LexModeKindID `json:"accepting_states"` RowCount int `json:"row_count"` ColCount int `json:"col_count"` Transition *UniqueEntriesTable `json:"transition,omitempty"` - UncompressedTransition []int `json:"uncompressed_transition,omitempty"` + UncompressedTransition []StateID `json:"uncompressed_transition,omitempty"` } type CompiledLexModeSpec struct { - Kinds []LexKind `json:"kinds"` - Push []LexModeNum `json:"push"` - Pop []int `json:"pop"` - DFA *TransitionTable `json:"dfa"` + KindNames []LexKindName `json:"kind_names"` + Push []LexModeID `json:"push"` + Pop []int `json:"pop"` + DFA *TransitionTable `json:"dfa"` } type CompiledLexSpec struct { - InitialMode 
LexModeNum `json:"initial_mode"` - Modes []LexModeName `json:"modes"` - Kinds []LexKind `json:"kinds"` + InitialModeID LexModeID `json:"initial_mode_id"` + ModeNames []LexModeName `json:"mode_names"` + KindNames []LexKindName `json:"kind_names"` KindIDs [][]LexKindID `json:"kind_ids"` CompressionLevel int `json:"compression_level"` Specs []*CompiledLexModeSpec `json:"specs"` |