4 files changed, 977 insertions, 9 deletions
diff --git a/compiler/lexer.go b/compiler/lexer.go
index 91cd209..acc9658 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -19,6 +19,7 @@ const (
 	tokenKindGroupClose      = tokenKind(")")
 	tokenKindBExpOpen        = tokenKind("[")
 	tokenKindBExpClose       = tokenKind("]")
+	tokenKindCharRange       = tokenKind("-")
 	tokenKindEOF             = tokenKind("eof")
 )
 
@@ -124,6 +125,8 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
 
 func (l *lexer) nextInBExp(c rune) (*token, error) {
 	switch c {
+	case '-':
+		return newToken(tokenKindCharRange, nullChar), nil
 	case ']':
 		l.mode = lexerModeDefault
 		return newToken(tokenKindBExpClose, nullChar), nil
@@ -138,7 +141,7 @@ func (l *lexer) nextInBExp(c rune) (*token, error) {
 			}
 		}
 		switch {
-		case c == '\\' || c == ']':
+		case c == '\\' || c == '-' || c == ']':
 			return newToken(tokenKindChar, c), nil
 		default:
 			return nil, &SyntaxError{
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index e2e7d7a..be00317 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -31,7 +31,7 @@ func TestLexer(t *testing.T) {
 		},
 		{
 			caption: "lexer can recognize the special characters",
-			src:     ".*+?|()[]",
+			src:     ".*+?|()[-]",
 			tokens: []*token{
 				newToken(tokenKindAnyChar, nullChar),
 				newToken(tokenKindRepeat, nullChar),
@@ -41,13 +41,14 @@ func TestLexer(t *testing.T) {
 				newToken(tokenKindGroupOpen, nullChar),
 				newToken(tokenKindGroupClose, nullChar),
 				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindCharRange, nullChar),
 				newToken(tokenKindBExpClose, nullChar),
 				newToken(tokenKindEOF, nullChar),
 			},
 		},
 		{
 			caption: "lexer can recognize the escape sequences",
-			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[\\]",
+			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[\\][\\-]",
 			tokens: []*token{
 				newToken(tokenKindChar, '\\'),
 				newToken(tokenKindChar, '.'),
@@ -59,12 +60,15 @@ func TestLexer(t *testing.T) {
 				newToken(tokenKindChar, ')'),
 				newToken(tokenKindChar, '['),
 				newToken(tokenKindChar, ']'),
+				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindChar, '-'),
+				newToken(tokenKindBExpClose, nullChar),
 				newToken(tokenKindEOF, nullChar),
 			},
 		},
 		{
 			caption: "in a bracket expression, the special characters are also handled as normal characters",
-			src:     "[\\\\.*+?|()[\\]].*|()][",
+			src:     "[\\\\.*+?|()[\\]].*|()-][",
 			tokens: []*token{
 				newToken(tokenKindBExpOpen, nullChar),
 				newToken(tokenKindChar, '\\'),
@@ -83,6 +87,7 @@ func TestLexer(t *testing.T) {
 				newToken(tokenKindAlt, nullChar),
 				newToken(tokenKindGroupOpen, nullChar),
 				newToken(tokenKindGroupClose, nullChar),
+				newToken(tokenKindChar, '-'),
 				newToken(tokenKindBExpClose, nullChar),
 				newToken(tokenKindBExpOpen, nullChar),
 				newToken(tokenKindEOF, nullChar),
diff --git a/compiler/parser.go b/compiler/parser.go
index 04cd8ad..bae4e17 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -15,9 +15,9 @@ func (err *SyntaxError) Error() string {
 	return fmt.Sprintf("Syntax Error: %v", err.message)
 }
 
-func raiseSyntaxError(message string) {
+func raiseSyntaxError(format string, a ...interface{}) {
 	panic(&SyntaxError{
-		message: message,
+		message: fmt.Sprintf(format, a...),
 	})
 }
 
@@ -170,12 +170,12 @@ func (p *parser) parseSingleChar() astNode {
 	}
 	if p.consume(tokenKindBExpOpen) {
 		defer p.expect(tokenKindBExpClose)
-		left := p.parseNormalChar()
+		left := p.parseBExpElem()
 		if left == nil {
 			raiseSyntaxError("bracket expression must include at least one character")
 		}
 		for {
-			right := p.parseNormalChar()
+			right := p.parseBExpElem()
 			if right == nil {
 				break
 			}
@@ -185,6 +185,19 @@ func (p *parser) parseSingleChar() astNode {
 	}
 	return p.parseNormalChar()
 }
+
+func (p *parser) parseBExpElem() astNode {
+	left := p.parseNormalChar()
+	if !p.consume(tokenKindCharRange) {
+		return left
+	}
+	right := p.parseNormalChar()
+	if right == nil {
+		raiseSyntaxError("invalid range expression")
+	}
+	return genRangeAST(left, right)
+}
+
 func (p *parser) parseNormalChar() astNode {
 	if !p.consume(tokenKindChar) {
 		return nil
@@ -316,6 +329,694 @@ func genAnyCharAST(tok *token) astNode {
 	)
 }
 
+func genRangeAST(fromNode, toNode astNode) astNode {
+	from := genByteSeq(fromNode)
+	to := genByteSeq(toNode)
+	if !isValidOrder(from, to) {
+		raiseSyntaxError("range expression with invalid order: [%s-%s] ([%v-%v])", string(from), string(to), from, to)
+	}
+	switch len(from) {
+	case 1:
+		switch len(to) {
+		case 1:
+			return gen1ByteCharRangeAST(from, to)
+		case 2:
+			return newAltNode(
+				gen1ByteCharRangeAST(from, []byte{0x7f}),
+				gen2ByteCharRangeAST([]byte{0xc2, 0x80}, to),
+			)
+		case 3:
+			return newAltNode(
+				newAltNode(
+					gen1ByteCharRangeAST(from, []byte{0x7f}),
+					gen2ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xdf, 0xbf}),
+				),
+				gen3ByteCharRangeAST([]byte{0xe0, 0xa0, 0x80}, to),
+			)
+		case 4:
+			return newAltNode(
+				newAltNode(
+					newAltNode(
+						gen1ByteCharRangeAST(from, []byte{0x7f}),
+						gen2ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xdf, 0xbf}),
+					),
+					gen3ByteCharRangeAST([]byte{0xe0, 0xa0, 0x80}, []byte{0xef, 0xbf, 0xbf}),
+				),
+				gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to),
+			)
+		}
+	case 2:
+		switch len(to) {
+		case 2:
+			return gen2ByteCharRangeAST(from, to)
+		case 3:
+			return newAltNode(
+				gen2ByteCharRangeAST(from, []byte{0xdf, 0xbf}),
+				gen3ByteCharRangeAST([]byte{0xc2, 0x80}, to),
+			)
+		case 4:
+			return newAltNode(
+				newAltNode(
+					gen2ByteCharRangeAST(from, []byte{0xdf, 0xbf}),
+					gen3ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xef, 0xbf, 0xbf}),
+				),
+				gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to),
+			)
+		}
+	case 3:
+		switch len(to) {
+		case 3:
+			return gen3ByteCharRangeAST(from, to)
+		case 4:
+			return newAltNode(
+				gen3ByteCharRangeAST(from, []byte{0xef, 0xbf, 0xbf}),
+				gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to),
+			)
+		}
+	case 4:
+		return gen4ByteCharRangeAST(from, to)
+	}
+	panic(fmt.Sprintf("invalid range; from: %v, to: %v", from, to))
+}
+
+func genByteSeq(node astNode) []byte {
+	switch n := node.(type) {
+	case *symbolNode:
+		return []byte{n.from}
+	case *concatNode:
+		seq := genByteSeq(n.left)
+		seq = append(seq, genByteSeq(n.right)...)
+		return seq
+	}
+	panic(fmt.Sprintf("genByteSeq() cannot handle %T: %v", node, node))
+}
+
+func isValidOrder(from, to []byte) bool {
+	if len(from) > len(to) {
+		return false
+	}
+	if len(from) < len(to) {
+		return true
+	}
+	for i, f := range from {
+		t := to[i]
+		if f > t {
+			return false
+		}
+		if f < t {
+			return true
+		}
+	}
+	return true
+}
+
+func gen1ByteCharRangeAST(from, to []byte) astNode {
+	return newRangeSymbolNode(nil, from[0], to[0], symbolPositionNil)
+}
+
+func gen2ByteCharRangeAST(from, to []byte) astNode {
+	from0 := from[0]
+	from1 := from[1]
+	to0 := to[0]
+	to1 := to[1]
+	switch {
+	case from0 == to0 && from1 == to1:
+		return newConcatNode(
+			newSymbolNode(nil, from0, symbolPositionNil),
+			newSymbolNode(nil, from1, symbolPositionNil),
+		)
+	case from0 == to0:
+		return newConcatNode(
+			newSymbolNode(nil, from0, symbolPositionNil),
+			newRangeSymbolNode(nil, from1, to1, symbolPositionNil),
+		)
+	default:
+		alt1 := newConcatNode(
+			newSymbolNode(nil, from0, symbolPositionNil),
+			newRangeSymbolNode(nil, from1, 0xbf, symbolPositionNil),
+		)
+		alt2 := newConcatNode(
+			newRangeSymbolNode(nil, from0+1, to0, symbolPositionNil),
+			newRangeSymbolNode(nil, 0x80, to1, symbolPositionNil),
+		)
+		return newAltNode(alt1, alt2)
+	}
+}
+
+type byteBoundsEntry struct {
+	min byte
+	max byte
+}
+
+var (
+	bounds3 = [][]byteBoundsEntry{
+		nil,
+		{{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}},
+		{{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+		{{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}},
+		{{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+	}
+
+	bounds4 = [][]byteBoundsEntry{
+		nil,
+		{{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+		{{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+		{{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+	}
+)
+
+func get3ByteCharRangeNum(seq []byte) int {
+	head := seq[0]
+	switch {
+	case head == 0xe0:
+		return 1
+	case head >= 0xe1 && head <= 0xec:
+		return 2
+	case head == 0xed:
+		return 3
+	case head >= 0xee && head <= 0xef:
+		return 4
+	}
+	return 0
+}
+
+func get4ByteCharRangeNum(seq []byte) int {
+	head := seq[0]
+	switch {
+	case head == 0xf0:
+		return 1
+	case head >= 0xf1 && head <= 0xf3:
+		return 2
+	case head == 0xf4:
+		return 3
+	}
+	return 0
+}
+
+func gen3ByteCharRangeAST(from, to []byte) astNode {
+	from0 := from[0]
+	from1 := from[1]
+	from2 := from[2]
+	to0 := to[0]
+	to1 := to[1]
+	to2 := to[2]
+	switch {
+	case from0 == to0 && from1 == to1 && from2 == to2:
+		return newConcatNode(
+			newConcatNode(
+				newSymbolNode(nil, from0, symbolPositionNil),
+				newSymbolNode(nil, from1, symbolPositionNil),
+			),
+			newSymbolNode(nil, from2, symbolPositionNil),
+		)
+	case from0 == to0 && from1 == to1:
+		return newConcatNode(
+			newConcatNode(
+				newSymbolNode(nil, from0, symbolPositionNil),
+				newSymbolNode(nil, from1, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from2, to2, symbolPositionNil),
+		)
+	case from0 == to0:
+		rangeNum := get3ByteCharRangeNum(from)
+		bounds := bounds3[rangeNum]
+		var alt astNode
+		alt = newConcatNode(
+			newConcatNode(
+				newSymbolNode(nil, from0, symbolPositionNil),
+				newSymbolNode(nil, from1, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from2, bounds[2].max, symbolPositionNil),
+		)
+		if from1+1 < to1 {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, from0, symbolPositionNil),
+						newRangeSymbolNode(nil, from1+1, to1-1, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+				),
+			)
+		}
+		alt = newAltNode(
+			alt,
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, from0, symbolPositionNil),
+					newSymbolNode(nil, to1, symbolPositionNil),
+				),
+				newRangeSymbolNode(nil, bounds[2].min, to2, symbolPositionNil),
+			),
+		)
+		return alt
+	default:
+		fromRangeNum := get3ByteCharRangeNum(from)
+		toRangeNum := get3ByteCharRangeNum(to)
+		bounds := bounds3[fromRangeNum]
+		var alt astNode
+		alt = newConcatNode(
+			newConcatNode(
+				newSymbolNode(nil, from0, symbolPositionNil),
+				newSymbolNode(nil, from1, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from2, bounds[2].max, symbolPositionNil),
+		)
+		if from1 < bounds[1].max {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, from0, symbolPositionNil),
+						newRangeSymbolNode(nil, from1+1, bounds[1].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+				),
+			)
+		}
+		if fromRangeNum == toRangeNum {
+			if from0+1 < to0 {
+				alt = newAltNode(
+					alt,
+					newConcatNode(
+						newConcatNode(
+							newRangeSymbolNode(nil, from0+1, to0-1, symbolPositionNil),
+							newRangeSymbolNode(nil, bounds[1].min, bounds[1].max, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+				)
+			}
+			if to1 > bounds[1].min {
+				alt = newAltNode(
+					alt,
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, to0, symbolPositionNil),
+							newRangeSymbolNode(nil, bounds[1].min, to1-1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+				)
+			}
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, to0, symbolPositionNil),
+						newSymbolNode(nil, to1, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[2].min, to2, symbolPositionNil),
+				),
+			)
+			return alt
+		}
+		for rangeNum := fromRangeNum + 1; rangeNum < toRangeNum; rangeNum++ {
+			bounds := bounds3[rangeNum]
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newRangeSymbolNode(nil, bounds[0].min, bounds[0].max, symbolPositionNil),
+						newRangeSymbolNode(nil, bounds[1].min, bounds[1].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+				),
+			)
+		}
+		bounds = bounds3[toRangeNum]
+		if to0 > bounds[0].min {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newRangeSymbolNode(nil, bounds[0].min, to0-1, symbolPositionNil),
+						newRangeSymbolNode(nil, bounds[1].min, bounds[1].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+				),
+			)
+		}
+		if to1 > bounds[1].min {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, to0, symbolPositionNil),
+						newRangeSymbolNode(nil, bounds[1].min, to1-1, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+				),
+			)
+		}
+		alt = newAltNode(
+			alt,
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, to0, symbolPositionNil),
+					newSymbolNode(nil, to1, symbolPositionNil),
+				),
+				newRangeSymbolNode(nil, bounds[2].min, to2, symbolPositionNil),
+			),
+		)
+		return alt
+	}
+}
+
+func gen4ByteCharRangeAST(from, to []byte) astNode {
+	from0 := from[0]
+	from1 := from[1]
+	from2 := from[2]
+	from3 := from[3]
+	to0 := to[0]
+	to1 := to[1]
+	to2 := to[2]
+	to3 := to[3]
+	switch {
+	case from0 == to0 && from1 == to1 && from2 == to2 && from3 == to3:
+		return newConcatNode(
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, from0, symbolPositionNil),
+					newSymbolNode(nil, from1, symbolPositionNil),
+				),
+				newSymbolNode(nil, from2, symbolPositionNil),
+			),
+			newSymbolNode(nil, from3, symbolPositionNil),
+		)
+	case from0 == to0 && from1 == to1 && from2 == to2:
+		return newConcatNode(
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, from0, symbolPositionNil),
+					newSymbolNode(nil, from1, symbolPositionNil),
+				),
+				newSymbolNode(nil, from2, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from3, to3, symbolPositionNil),
+		)
+	case from0 == to0 && from1 == to1:
+		rangeNum := get4ByteCharRangeNum(from)
+		bounds := bounds4[rangeNum]
+		var alt astNode
+		alt = newConcatNode(
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, from0, symbolPositionNil),
+					newSymbolNode(nil, from1, symbolPositionNil),
+				),
+				newSymbolNode(nil, from2, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from3, bounds[3].max, symbolPositionNil),
+		)
+		if from2+1 < to2 {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, from0, symbolPositionNil),
+							newSymbolNode(nil, from1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, from2+1, to2-1, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		alt = newAltNode(
+			alt,
+			newConcatNode(
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, from0, symbolPositionNil),
+						newSymbolNode(nil, from1, symbolPositionNil),
+					),
+					newSymbolNode(nil, to2, symbolPositionNil),
+				),
+				newRangeSymbolNode(nil, bounds[3].min, to3, symbolPositionNil),
+			),
+		)
+		return alt
+	case from0 == to0:
+		rangeNum := get4ByteCharRangeNum(from)
+		bounds := bounds4[rangeNum]
+		var alt astNode
+		alt = newConcatNode(
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, from0, symbolPositionNil),
+					newSymbolNode(nil, from1, symbolPositionNil),
+				),
+				newSymbolNode(nil, from2, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from3, bounds[3].max, symbolPositionNil),
+		)
+		if from2 < bounds[2].max {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, from0, symbolPositionNil),
+							newSymbolNode(nil, from1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, from2+1, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		if from1+1 < to1 {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, from0, symbolPositionNil),
+							newRangeSymbolNode(nil, from1+1, to1-1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		if to2 > bounds[2].min {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, from0, symbolPositionNil),
+							newSymbolNode(nil, to1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, to2-1, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		alt = newAltNode(
+			alt,
+			newConcatNode(
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, from0, symbolPositionNil),
+						newSymbolNode(nil, to1, symbolPositionNil),
+					),
+					newSymbolNode(nil, to2, symbolPositionNil),
+				),
+				newRangeSymbolNode(nil, bounds[3].min, to3, symbolPositionNil),
+			),
+		)
+		return alt
+	default:
+		fromRangeNum := get4ByteCharRangeNum(from)
+		toRangeNum := get4ByteCharRangeNum(to)
+		bounds := bounds4[fromRangeNum]
+		var alt astNode
+		alt = newConcatNode(
+			newConcatNode(
+				newConcatNode(
+					newSymbolNode(nil, from0, symbolPositionNil),
+					newSymbolNode(nil, from1, symbolPositionNil),
+				),
+				newSymbolNode(nil, from2, symbolPositionNil),
+			),
+			newRangeSymbolNode(nil, from3, bounds[3].max, symbolPositionNil),
+		)
+		if from2 < bounds[2].max {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, from0, symbolPositionNil),
+							newSymbolNode(nil, from1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, from2+1, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		if from1 < bounds[1].max {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, from0, symbolPositionNil),
+							newRangeSymbolNode(nil, from1+1, bounds[1].max, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		if fromRangeNum == toRangeNum {
+			if from0+1 < to0 {
+				alt = newAltNode(
+					alt,
+					newConcatNode(
+						newConcatNode(
+							newConcatNode(
+								newRangeSymbolNode(nil, from0+1, to0-1, symbolPositionNil),
+								newRangeSymbolNode(nil, bounds[1].min, bounds[1].max, symbolPositionNil),
+							),
+							newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+					),
+				)
+			}
+			if to1 > bounds[1].min {
+				alt = newAltNode(
+					alt,
+					newConcatNode(
+						newConcatNode(
+							newConcatNode(
+								newSymbolNode(nil, to0, symbolPositionNil),
+								newRangeSymbolNode(nil, bounds[1].min, to1-1, symbolPositionNil),
+							),
+							newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+					),
+				)
+			}
+			if to2 > bounds[2].min {
+				alt = newAltNode(
+					alt,
+					newConcatNode(
+						newConcatNode(
+							newConcatNode(
+								newSymbolNode(nil, to0, symbolPositionNil),
+								newSymbolNode(nil, to1, symbolPositionNil),
+							),
+							newRangeSymbolNode(nil, bounds[2].min, to2-1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+					),
+				)
+			}
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, to0, symbolPositionNil),
+							newSymbolNode(nil, to1, symbolPositionNil),
+						),
+						newSymbolNode(nil, to2, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, to3, symbolPositionNil),
+				),
+			)
+			return alt
+		}
+		for rangeNum := fromRangeNum + 1; rangeNum < toRangeNum; rangeNum++ {
+			bounds := bounds4[rangeNum]
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newRangeSymbolNode(nil, bounds[0].min, bounds[0].max, symbolPositionNil),
+							newRangeSymbolNode(nil, bounds[1].min, bounds[1].max, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		bounds = bounds4[toRangeNum]
+		if to0 > bounds[0].min {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newRangeSymbolNode(nil, bounds[0].min, to0-1, symbolPositionNil),
+							newRangeSymbolNode(nil, bounds[1].min, bounds[1].max, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		if to1 > bounds[1].min {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, to0, symbolPositionNil),
+							newRangeSymbolNode(nil, bounds[1].min, to1-1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, bounds[2].max, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		if to2 > bounds[2].min {
+			alt = newAltNode(
+				alt,
+				newConcatNode(
+					newConcatNode(
+						newConcatNode(
+							newSymbolNode(nil, to0, symbolPositionNil),
+							newSymbolNode(nil, to1, symbolPositionNil),
+						),
+						newRangeSymbolNode(nil, bounds[2].min, to2-1, symbolPositionNil),
+					),
+					newRangeSymbolNode(nil, bounds[3].min, bounds[3].max, symbolPositionNil),
+				),
+			)
+		}
+		alt = newAltNode(
+			alt,
+			newConcatNode(
+				newConcatNode(
+					newConcatNode(
+						newSymbolNode(nil, to0, symbolPositionNil),
+						newSymbolNode(nil, to1, symbolPositionNil),
+					),
+					newSymbolNode(nil, to2, symbolPositionNil),
+				),
+				newRangeSymbolNode(nil, bounds[3].min, to3, symbolPositionNil),
+			),
+		)
+		return alt
+	}
+}
+
 func (p *parser) expect(expected tokenKind) {
 	if !p.consume(expected) {
 		tok := p.peekedTok
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 283d5fe..bdf5f03 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -132,6 +132,265 @@ func TestLexer_Next(t *testing.T) {
 				newEOFToken(),
 			},
 		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// all 1 byte characters
+					spec.NewLexEntry("1ByteChar", "[\x00-\x7f]"),
+				},
+			},
+			src: string([]byte{
+				0x00,
+				0x01,
+				0x7e,
+				0x7f,
+			}),
+			tokens: []*Token{
+				newToken(1, "1ByteChar", []byte{0x00}),
+				newToken(1, "1ByteChar", []byte{0x01}),
+				newToken(1, "1ByteChar", []byte{0x7e}),
+				newToken(1, "1ByteChar", []byte{0x7f}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// all 2 byte characters
+					spec.NewLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xc2, 0x80,
+				0xc2, 0x81,
+				0xdf, 0xbe,
+				0xdf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "2ByteChar", []byte{0xc2, 0x80}),
+				newToken(1, "2ByteChar", []byte{0xc2, 0x81}),
+				newToken(1, "2ByteChar", []byte{0xdf, 0xbe}),
+				newToken(1, "2ByteChar", []byte{0xdf, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// All bytes are the same.
+					spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+				},
+			},
+			src: string([]byte{
+				0xe0, 0xa0, 0x80,
+			}),
+			tokens: []*Token{
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// The first two bytes are the same.
+					spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xe0, 0xa0, 0x80,
+				0xe0, 0xa0, 0x81,
+				0xe0, 0xa0, 0xbe,
+				0xe0, 0xa0, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// The first byte are the same.
+					spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xe0, 0xa0, 0x80,
+				0xe0, 0xa0, 0x81,
+				0xe0, 0xbf, 0xbe,
+				0xe0, 0xbf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// all 3 byte characters
+					spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xe0, 0xa0, 0x80,
+				0xe0, 0xa0, 0x81,
+				0xe0, 0xbf, 0xbe,
+				0xe0, 0xbf, 0xbf,
+				0xe1, 0x80, 0x80,
+				0xe1, 0x80, 0x81,
+				0xec, 0xbf, 0xbe,
+				0xec, 0xbf, 0xbf,
+				0xed, 0x80, 0x80,
+				0xed, 0x80, 0x81,
+				0xed, 0x9f, 0xbe,
+				0xed, 0x9f, 0xbf,
+				0xee, 0x80, 0x80,
+				0xee, 0x80, 0x81,
+				0xef, 0xbf, 0xbe,
+				0xef, 0xbf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+				newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+				newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
+				newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
+				newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
+				newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
+				newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
+				newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
+				newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
+				newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
+				newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
+				newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
+				newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
+				newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// All bytes are the same.
+					spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+				},
+			},
+			src: string([]byte{
+				0xf0, 0x90, 0x80, 0x80,
+			}),
+			tokens: []*Token{
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// The first 3 bytes are the same.
+					spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xf0, 0x90, 0x80, 0x80,
+				0xf0, 0x90, 0x80, 0x81,
+				0xf0, 0x90, 0x80, 0xbe,
+				0xf0, 0x90, 0x80, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// The first 2 bytes are the same.
+					spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xf0, 0x90, 0x80, 0x80,
+				0xf0, 0x90, 0x80, 0x81,
+				0xf0, 0x90, 0xbf, 0xbe,
+				0xf0, 0x90, 0xbf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// The first byte are the same.
+					spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xf0, 0x90, 0x80, 0x80,
+				0xf0, 0x90, 0x80, 0x81,
+				0xf0, 0xbf, 0xbf, 0xbe,
+				0xf0, 0xbf, 0xbf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					// all 4 byte characters
+					spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+				},
+			},
+			src: string([]byte{
+				0xf0, 0x90, 0x80, 0x80,
+				0xf0, 0x90, 0x80, 0x81,
+				0xf0, 0xbf, 0xbf, 0xbe,
+				0xf0, 0xbf, 0xbf, 0xbf,
+				0xf1, 0x80, 0x80, 0x80,
+				0xf1, 0x80, 0x80, 0x81,
+				0xf3, 0xbf, 0xbf, 0xbe,
+				0xf3, 0xbf, 0xbf, 0xbf,
+				0xf4, 0x80, 0x80, 0x80,
+				0xf4, 0x80, 0x80, 0x81,
+				0xf4, 0x8f, 0xbf, 0xbe,
+				0xf4, 0x8f, 0xbf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+				newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+				newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
+				newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
+				newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
+				newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+				newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
+				newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
+				newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
+				newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+				newEOFToken(),
+			},
+		},
 	}
 	for _, tt := range test {
 		clspec, err := compiler.Compile(tt.lspec)
@@ -149,7 +408,7 @@ func TestLexer_Next(t *testing.T) {
 				break
 			}
 			testToken(t, eTok, tok)
-			t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match, string(tok.Match), tok.EOF, tok.Invalid)
+			// t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match, string(tok.Match), tok.EOF, tok.Invalid)
 			if tok.EOF {
 				break
 			}