Diffstat (limited to 'src/urubu/ucd/parser.go')
-rw-r--r-- | src/urubu/ucd/parser.go | 155 |
1 file changed, 0 insertions, 155 deletions
diff --git a/src/urubu/ucd/parser.go b/src/urubu/ucd/parser.go
deleted file mode 100644
index 88d7134..0000000
--- a/src/urubu/ucd/parser.go
+++ /dev/null
@@ -1,155 +0,0 @@
-package ucd
-
-import (
-	"bufio"
-	"encoding/binary"
-	"encoding/hex"
-	"io"
-	"regexp"
-	"strings"
-)
-
-type CodePointRange struct {
-	From rune
-	To   rune
-}
-
-var codePointRangeNil = &CodePointRange{
-	From: 0,
-	To:   0,
-}
-
-type field string
-
-func (f field) codePointRange() (*CodePointRange, error) {
-	var from, to rune
-	var err error
-	cp := reCodePointRange.FindStringSubmatch(string(f))
-	from, err = decodeHexToRune(cp[1])
-	if err != nil {
-		return codePointRangeNil, err
-	}
-	if cp[2] != "" {
-		to, err = decodeHexToRune(cp[2])
-		if err != nil {
-			return codePointRangeNil, err
-		}
-	} else {
-		to = from
-	}
-	return &CodePointRange{
-		From: from,
-		To:   to,
-	}, nil
-}
-
-func decodeHexToRune(hexCodePoint string) (rune, error) {
-	h := hexCodePoint
-	if len(h)%2 != 0 {
-		h = "0" + h
-	}
-	b, err := hex.DecodeString(h)
-	if err != nil {
-		return 0, err
-	}
-	l := len(b)
-	for i := 0; i < 4-l; i++ {
-		b = append([]byte{0}, b...)
-	}
-	n := binary.BigEndian.Uint32(b)
-	return rune(n), nil
-}
-
-func (f field) symbol() string {
-	return string(f)
-}
-
-func (f field) normalizedSymbol() string {
-	return normalizeSymbolicValue(string(f))
-}
-
-var symValReplacer = strings.NewReplacer("_", "", "-", "", "\x20", "")
-
-// normalizeSymbolicValue normalizes a symbolic value. The normalized value meets UAX44-LM3.
-//
-// https://www.unicode.org/reports/tr44/#UAX44-LM3
-func normalizeSymbolicValue(s string) string {
-	v := strings.ToLower(symValReplacer.Replace(s))
-	if strings.HasPrefix(v, "is") && v != "is" {
-		return v[2:]
-	}
-	return v
-}
-
-var (
-	reLine           = regexp.MustCompile(`^\s*(.*?)\s*(#.*)?$`)
-	reCodePointRange = regexp.MustCompile(`^([[:xdigit:]]+)(?:..([[:xdigit:]]+))?$`)
-
-	specialCommentPrefix = "# @missing:"
-)
-
-// This parser can parse data files of Unicode Character Database (UCD).
-// Specifically, it has the following two functions:
-// - Converts each line of the data files into a slice of fields.
-// - Recognizes specially-formatted comments starting `@missing` and generates a slice of fields.
-//
-// However, for practical purposes, each field needs to be analyzed more specifically.
-// For instance, in UnicodeData.txt, the first field represents a range of code points,
-// so it needs to be recognized as a hexadecimal string.
-// You can perform more specific parsing for each file by implementing a dedicated parser that wraps this parser.
-//
-// https://www.unicode.org/reports/tr44/#Format_Conventions
-type parser struct {
-	scanner       *bufio.Scanner
-	fields        []field
-	defaultFields []field
-	err           error
-
-	fieldBuf        []field
-	defaultFieldBuf []field
-}
-
-func newParser(r io.Reader) *parser {
-	return &parser{
-		scanner:         bufio.NewScanner(r),
-		fieldBuf:        make([]field, 50),
-		defaultFieldBuf: make([]field, 50),
-	}
-}
-
-func (p *parser) parse() bool {
-	for p.scanner.Scan() {
-		p.parseRecord(p.scanner.Text())
-		if p.fields != nil || p.defaultFields != nil {
-			return true
-		}
-	}
-	p.err = p.scanner.Err()
-	return false
-}
-
-func (p *parser) parseRecord(src string) {
-	ms := reLine.FindStringSubmatch(src)
-	mFields := ms[1]
-	mComment := ms[2]
-	if mFields != "" {
-		p.fields = parseFields(p.fieldBuf, mFields)
-	} else {
-		p.fields = nil
-	}
-	if strings.HasPrefix(mComment, specialCommentPrefix) {
-		p.defaultFields = parseFields(p.defaultFieldBuf, strings.Replace(mComment, specialCommentPrefix, "", -1))
-	} else {
-		p.defaultFields = nil
-	}
-}
-
-func parseFields(buf []field, src string) []field {
-	n := 0
-	for _, f := range strings.Split(src, ";") {
-		buf[n] = field(strings.TrimSpace(f))
-		n++
-	}
-
-	return buf[:n]
-}
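For reference, the doc comment of the deleted parser suggests wrapping it with a file-specific parser. Below is a minimal sketch of such a wrapper, not part of the change above: parsePropList and propListEntry are illustrative names, and the sketch assumes the unexported API from the deleted parser.go (newParser, parse, field.codePointRange, field.normalizedSymbol). It reads PropList.txt-style records such as "0009..000D ; White_Space # Cc ...".

package ucd

import "io"

// propListEntry is a hypothetical record type used only for illustration.
type propListEntry struct {
	CP   *CodePointRange
	Prop string
}

// parsePropList drives the generic parser and interprets the first field of each
// record as a code point range and the second as a symbolic property value.
func parsePropList(r io.Reader) ([]propListEntry, error) {
	var entries []propListEntry
	p := newParser(r)
	for p.parse() {
		// Lines carrying only an @missing directive (or no data fields at all)
		// are skipped here; a real wrapper might record the default value instead.
		if len(p.fields) < 2 {
			continue
		}
		cp, err := p.fields[0].codePointRange()
		if err != nil {
			return nil, err
		}
		entries = append(entries, propListEntry{
			CP:   cp,
			Prop: p.fields[1].normalizedSymbol(),
		})
	}
	if p.err != nil {
		return nil, p.err
	}
	return entries, nil
}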