diff options
author | EuAndreh <eu@euandre.org> | 2024-11-29 13:42:49 -0300 |
---|---|---|
committer | EuAndreh <eu@euandre.org> | 2024-11-29 13:42:49 -0300 |
commit | 990b7e317e4244803bb46f8d7c92a2b23d160a31 (patch) | |
tree | 547adb8176ef6453a736e6adff6acf1c455edcb3 /ucd/unicode_data.go | |
parent | rm go.mod go.sum (diff) | |
download | tre-990b7e317e4244803bb46f8d7c92a2b23d160a31.tar.gz tre-990b7e317e4244803bb46f8d7c92a2b23d160a31.tar.xz |
rm ucd/*.go into separate project
Diffstat (limited to 'ucd/unicode_data.go')
-rw-r--r-- | ucd/unicode_data.go | 56 |
1 files changed, 0 insertions, 56 deletions
```diff
diff --git a/ucd/unicode_data.go b/ucd/unicode_data.go
deleted file mode 100644
index e2a8e87..0000000
--- a/ucd/unicode_data.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package ucd
-
-import "io"
-
-type UnicodeData struct {
-	GeneralCategory map[string][]*CodePointRange
-
-	propValAliases *PropertyValueAliases
-}
-
-// ParseUnicodeData parses the UnicodeData.txt.
-func ParseUnicodeData(r io.Reader, propValAliases *PropertyValueAliases) (*UnicodeData, error) {
-	unicodeData := &UnicodeData{
-		GeneralCategory: map[string][]*CodePointRange{},
-		propValAliases:  propValAliases,
-	}
-
-	p := newParser(r)
-	for p.parse() {
-		if len(p.fields) == 0 {
-			continue
-		}
-		cp, err := p.fields[0].codePointRange()
-		if err != nil {
-			return nil, err
-		}
-		gc := p.fields[2].normalizedSymbol()
-		unicodeData.addGC(gc, cp)
-	}
-	if p.err != nil {
-		return nil, p.err
-	}
-
-	return unicodeData, nil
-}
-
-func (u *UnicodeData) addGC(gc string, cp *CodePointRange) {
-	// https://www.unicode.org/reports/tr44/#Empty_Fields
-	// > The data file UnicodeData.txt defines many property values in each record. When a field in a data line
-	// > for a code point is empty, that indicates that the property takes the default value for that code point.
-	if gc == "" {
-		return
-	}
-
-	cps, ok := u.GeneralCategory[u.propValAliases.gcAbb(gc)]
-	if ok {
-		c := cps[len(cps)-1]
-		if cp.From-c.To == 1 {
-			c.To = cp.To
-		} else {
-			u.GeneralCategory[u.propValAliases.gcAbb(gc)] = append(cps, cp)
-		}
-	} else {
-		u.GeneralCategory[u.propValAliases.gcAbb(gc)] = []*CodePointRange{cp}
-	}
-}
```