From 0a778f7f02363f30764f52ebada6157a87374d3f Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Fri, 11 Oct 2013 07:29:40 +1100 Subject: [PATCH] freetype/truetype: parse UCS-4 encoded cmap tables. R=bsiegert CC=golang-dev https://codereview.appspot.com/14548046 --- freetype/truetype/truetype.go | 99 +++++++++++++-------- freetype/truetype/truetype_test.go | 136 +++++++++++++++++++++++++---- 2 files changed, 182 insertions(+), 53 deletions(-) diff --git a/freetype/truetype/truetype.go b/freetype/truetype/truetype.go index ea23ccf..fda3689 100644 --- a/freetype/truetype/truetype.go +++ b/freetype/truetype/truetype.go @@ -87,7 +87,7 @@ const ( // A cm holds a parsed cmap entry. type cm struct { - start, end, delta, offset uint16 + start, end, delta, offset uint32 } // A Font represents a Truetype font. @@ -111,12 +111,14 @@ type Font struct { func (f *Font) parseCmap() error { const ( cmapFormat4 = 4 + cmapFormat12 = 12 languageIndependent = 0 // A 32-bit encoding consists of a most-significant 16-bit Platform ID and a // least-significant 16-bit Platform Specific ID. - unicodeEncoding = 0x00000003 // PID = 0 (Unicode), PSID = 3 (Unicode 2.0) - microsoftEncoding = 0x00030001 // PID = 3 (Microsoft), PSID = 1 (UCS-2) + unicodeEncoding = 0x00000003 // PID = 0 (Unicode), PSID = 3 (Unicode 2.0) + microsoftUCS2Encoding = 0x00030001 // PID = 3 (Microsoft), PSID = 1 (UCS-2) + microsoftUCS4Encoding = 0x0003000a // PID = 3 (Microsoft), PSID = 10 (UCS-4) ) if len(f.cmap) < 4 { @@ -137,7 +139,7 @@ func (f *Font) parseCmap() error { if pidPsid == unicodeEncoding { offset, found = int(o), true break - } else if pidPsid == microsoftEncoding { + } else if pidPsid == microsoftUCS2Encoding || pidPsid == microsoftUCS4Encoding { offset, found = int(o), true // We don't break out of the for loop, so that Unicode can override Microsoft. } @@ -150,39 +152,63 @@ func (f *Font) parseCmap() error { } cmapFormat := u16(f.cmap, offset) - if cmapFormat != cmapFormat4 { - return UnsupportedError(fmt.Sprintf("cmap format: %d", cmapFormat)) - } - language := u16(f.cmap, offset+4) - if language != languageIndependent { - return UnsupportedError(fmt.Sprintf("language: %d", language)) - } - segCountX2 := int(u16(f.cmap, offset+6)) - if segCountX2%2 == 1 { - return FormatError(fmt.Sprintf("bad segCountX2: %d", segCountX2)) - } - segCount := segCountX2 / 2 - offset += 14 - f.cm = make([]cm, segCount) - for i := 0; i < segCount; i++ { - f.cm[i].end = u16(f.cmap, offset) + switch cmapFormat { + case cmapFormat4: + language := u16(f.cmap, offset+4) + if language != languageIndependent { + return UnsupportedError(fmt.Sprintf("language: %d", language)) + } + segCountX2 := int(u16(f.cmap, offset+6)) + if segCountX2%2 == 1 { + return FormatError(fmt.Sprintf("bad segCountX2: %d", segCountX2)) + } + segCount := segCountX2 / 2 + offset += 14 + f.cm = make([]cm, segCount) + for i := 0; i < segCount; i++ { + f.cm[i].end = uint32(u16(f.cmap, offset)) + offset += 2 + } offset += 2 + for i := 0; i < segCount; i++ { + f.cm[i].start = uint32(u16(f.cmap, offset)) + offset += 2 + } + for i := 0; i < segCount; i++ { + f.cm[i].delta = uint32(u16(f.cmap, offset)) + offset += 2 + } + for i := 0; i < segCount; i++ { + f.cm[i].offset = uint32(u16(f.cmap, offset)) + offset += 2 + } + f.cmapIndexes = f.cmap[offset:] + return nil + + case cmapFormat12: + if u16(f.cmap, offset+2) != 0 { + return FormatError(fmt.Sprintf("cmap format: % x", f.cmap[offset:offset+4])) + } + length := u32(f.cmap, offset+4) + language := u32(f.cmap, offset+8) + if language != languageIndependent { + return UnsupportedError(fmt.Sprintf("language: %d", language)) + } + nGroups := u32(f.cmap, offset+12) + if length != 12*nGroups+16 { + return FormatError("inconsistent cmap length") + } + offset += 16 + f.cm = make([]cm, nGroups) + for i := uint32(0); i < nGroups; i++ { + f.cm[i].start = u32(f.cmap, offset+0) + f.cm[i].end = u32(f.cmap, offset+4) + f.cm[i].delta = u32(f.cmap, offset+8) - f.cm[i].start + offset += 12 + } + return nil } - offset += 2 - for i := 0; i < segCount; i++ { - f.cm[i].start = u16(f.cmap, offset) - offset += 2 - } - for i := 0; i < segCount; i++ { - f.cm[i].delta = u16(f.cmap, offset) - offset += 2 - } - for i := 0; i < segCount; i++ { - f.cm[i].offset = u16(f.cmap, offset) - offset += 2 - } - f.cmapIndexes = f.cmap[offset:] - return nil + return UnsupportedError(fmt.Sprintf("cmap format: %d", cmapFormat)) } func (f *Font) parseHead() error { @@ -296,8 +322,9 @@ func (f *Font) FUnitsPerEm() int32 { // Index returns a Font's index for the given rune. func (f *Font) Index(x rune) Index { - c := uint16(x) + c := uint32(x) n := len(f.cm) + // TODO: binary search. for i := 0; i < n; i++ { if f.cm[i].start <= c && c <= f.cm[i].end { if f.cm[i].offset == 0 { diff --git a/freetype/truetype/truetype_test.go b/freetype/truetype/truetype_test.go index 1436b6a..c812b45 100644 --- a/freetype/truetype/truetype_test.go +++ b/freetype/truetype/truetype_test.go @@ -16,14 +16,24 @@ import ( "testing" ) +func parseTestdataFont(name string) (font *Font, testdataIsOptional bool, err error) { + b, err := ioutil.ReadFile(fmt.Sprintf("../../testdata/%s.ttf", name)) + if err != nil { + // The "x-foo" fonts are optional tests, as they are not checked + // in for copyright or file size reasons. + return nil, strings.HasPrefix(name, "x-"), fmt.Errorf("%s: ReadFile: %v", name, err) + } + font, err = Parse(b) + if err != nil { + return nil, true, fmt.Errorf("%s: Parse: %v", name, err) + } + return font, false, nil +} + // TestParse tests that the luxisr.ttf metrics and glyphs are parsed correctly. // The numerical values can be manually verified by examining luxisr.ttx. func TestParse(t *testing.T) { - b, err := ioutil.ReadFile("../../testdata/luxisr.ttf") - if err != nil { - t.Fatal(err) - } - font, err := Parse(b) + font, _, err := parseTestdataFont("luxisr") if err != nil { t.Fatal(err) } @@ -74,6 +84,105 @@ func TestParse(t *testing.T) { } } +func TestIndex(t *testing.T) { + testCases := map[string]map[rune]Index{ + "luxisr": { + ' ': 3, + '!': 4, + 'A': 36, + 'V': 57, + 'É': 101, + 'fl': 193, + '\u22c5': 385, + '中': 0, + }, + "x-arial-bold": { + ' ': 3, + '+': 14, + '0': 19, + '_': 66, + 'w': 90, + '~': 97, + 'Ä': 98, + 'fl': 192, + '½': 242, + 'σ': 305, + 'λ': 540, + 'ỹ': 1275, + '\u04e9': 1319, + '中': 0, + }, + "x-deja-vu-sans-oblique": { + ' ': 3, + '*': 13, + 'Œ': 276, + 'ω': 861, + '‡': 2571, + '⊕': 3109, + 'fl': 4560, + '\ufb03': 4561, + '\ufffd': 4645, + // TODO: '\U0001f640': ???, + '中': 0, + }, + "x-droid-sans-japanese": { + ' ': 0, + '\u3000': 3, + '\u3041': 25, + '\u30fe': 201, + '\uff61': 202, + '\uff67': 208, + '\uff9e': 263, + '\uff9f': 264, + '\u4e00': 265, + '\u557e': 1000, + '\u61b6': 2024, + '\u6ede': 3177, + '\u7505': 3555, + '\u81e3': 4602, + '\u81e5': 4603, + '\u81e7': 4604, + '\u81e8': 4605, + '\u81ea': 4606, + '\u81ed': 4607, + '\u81f3': 4608, + '\u81f4': 4609, + '\u91c7': 5796, + '\u9fa0': 6620, + '\u203e': 12584, + }, + "x-times-new-roman": { + ' ': 3, + ':': 29, + 'fl': 192, + 'Ŀ': 273, + '♠': 388, + 'Ŗ': 451, + 'Σ': 520, + '\u200D': 745, + 'Ẽ': 1216, + '\u04e9': 1319, + '中': 0, + }, + } + for name, wants := range testCases { + font, testdataIsOptional, err := parseTestdataFont(name) + if err != nil { + if testdataIsOptional { + t.Log(err) + } else { + t.Fatal(err) + } + continue + } + for r, want := range wants { + if got := font.Index(r); got != want { + t.Errorf("%s: Index(%q): got %d, want %d", name, r, got, want) + } + } + } +} + var scalingTestCases = []struct { name string size int32 @@ -87,29 +196,22 @@ var scalingTestCases = []struct { // GlyphBuf.Load, and the unhinted values match C Freetype. {"x-arial-bold", 11, 0}, //{"x-deja-vu-sans-oblique", 17, 0}, - //{"x-droid-sans-japanese", 9, 0}, + {"x-droid-sans-japanese", 9, 0}, //{"x-times-new-roman", 13, 0}, } func testScaling(t *testing.T, hinter *Hinter) { loop: for _, tc := range scalingTestCases { - b, err := ioutil.ReadFile(fmt.Sprintf("../../testdata/%s.ttf", tc.name)) + font, testdataIsOptional, err := parseTestdataFont(tc.name) if err != nil { - // The "x-foo" fonts are optional tests, as they are not checked - // in for copyright or file size reasons. - if strings.HasPrefix(tc.name, "x-") { - t.Logf("%s: ReadFile: %v", tc.name, err) + if testdataIsOptional { + t.Log(err) } else { - t.Errorf("%s: ReadFile: %v", tc.name, err) + t.Error(err) } continue loop } - font, err := Parse(b) - if err != nil { - t.Errorf("%s: Parse: %v", tc.name, err) - continue loop - } hinting := "sans" if hinter != nil { hinting = "with"