freetype/truetype: parse UCS-4 encoded cmap tables.

R=bsiegert CC=golang-dev https://codereview.appspot.com/14548046
2013-10-11 07:29:40 +11:00 · 2013-10-11 07:29:40 +11:00 · 0a778f7f02
commit 0a778f7f02
parent ba07cbfbc8
2 changed files with 182 additions and 53 deletions
--- a/freetype/truetype/truetype.go
+++ b/freetype/truetype/truetype.go
@ -87,7 +87,7 @@ const (

 // A cm holds a parsed cmap entry.
 type cm struct {
-	start, end, delta, offset uint16
+	start, end, delta, offset uint32
 }

 // A Font represents a Truetype font.
@ -111,12 +111,14 @@ type Font struct {
 func (f *Font) parseCmap() error {
 	const (
 		cmapFormat4         = 4
+		cmapFormat12        = 12
 		languageIndependent = 0

 		// A 32-bit encoding consists of a most-significant 16-bit Platform ID and a
 		// least-significant 16-bit Platform Specific ID.
-		unicodeEncoding   = 0x00000003 // PID = 0 (Unicode), PSID = 3 (Unicode 2.0)
-		microsoftEncoding = 0x00030001 // PID = 3 (Microsoft), PSID = 1 (UCS-2)
+		unicodeEncoding       = 0x00000003 // PID = 0 (Unicode), PSID = 3 (Unicode 2.0)
+		microsoftUCS2Encoding = 0x00030001 // PID = 3 (Microsoft), PSID = 1 (UCS-2)
+		microsoftUCS4Encoding = 0x0003000a // PID = 3 (Microsoft), PSID = 10 (UCS-4)
 	)

 	if len(f.cmap) < 4 {
@ -137,7 +139,7 @@ func (f *Font) parseCmap() error {
 		if pidPsid == unicodeEncoding {
 			offset, found = int(o), true
 			break
-		} else if pidPsid == microsoftEncoding {
+		} else if pidPsid == microsoftUCS2Encoding || pidPsid == microsoftUCS4Encoding {
 			offset, found = int(o), true
 			// We don't break out of the for loop, so that Unicode can override Microsoft.
 		}
@ -150,39 +152,63 @@ func (f *Font) parseCmap() error {
 	}

 	cmapFormat := u16(f.cmap, offset)
-	if cmapFormat != cmapFormat4 {
-		return UnsupportedError(fmt.Sprintf("cmap format: %d", cmapFormat))
-	}
-	language := u16(f.cmap, offset+4)
-	if language != languageIndependent {
-		return UnsupportedError(fmt.Sprintf("language: %d", language))
-	}
-	segCountX2 := int(u16(f.cmap, offset+6))
-	if segCountX2%2 == 1 {
-		return FormatError(fmt.Sprintf("bad segCountX2: %d", segCountX2))
-	}
-	segCount := segCountX2 / 2
-	offset += 14
-	f.cm = make([]cm, segCount)
-	for i := 0; i < segCount; i++ {
-		f.cm[i].end = u16(f.cmap, offset)
+	switch cmapFormat {
+	case cmapFormat4:
+		language := u16(f.cmap, offset+4)
+		if language != languageIndependent {
+			return UnsupportedError(fmt.Sprintf("language: %d", language))
+		}
+		segCountX2 := int(u16(f.cmap, offset+6))
+		if segCountX2%2 == 1 {
+			return FormatError(fmt.Sprintf("bad segCountX2: %d", segCountX2))
+		}
+		segCount := segCountX2 / 2
+		offset += 14
+		f.cm = make([]cm, segCount)
+		for i := 0; i < segCount; i++ {
+			f.cm[i].end = uint32(u16(f.cmap, offset))
+			offset += 2
+		}
 		offset += 2
+		for i := 0; i < segCount; i++ {
+			f.cm[i].start = uint32(u16(f.cmap, offset))
+			offset += 2
+		}
+		for i := 0; i < segCount; i++ {
+			f.cm[i].delta = uint32(u16(f.cmap, offset))
+			offset += 2
+		}
+		for i := 0; i < segCount; i++ {
+			f.cm[i].offset = uint32(u16(f.cmap, offset))
+			offset += 2
+		}
+		f.cmapIndexes = f.cmap[offset:]
+		return nil
+
+	case cmapFormat12:
+		if u16(f.cmap, offset+2) != 0 {
+			return FormatError(fmt.Sprintf("cmap format: % x", f.cmap[offset:offset+4]))
+		}
+		length := u32(f.cmap, offset+4)
+		language := u32(f.cmap, offset+8)
+		if language != languageIndependent {
+			return UnsupportedError(fmt.Sprintf("language: %d", language))
+		}
+		nGroups := u32(f.cmap, offset+12)
+		if length != 12*nGroups+16 {
+			return FormatError("inconsistent cmap length")
+		}
+		offset += 16
+		f.cm = make([]cm, nGroups)
+		for i := uint32(0); i < nGroups; i++ {
+			f.cm[i].start = u32(f.cmap, offset+0)
+			f.cm[i].end = u32(f.cmap, offset+4)
+			f.cm[i].delta = u32(f.cmap, offset+8) - f.cm[i].start
+			offset += 12
+		}
+		return nil
 	}
-	offset += 2
-	for i := 0; i < segCount; i++ {
-		f.cm[i].start = u16(f.cmap, offset)
-		offset += 2
-	}
-	for i := 0; i < segCount; i++ {
-		f.cm[i].delta = u16(f.cmap, offset)
-		offset += 2
-	}
-	for i := 0; i < segCount; i++ {
-		f.cm[i].offset = u16(f.cmap, offset)
-		offset += 2
-	}
-	f.cmapIndexes = f.cmap[offset:]
-	return nil
+	return UnsupportedError(fmt.Sprintf("cmap format: %d", cmapFormat))
 }

 func (f *Font) parseHead() error {
@ -296,8 +322,9 @@ func (f *Font) FUnitsPerEm() int32 {

 // Index returns a Font's index for the given rune.
 func (f *Font) Index(x rune) Index {
-	c := uint16(x)
+	c := uint32(x)
 	n := len(f.cm)
+	// TODO: binary search.
 	for i := 0; i < n; i++ {
 		if f.cm[i].start <= c && c <= f.cm[i].end {
 			if f.cm[i].offset == 0 {
--- a/freetype/truetype/truetype_test.go
+++ b/freetype/truetype/truetype_test.go
@ -16,14 +16,24 @@ import (
 	"testing"
 )

+func parseTestdataFont(name string) (font *Font, testdataIsOptional bool, err error) {
+	b, err := ioutil.ReadFile(fmt.Sprintf("../../testdata/%s.ttf", name))
+	if err != nil {
+		// Only failing to read an "x-foo" font is optional: those files are not
+		// checked in for copyright or file size reasons. Parse errors never are.
+		return nil, strings.HasPrefix(name, "x-"), fmt.Errorf("%s: ReadFile: %v", name, err)
+	}
+	font, err = Parse(b)
+	if err != nil {
+		return nil, false, fmt.Errorf("%s: Parse: %v", name, err)
+	}
+	return font, false, nil
+}
+
 // TestParse tests that the luxisr.ttf metrics and glyphs are parsed correctly.
 // The numerical values can be manually verified by examining luxisr.ttx.
 func TestParse(t *testing.T) {
-	b, err := ioutil.ReadFile("../../testdata/luxisr.ttf")
-	if err != nil {
-		t.Fatal(err)
-	}
-	font, err := Parse(b)
+	font, _, err := parseTestdataFont("luxisr")
 	if err != nil {
 		t.Fatal(err)
 	}
@ -74,6 +84,105 @@ func TestParse(t *testing.T) {
 	}
 }

+func TestIndex(t *testing.T) {
+	testCases := map[string]map[rune]Index{
+		"luxisr": {
+			' ':      3,
+			'!':      4,
+			'A':      36,
+			'V':      57,
+			'É':      101,
+			'ﬂ':      193,
+			'\u22c5': 385,
+			'中':      0,
+		},
+		"x-arial-bold": {
+			' ':      3,
+			'+':      14,
+			'0':      19,
+			'_':      66,
+			'w':      90,
+			'~':      97,
+			'Ä':      98,
+			'ﬂ':      192,
+			'½':      242,
+			'σ':      305,
+			'λ':      540,
+			'ỹ':      1275,
+			'\u04e9': 1319,
+			'中':      0,
+		},
+		"x-deja-vu-sans-oblique": {
+			' ':      3,
+			'*':      13,
+			'Œ':      276,
+			'ω':      861,
+			'‡':      2571,
+			'⊕':      3109,
+			'ﬂ':      4560,
+			'\ufb03': 4561,
+			'\ufffd': 4645,
+			// TODO: '\U0001f640': ???,
+			'中': 0,
+		},
+		"x-droid-sans-japanese": {
+			' ':      0,
+			'\u3000': 3,
+			'\u3041': 25,
+			'\u30fe': 201,
+			'\uff61': 202,
+			'\uff67': 208,
+			'\uff9e': 263,
+			'\uff9f': 264,
+			'\u4e00': 265,
+			'\u557e': 1000,
+			'\u61b6': 2024,
+			'\u6ede': 3177,
+			'\u7505': 3555,
+			'\u81e3': 4602,
+			'\u81e5': 4603,
+			'\u81e7': 4604,
+			'\u81e8': 4605,
+			'\u81ea': 4606,
+			'\u81ed': 4607,
+			'\u81f3': 4608,
+			'\u81f4': 4609,
+			'\u91c7': 5796,
+			'\u9fa0': 6620,
+			'\u203e': 12584,
+		},
+		"x-times-new-roman": {
+			' ':      3,
+			':':      29,
+			'ﬂ':      192,
+			'Ŀ':      273,
+			'♠':      388,
+			'Ŗ':      451,
+			'Σ':      520,
+			'\u200D': 745,
+			'Ẽ':      1216,
+			'\u04e9': 1319,
+			'中':      0,
+		},
+	}
+	for name, wants := range testCases {
+		font, testdataIsOptional, err := parseTestdataFont(name)
+		if err != nil {
+			if testdataIsOptional {
+				t.Log(err)
+			} else {
+				t.Fatal(err)
+			}
+			continue
+		}
+		for r, want := range wants {
+			if got := font.Index(r); got != want {
+				t.Errorf("%s: Index(%q): got %d, want %d", name, r, got, want)
+			}
+		}
+	}
+}
+
 var scalingTestCases = []struct {
 	name string
 	size int32
@ -87,29 +196,22 @@ var scalingTestCases = []struct {
 	// GlyphBuf.Load, and the unhinted values match C Freetype.
 	{"x-arial-bold", 11, 0},
 	//{"x-deja-vu-sans-oblique", 17, 0},
-	//{"x-droid-sans-japanese", 9, 0},
+	{"x-droid-sans-japanese", 9, 0},
 	//{"x-times-new-roman", 13, 0},
 }

 func testScaling(t *testing.T, hinter *Hinter) {
 loop:
 	for _, tc := range scalingTestCases {
-		b, err := ioutil.ReadFile(fmt.Sprintf("../../testdata/%s.ttf", tc.name))
+		font, testdataIsOptional, err := parseTestdataFont(tc.name)
 		if err != nil {
-			// The "x-foo" fonts are optional tests, as they are not checked
-			// in for copyright or file size reasons.
-			if strings.HasPrefix(tc.name, "x-") {
-				t.Logf("%s: ReadFile: %v", tc.name, err)
+			if testdataIsOptional {
+				t.Log(err)
 			} else {
-				t.Errorf("%s: ReadFile: %v", tc.name, err)
+				t.Error(err)
 			}
 			continue loop
 		}
-		font, err := Parse(b)
-		if err != nil {
-			t.Errorf("%s: Parse: %v", tc.name, err)
-			continue loop
-		}
 		hinting := "sans"
 		if hinter != nil {
 			hinting = "with"