From 58e481f86c3fb4ea5cbfcde9b9321c62f46cd2fb Mon Sep 17 00:00:00 2001
From: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
Date: Sun, 24 May 2020 23:23:03 +0000
Subject: [PATCH 1/4] Fix error when no glyph data

---
 table_glyf.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/table_glyf.go b/table_glyf.go
index 061093c..196d396 100644
--- a/table_glyf.go
+++ b/table_glyf.go
@@ -109,6 +109,8 @@ func (gd *glyphDescription) parse() error {
 	r := newByteReader(bytes.NewReader(gd.raw))
 	err := gd.parseHeader(r)
 	if err != nil {
+		logrus.Debugf("ERROR parsing header: %v", err)
+		logrus.Debugf("Raw data: %d bytes", len(gd.raw))
 		return err
 	}
 
@@ -252,6 +254,10 @@ func (glyf *glyfTable) GetComponents(gid GlyphIndex) ([]GlyphIndex, error) {
 	gdesc := glyf.descs[int(gid)]
 
 	if gdesc.header == nil {
+		if len(gdesc.raw) == 0 {
+			// No glyph data.
+			return nil, nil
+		}
 		err := gdesc.parse()
 		if err != nil {
 			logrus.Debugf("ERROR parsing header: %v", err)

From eecd543003934794f0e066377dbf26beca665976 Mon Sep 17 00:00:00 2001
From: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
Date: Sun, 24 May 2020 23:26:08 +0000
Subject: [PATCH 2/4] Subsetting improvements - cmap fixes

When subsetting a font via glyph indices, also truncate the font to its first N glyphs, where N is the highest needed glyph index plus one.
Fixes SubsetFirst cmap serialization.
---
 export.go     | 112 +++++++++++++++++++++++++++++++++++++++-----------
 table_cmap.go |  92 ++++++++++++++++++++++++++---------------
 2 files changed, 147 insertions(+), 57 deletions(-)

diff --git a/export.go b/export.go
index d8ab9c6..0109b4c 100644
--- a/export.go
+++ b/export.go
@@ -8,10 +8,10 @@ package unitype
 import (
 	"bytes"
 	"errors"
-	"fmt"
 	"io"
 	"math"
 	"os"
+	"sort"
 
 	"github.com/sirupsen/logrus"
 )
@@ -93,12 +93,16 @@ func (f *Font) GetCmap(platformID, encodingID int) map[rune]GlyphIndex {
 	return nil
 }
 
-// SubsetKeepRunes prunes data for all GIDs except the ones corresponding to `runes`.  The GIDs are
-// maintained. Typically reduces glyf table size significantly.
-func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
+// LookupRunes looks up each rune in `runes` and returns a matching slice of glyph indices.
+// When a rune is not found, a GID of 0 is used (notdef).
+func (f *Font) LookupRunes(runes []rune) []GlyphIndex {
 	var maps []map[rune]GlyphIndex
 	// Search order (3,1), (1,0), (0,3).
-	maps = append(maps, f.GetCmap(3, 1), f.GetCmap(1, 0), f.GetCmap(0, 3))
+	maps = append(maps,
+		f.GetCmap(3, 1),
+		f.GetCmap(1, 0),
+		f.GetCmap(0, 3),
+	)
 
 	var indices []GlyphIndex
 	for _, r := range runes {
@@ -110,13 +114,17 @@ func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
 				break
 			}
 		}
-		if index == 0 {
-			return nil, fmt.Errorf("rune not found: %v", r)
-		}
 		indices = append(indices, index)
 	}
 	logrus.Debugf("Runes: %+v %s", runes, string(runes))
 	logrus.Debugf("GIDs: %+v", indices)
+	return indices
+}
+
+// SubsetKeepRunes prunes data for all GIDs except the ones corresponding to `runes`.  The GIDs are
+// maintained. Typically reduces glyf table size significantly.
+func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
+	indices := f.LookupRunes(runes)
 	return f.SubsetKeepIndices(indices)
 }
 
@@ -126,6 +134,8 @@ func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
 func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
 	newfnt := font{}
 
+	// Expand the set of indices if any of the indices are composite
+	// glyphs depending on other glyphs.
 	gidIncludedMap := make(map[GlyphIndex]struct{}, len(indices))
 	for _, gid := range indices {
 		gidIncludedMap[gid] = struct{}{}
@@ -142,6 +152,7 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
 		for _, gid := range toscan {
 			components, err := f.glyf.GetComponents(gid)
 			if err != nil {
+				logrus.Debugf("Error getting components for %d", gid)
 				return nil, err
 			}
 			for _, gid := range components {
@@ -237,10 +248,12 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
 		newfnt.os2 = &os2Table{}
 		*newfnt.os2 = *f.font.os2
 	}
+
 	if f.font.post != nil {
 		newfnt.post = &postTable{}
 		*newfnt.post = *f.font.post
 	}
+
 	if f.font.cmap != nil {
 		newfnt.cmap = &cmapTable{}
 		*newfnt.cmap = *f.font.cmap
@@ -258,6 +271,7 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
 			maxgid = gid
 		}
 	}
+	// Trim the font down to its first maxgid+1 glyphs without changing glyph order.
 	maxNeededNum := int(maxgid) + 1
 	return subfnt.SubsetFirst(maxNeededNum)
 }
@@ -337,6 +351,21 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
 		}
 	}
 
+	if f.font.prep != nil {
+		newfnt.prep = &prepTable{}
+		*newfnt.prep = *f.font.prep
+	}
+
+	if f.font.cvt != nil {
+		newfnt.cvt = &cvtTable{}
+		*newfnt.cvt = *f.font.cvt
+	}
+
+	if f.font.fpgm != nil {
+		newfnt.fpgm = &fpgmTable{}
+		*newfnt.fpgm = *f.font.fpgm
+	}
+
 	if f.font.name != nil {
 		newfnt.name = &nameTable{}
 		*newfnt.name = *f.font.name
@@ -358,13 +387,13 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
 			newfnt.post.glyphNameIndex = newfnt.post.glyphNameIndex[0:numGlyphs]
 		}
 		if len(newfnt.post.offsets) > numGlyphs {
-			// TODO: Not sure if this is updated here or generated on the fly?
 			newfnt.post.offsets = newfnt.post.offsets[0:numGlyphs]
 		}
 		if len(newfnt.post.glyphNames) > numGlyphs {
 			newfnt.post.glyphNames = newfnt.post.glyphNames[0:numGlyphs]
 		}
 	}
+
 	if f.font.cmap != nil {
 		newfnt.cmap = &cmapTable{
 			version:   f.cmap.version,
@@ -386,19 +415,32 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
 				// Makes continous entries with deltas.
 				// Does not use glyphIDData, but only the deltas.  Can lead to many segments, but should not
 				// be too bad (especially since subsetting).
+				charcodes := make([]CharCode, 0, len(subt.charcodeToGID))
+				for cc, gid := range subt.charcodeToGID {
+					if int(gid) >= numGlyphs {
+						continue
+					}
+					charcodes = append(charcodes, cc)
+				}
+				sort.Slice(charcodes, func(i, j int) bool {
+					return charcodes[i] < charcodes[j]
+				})
+
 				segments := 0
 				i := 0
-				for i < numGlyphs {
+				for i < len(charcodes) {
 					j := i + 1
-					for ; j < numGlyphs; j++ {
-						if int(subt.runes[j]-subt.runes[i]) != j-i {
+					for ; j < len(charcodes); j++ {
+						if int(charcodes[j]-charcodes[i]) != j-i ||
+							int(subt.charcodeToGID[charcodes[j]]-subt.charcodeToGID[charcodes[i]]) != j-i {
 							break
 						}
 					}
-					// from i:j-1 maps to subt.runes[i]:subt.runes[i]+j-i-1
-					startCode := uint16(subt.runes[i])
-					endCode := uint16(subt.runes[i]) + uint16(j-i-1)
-					idDelta := uint16(uint16(i) - startCode)
+					// Sorted entries i..j-1 form one segment covering charcodes[i]..charcodes[i]+j-i-1.
+					startCode := uint16(charcodes[i])
+					endCode := uint16(charcodes[i]) + uint16(j-i-1)
+					idDelta := uint16(subt.charcodeToGID[charcodes[i]]) - uint16(charcodes[i])
+
 					newt.startCode = append(newt.startCode, startCode)
 					newt.endCode = append(newt.endCode, endCode)
 					newt.idDelta = append(newt.idDelta, idDelta)
@@ -406,6 +448,15 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
 					segments++
 					i = j
 				}
+
+				if segments > 0 && newt.endCode[segments-1] < 65535 {
+					newt.endCode = append(newt.endCode, 65535)
+					newt.startCode = append(newt.startCode, 65535)
+					newt.idDelta = append(newt.idDelta, 1)
+					newt.idRangeOffset = append(newt.idRangeOffset, 0)
+					segments++
+				}
+
 				newt.length = uint16(2*8 + 2*4*segments)
 				newt.language = t.language
 				newt.segCountX2 = uint16(segments * 2)
@@ -423,17 +474,30 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
 				newt := cmapSubtableFormat12{}
 				groups := 0
 
-				for i := 0; i < numGlyphs; i++ {
+				charcodes := make([]CharCode, 0, len(subt.charcodeToGID))
+				for cc, gid := range subt.charcodeToGID {
+					if int(gid) >= numGlyphs {
+						continue
+					}
+					charcodes = append(charcodes, cc)
+				}
+				sort.Slice(charcodes, func(i, j int) bool {
+					return charcodes[i] < charcodes[j]
+				})
+
+				i := 0
+				for i < len(charcodes) {
 					j := i + 1
-					for ; j < numGlyphs; j++ {
-						if int(subt.runes[j]-subt.runes[i]) != j-i {
+					for ; j < len(charcodes); j++ {
+						if int(charcodes[j]-charcodes[i]) != j-i ||
+							int(subt.charcodeToGID[charcodes[j]]-subt.charcodeToGID[charcodes[i]]) != j-i {
 							break
 						}
 					}
-					// from i:j-1 maps to subt.runes[i]:subt.runes[i]+j-i-1
-					startCharCode := uint32(subt.runes[i])
-					endCharCode := uint32(subt.runes[i]) + uint32(j-i-1)
-					startGlyphID := uint32(i)
+					// Sorted entries i..j-1 form one group covering charcodes[i]..charcodes[i]+j-i-1.
+					startCharCode := uint32(charcodes[i])
+					endCharCode := uint32(charcodes[i]) + uint32(j-i-1)
+					startGlyphID := uint32(subt.charcodeToGID[charcodes[i]])
 
 					group := sequentialMapGroup{
 						startCharCode: startCharCode,
@@ -442,7 +506,9 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
 					}
 					newt.groups = append(newt.groups, group)
 					groups++
+					i = j
 				}
+
 				newt.length = uint32(2*2 + 3*4 + groups*3*4)
 				newt.language = t.language
 				newt.numGroups = uint32(groups)
diff --git a/table_cmap.go b/table_cmap.go
index 233970d..d5359ef 100644
--- a/table_cmap.go
+++ b/table_cmap.go
@@ -120,11 +120,11 @@ type cmapSubtable struct {
 	ctx interface{} // The specific subtable, e.g. cmapSubtableFormat0, etc.
 
 	// TODO: Need GID to rune map too? or just a list of runes (with length = numGlyphs, i.e. one rune per gid)
-	cmap            map[rune]GlyphIndex
-	runes           []rune
-	charcodeToGID   map[CharCode]GlyphIndex
-	runeToCharcodes map[rune][]byte // Quick for going rune -> encoded bytes (charcodes).
-	// Not so quick for going charcodes to rune?
+	cmap                map[rune]GlyphIndex
+	runes               []rune
+	charcodes           []CharCode
+	charcodeToGID       map[CharCode]GlyphIndex
+	runeToCharcodeBytes map[rune][]byte // Quick for going rune -> encoded bytes (charcodes).
 }
 
 // cmapSubtableFormat0 represents format 0: Byte encoding table.
@@ -162,7 +162,8 @@ func (f *font) parseCmapSubtableFormat0(r *byteReader, platformID, encodingID in
 	//   (cmapEncoder).
 	cmap := map[rune]GlyphIndex{}
 	runes := make([]rune, len(st.glyphIDArray))
-	charcodes := map[rune][]byte{}
+	runeToCharcodeBytes := map[rune][]byte{}
+	charcodes := make([]CharCode, len(st.glyphIDArray))
 	charcodeToGID := map[CharCode]GlyphIndex{}
 
 	for glyphID, code := range st.glyphIDArray {
@@ -170,22 +171,24 @@ func (f *font) parseCmapSubtableFormat0(r *byteReader, platformID, encodingID in
 		codeBytes := runeDecoder.ToBytes(uint32(code))
 		r := runeDecoder.DecodeRune(codeBytes)
 		runes[glyphID] = r
+		charcodes[glyphID] = CharCode(code)
 		if _, has := cmap[r]; !has {
 			// Avoid overwrite, if get same twice, use the earlier entry.
 			cmap[r] = GlyphIndex(glyphID)
-			charcodes[r] = codeBytes
+			runeToCharcodeBytes[r] = codeBytes
 		}
 	}
 
 	return &cmapSubtable{
-		format:          0,
-		platformID:      platformID,
-		encodingID:      encodingID,
-		cmap:            cmap,
-		runes:           runes,
-		runeToCharcodes: charcodes,
-		charcodeToGID:   charcodeToGID,
-		ctx:             st,
+		format:              0,
+		platformID:          platformID,
+		encodingID:          encodingID,
+		cmap:                cmap,
+		runes:               runes,
+		runeToCharcodeBytes: runeToCharcodeBytes,
+		charcodes:           charcodes,
+		charcodeToGID:       charcodeToGID,
+		ctx:                 st,
 	}, nil
 }
 
@@ -246,7 +249,6 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in
 	if err != nil {
 		return nil, err
 	}
-
 	err = r.readSlice(&st.idDelta, segCount)
 	if err != nil {
 		return nil, err
@@ -274,6 +276,8 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in
 
 	cmap := map[rune]GlyphIndex{}
 	runes := make([]rune, int(f.maxp.numGlyphs))
+	charcodes := make([]CharCode, int(f.maxp.numGlyphs))
+	charcodeMap := make(map[CharCode]GlyphIndex, f.maxp.numGlyphs)
 	logrus.Debugf("Number of glyphs in font: %d\n", f.maxp.numGlyphs)
 	for i := 0; i < segCount-1; i++ {
 		c1 := st.startCode[i]
@@ -313,6 +317,9 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in
 					return nil, errors.New("gid out of range")
 				}
 				runes[int(gid)] = r
+				charcodes[int(gid)] = CharCode(c)
+				charcodeMap[CharCode(c)] = GlyphIndex(gid)
+
 				if _, has := cmap[r]; !has {
 					// Avoid overwrite, if get same twice, use the earlier entry.
 					cmap[r] = GlyphIndex(gid)
@@ -322,12 +329,14 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in
 	}
 
 	return &cmapSubtable{
-		format:     4,
-		platformID: platformID,
-		encodingID: encodingID,
-		cmap:       cmap,
-		runes:      runes,
-		ctx:        st,
+		format:        4,
+		platformID:    platformID,
+		encodingID:    encodingID,
+		cmap:          cmap,
+		charcodes:     charcodes,
+		charcodeToGID: charcodeMap,
+		runes:         runes,
+		ctx:           st,
 	}, nil
 }
 
@@ -398,12 +407,16 @@ func (f *font) parseCmapSubtableFormat6(r *byteReader, platformID, encodingID in
 
 	cmap := map[rune]GlyphIndex{}
 	runes := make([]rune, st.entryCount)
+	charcodes := make([]CharCode, st.entryCount)
+	charcodeMap := make(map[CharCode]GlyphIndex, st.entryCount)
 	for i := 0; i < int(st.entryCount); i++ {
 		gid := GlyphIndex(st.glyphIDArray[i])
 		code := st.firstCode + uint16(i)
 		b := runeDecoder.ToBytes(uint32(code))
 		r := runeDecoder.DecodeRune(b)
 		runes[i] = r
+		charcodes[i] = CharCode(code)
+		charcodeMap[CharCode(code)] = gid
 		if _, has := cmap[r]; !has {
 			// Avoid ovewriting (stick to first gid).
 			cmap[r] = gid
@@ -411,12 +424,14 @@ func (f *font) parseCmapSubtableFormat6(r *byteReader, platformID, encodingID in
 	}
 
 	return &cmapSubtable{
-		format:     6,
-		platformID: platformID,
-		encodingID: encodingID,
-		cmap:       cmap,
-		runes:      runes,
-		ctx:        st,
+		format:        6,
+		platformID:    platformID,
+		encodingID:    encodingID,
+		cmap:          cmap,
+		runes:         runes,
+		charcodes:     charcodes,
+		charcodeToGID: charcodeMap,
+		ctx:           st,
 	}, nil
 }
 
@@ -475,6 +490,8 @@ func (f *font) parseCmapSubtableFormat12(r *byteReader, platformID, encodingID i
 
 	cmap := map[rune]GlyphIndex{}
 	runes := make([]rune, f.maxp.numGlyphs)
+	charcodes := make([]CharCode, f.maxp.numGlyphs)
+	charcodeMap := make(map[CharCode]GlyphIndex, f.maxp.numGlyphs)
 	for _, group := range st.groups {
 		gid := GlyphIndex(group.startGlyphID)
 		if int(gid) >= int(f.maxp.numGlyphs) {
@@ -483,9 +500,14 @@ func (f *font) parseCmapSubtableFormat12(r *byteReader, platformID, encodingID i
 			return nil, errRangeCheck
 		}
 		for charcode := group.startCharCode; charcode <= group.endCharCode; charcode++ {
+			if int(gid) >= int(f.maxp.numGlyphs) {
+				break
+			}
 			b := runeDecoder.ToBytes(charcode)
 			r := runeDecoder.DecodeRune(b)
 			runes[gid] = r
+			charcodes[gid] = CharCode(charcode)
+			charcodeMap[CharCode(charcode)] = gid
 			if _, has := cmap[r]; !has {
 				// Avoid overwrite, if get same twice, use the earlier entry.
 				cmap[r] = gid
@@ -495,12 +517,14 @@ func (f *font) parseCmapSubtableFormat12(r *byteReader, platformID, encodingID i
 	}
 
 	return &cmapSubtable{
-		format:     12,
-		ctx:        st,
-		platformID: platformID,
-		encodingID: encodingID,
-		cmap:       cmap,
-		runes:      runes,
+		format:        12,
+		ctx:           st,
+		platformID:    platformID,
+		encodingID:    encodingID,
+		cmap:          cmap,
+		runes:         runes,
+		charcodes:     charcodes,
+		charcodeToGID: charcodeMap,
 	}, nil
 }
 

From 0356ac29eaaaabe024dd98363a49896f4890781e Mon Sep 17 00:00:00 2001
From: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
Date: Sun, 24 May 2020 23:26:21 +0000
Subject: [PATCH 3/4] Add needed read type

---
 byte_reader.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/byte_reader.go b/byte_reader.go
index 55df850..48c5d43 100644
--- a/byte_reader.go
+++ b/byte_reader.go
@@ -117,6 +117,12 @@ func (r *byteReader) readSlice(slice interface{}, length int) error {
 func (r byteReader) read(fields ...interface{}) error {
 	for _, f := range fields {
 		switch t := f.(type) {
+		case **f2dot14:
+			val, err := r.readF2dot14()
+			if err != nil {
+				return err
+			}
+			*t = &val
 		case *f2dot14:
 			val, err := r.readF2dot14()
 			if err != nil {

From 77d42b645b0270a0a7b5f401c0eeaf5a7c165785 Mon Sep 17 00:00:00 2001
From: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
Date: Sun, 24 May 2020 23:26:39 +0000
Subject: [PATCH 4/4] Add more debug info

---
 font.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/font.go b/font.go
index 754911d..c8b6bff 100644
--- a/font.go
+++ b/font.go
@@ -525,6 +525,9 @@ func (f *font) TableInfo(table string) string {
 		for _, k := range f.cmap.subtableKeys {
 			subt := f.cmap.subtables[k]
 			b.WriteString(fmt.Sprintf("cmap subtable: %s: runes: %d\n", k, len(subt.runes)))
+			for i := range subt.charcodes {
+				b.WriteString(fmt.Sprintf("\t%d - Charcode %d (0x%X) - rune % X\n", i, subt.charcodes[i], subt.charcodes[i], subt.runes[i]))
+			}
 		}
 	case "loca":
 		if f.loca == nil {