From 58e481f86c3fb4ea5cbfcde9b9321c62f46cd2fb Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Sun, 24 May 2020 23:23:03 +0000 Subject: [PATCH 1/4] Fix error when no glyph data --- table_glyf.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/table_glyf.go b/table_glyf.go index 061093c..196d396 100644 --- a/table_glyf.go +++ b/table_glyf.go @@ -109,6 +109,8 @@ func (gd *glyphDescription) parse() error { r := newByteReader(bytes.NewReader(gd.raw)) err := gd.parseHeader(r) if err != nil { + logrus.Debugf("ERROR parsing header: %v", err) + logrus.Debugf("Raw data: %d bytes", len(gd.raw)) return err } @@ -252,6 +254,10 @@ func (glyf *glyfTable) GetComponents(gid GlyphIndex) ([]GlyphIndex, error) { gdesc := glyf.descs[int(gid)] if gdesc.header == nil { + if len(gdesc.raw) == 0 { + // No glyph data. + return nil, nil + } err := gdesc.parse() if err != nil { logrus.Debugf("ERROR parsing header: %v", err) From eecd543003934794f0e066377dbf26beca665976 Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Sun, 24 May 2020 23:26:08 +0000 Subject: [PATCH 2/4] Subsetting improvements - cmap fixes When subsetting font via indices, also reduce the number of glyphs to the first maximum glyphs (i.e. the highest glyph number needed). Fixes SubsetFirst cmap serialization. --- export.go | 112 +++++++++++++++++++++++++++++++++++++++----------- table_cmap.go | 92 ++++++++++++++++++++++++++--------------- 2 files changed, 147 insertions(+), 57 deletions(-) diff --git a/export.go b/export.go index d8ab9c6..0109b4c 100644 --- a/export.go +++ b/export.go @@ -8,10 +8,10 @@ package unitype import ( "bytes" "errors" - "fmt" "io" "math" "os" + "sort" "github.com/sirupsen/logrus" ) @@ -93,12 +93,16 @@ func (f *Font) GetCmap(platformID, encodingID int) map[rune]GlyphIndex { return nil } -// SubsetKeepRunes prunes data for all GIDs except the ones corresponding to `runes`. The GIDs are -// maintained. Typically reduces glyf table size significantly. 
-func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) { +// LookupRunes looks up each rune in `runes` and returns a matching slice of glyph indices. +// When a rune is not found, a GID of 0 is used (notdef). +func (f *Font) LookupRunes(runes []rune) []GlyphIndex { var maps []map[rune]GlyphIndex // Search order (3,1), (1,0), (0,3). - maps = append(maps, f.GetCmap(3, 1), f.GetCmap(1, 0), f.GetCmap(0, 3)) + maps = append(maps, + f.GetCmap(3, 1), + f.GetCmap(1, 0), + f.GetCmap(0, 3), + ) var indices []GlyphIndex for _, r := range runes { @@ -110,13 +114,17 @@ func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) { break } } - if index == 0 { - return nil, fmt.Errorf("rune not found: %v", r) - } indices = append(indices, index) } logrus.Debugf("Runes: %+v %s", runes, string(runes)) logrus.Debugf("GIDs: %+v", indices) + return indices +} + +// SubsetKeepRunes prunes data for all GIDs except the ones corresponding to `runes`. The GIDs are +// maintained. Typically reduces glyf table size significantly. +func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) { + indices := f.LookupRunes(runes) return f.SubsetKeepIndices(indices) } @@ -126,6 +134,8 @@ func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) { func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) { newfnt := font{} + // Expand the set of indices if any of the indices are composite + // glyphs depending on other glyphs. 
gidIncludedMap := make(map[GlyphIndex]struct{}, len(indices)) for _, gid := range indices { gidIncludedMap[gid] = struct{}{} @@ -142,6 +152,7 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) { for _, gid := range toscan { components, err := f.glyf.GetComponents(gid) if err != nil { + logrus.Debugf("Error getting components for %d", gid) return nil, err } for _, gid := range components { @@ -237,10 +248,12 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) { newfnt.os2 = &os2Table{} *newfnt.os2 = *f.font.os2 } + if f.font.post != nil { newfnt.post = &postTable{} *newfnt.post = *f.font.post } + if f.font.cmap != nil { newfnt.cmap = &cmapTable{} *newfnt.cmap = *f.font.cmap @@ -258,6 +271,7 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) { maxgid = gid } } + // Trim font down to only maximum needed glyphs without changing order. maxNeededNum := int(maxgid) + 1 return subfnt.SubsetFirst(maxNeededNum) } @@ -337,6 +351,21 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) { } } + if f.font.prep != nil { + newfnt.prep = &prepTable{} + *newfnt.prep = *f.font.prep + } + + if f.font.cvt != nil { + newfnt.cvt = &cvtTable{} + *newfnt.cvt = *f.font.cvt + } + + if f.font.fpgm != nil { + newfnt.fpgm = &fpgmTable{} + *newfnt.fpgm = *f.font.fpgm + } + if f.font.name != nil { newfnt.name = &nameTable{} *newfnt.name = *f.font.name @@ -358,13 +387,13 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) { newfnt.post.glyphNameIndex = newfnt.post.glyphNameIndex[0:numGlyphs] } if len(newfnt.post.offsets) > numGlyphs { - // TODO: Not sure if this is updated here or generated on the fly? 
newfnt.post.offsets = newfnt.post.offsets[0:numGlyphs] } if len(newfnt.post.glyphNames) > numGlyphs { newfnt.post.glyphNames = newfnt.post.glyphNames[0:numGlyphs] } } + if f.font.cmap != nil { newfnt.cmap = &cmapTable{ version: f.cmap.version, @@ -386,19 +415,32 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) { // Makes continous entries with deltas. // Does not use glyphIDData, but only the deltas. Can lead to many segments, but should not // be too bad (especially since subsetting). + charcodes := make([]CharCode, 0, len(subt.charcodeToGID)) + for cc, gid := range subt.charcodeToGID { + if int(gid) >= numGlyphs { + continue + } + charcodes = append(charcodes, cc) + } + sort.Slice(charcodes, func(i, j int) bool { + return charcodes[i] < charcodes[j] + }) + segments := 0 i := 0 - for i < numGlyphs { + for i < len(charcodes) { j := i + 1 - for ; j < numGlyphs; j++ { - if int(subt.runes[j]-subt.runes[i]) != j-i { + for ; j < len(charcodes); j++ { + if int(charcodes[j]-charcodes[i]) != j-i || + int(subt.charcodeToGID[charcodes[j]]-subt.charcodeToGID[charcodes[i]]) != j-i { break } } - // from i:j-1 maps to subt.runes[i]:subt.runes[i]+j-i-1 - startCode := uint16(subt.runes[i]) - endCode := uint16(subt.runes[i]) + uint16(j-i-1) - idDelta := uint16(uint16(i) - startCode) + // from i:j-1 maps to subt.charcodes[i]:subt.charcodes[i]+j-i-1 + startCode := uint16(charcodes[i]) + endCode := uint16(charcodes[i]) + uint16(j-i-1) + idDelta := uint16(subt.charcodeToGID[charcodes[i]]) - uint16(charcodes[i]) + newt.startCode = append(newt.startCode, startCode) newt.endCode = append(newt.endCode, endCode) newt.idDelta = append(newt.idDelta, idDelta) @@ -406,6 +448,15 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) { segments++ i = j } + + if segments > 0 && newt.endCode[segments-1] < 65535 { + newt.endCode = append(newt.endCode, 65535) + newt.startCode = append(newt.startCode, 65535) + newt.idDelta = append(newt.idDelta, 1) + newt.idRangeOffset = 
append(newt.idRangeOffset, 0) + segments++ + } + newt.length = uint16(2*8 + 2*4*segments) newt.language = t.language newt.segCountX2 = uint16(segments * 2) @@ -423,17 +474,30 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) { newt := cmapSubtableFormat12{} groups := 0 - for i := 0; i < numGlyphs; i++ { + charcodes := make([]CharCode, 0, len(subt.charcodeToGID)) + for cc, gid := range subt.charcodeToGID { + if int(gid) >= numGlyphs { + continue + } + charcodes = append(charcodes, cc) + } + sort.Slice(charcodes, func(i, j int) bool { + return charcodes[i] < charcodes[j] + }) + + i := 0 + for i < len(charcodes) { j := i + 1 - for ; j < numGlyphs; j++ { - if int(subt.runes[j]-subt.runes[i]) != j-i { + for ; j < len(charcodes); j++ { + if int(charcodes[j]-charcodes[i]) != j-i || + int(subt.charcodeToGID[charcodes[j]]-subt.charcodeToGID[charcodes[i]]) != j-i { break } } - // from i:j-1 maps to subt.runes[i]:subt.runes[i]+j-i-1 - startCharCode := uint32(subt.runes[i]) - endCharCode := uint32(subt.runes[i]) + uint32(j-i-1) - startGlyphID := uint32(i) + // from i:j-1 maps to subt.charcodes[i]:subt.charcodes[i]+j-i-1 + startCharCode := uint32(charcodes[i]) + endCharCode := uint32(charcodes[i]) + uint32(j-i-1) + startGlyphID := uint32(subt.charcodeToGID[charcodes[i]]) group := sequentialMapGroup{ startCharCode: startCharCode, @@ -442,7 +506,9 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) { } newt.groups = append(newt.groups, group) groups++ + i = j } + newt.length = uint32(2*2 + 3*4 + groups*3*4) newt.language = t.language newt.numGroups = uint32(groups) diff --git a/table_cmap.go b/table_cmap.go index 233970d..d5359ef 100644 --- a/table_cmap.go +++ b/table_cmap.go @@ -120,11 +120,11 @@ type cmapSubtable struct { ctx interface{} // The specific subtable, e.g. cmapSubtableFormat0, etc. // TODO: Need GID to rune map too? or just a list of runes (with length = numGlyphs, i.e. 
one rune per gid) - cmap map[rune]GlyphIndex - runes []rune - charcodeToGID map[CharCode]GlyphIndex - runeToCharcodes map[rune][]byte // Quick for going rune -> encoded bytes (charcodes). - // Not so quick for going charcodes to rune? + cmap map[rune]GlyphIndex + runes []rune + charcodes []CharCode + charcodeToGID map[CharCode]GlyphIndex + runeToCharcodeBytes map[rune][]byte // Quick for going rune -> encoded bytes (charcodes). } // cmapSubtableFormat0 represents format 0: Byte encoding table. @@ -162,7 +162,8 @@ func (f *font) parseCmapSubtableFormat0(r *byteReader, platformID, encodingID in // (cmapEncoder). cmap := map[rune]GlyphIndex{} runes := make([]rune, len(st.glyphIDArray)) - charcodes := map[rune][]byte{} + runeToCharcodeBytes := map[rune][]byte{} + charcodes := make([]CharCode, len(st.glyphIDArray)) charcodeToGID := map[CharCode]GlyphIndex{} for glyphID, code := range st.glyphIDArray { @@ -170,22 +171,24 @@ func (f *font) parseCmapSubtableFormat0(r *byteReader, platformID, encodingID in codeBytes := runeDecoder.ToBytes(uint32(code)) r := runeDecoder.DecodeRune(codeBytes) runes[glyphID] = r + charcodes[glyphID] = CharCode(code) if _, has := cmap[r]; !has { // Avoid overwrite, if get same twice, use the earlier entry. 
cmap[r] = GlyphIndex(glyphID) - charcodes[r] = codeBytes + runeToCharcodeBytes[r] = codeBytes } } return &cmapSubtable{ - format: 0, - platformID: platformID, - encodingID: encodingID, - cmap: cmap, - runes: runes, - runeToCharcodes: charcodes, - charcodeToGID: charcodeToGID, - ctx: st, + format: 0, + platformID: platformID, + encodingID: encodingID, + cmap: cmap, + runes: runes, + runeToCharcodeBytes: runeToCharcodeBytes, + charcodes: charcodes, + charcodeToGID: charcodeToGID, + ctx: st, }, nil } @@ -246,7 +249,6 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in if err != nil { return nil, err } - err = r.readSlice(&st.idDelta, segCount) if err != nil { return nil, err @@ -274,6 +276,8 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in cmap := map[rune]GlyphIndex{} runes := make([]rune, int(f.maxp.numGlyphs)) + charcodes := make([]CharCode, int(f.maxp.numGlyphs)) + charcodeMap := make(map[CharCode]GlyphIndex, f.maxp.numGlyphs) logrus.Debugf("Number of glyphs in font: %d\n", f.maxp.numGlyphs) for i := 0; i < segCount-1; i++ { c1 := st.startCode[i] @@ -313,6 +317,9 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in return nil, errors.New("gid out of range") } runes[int(gid)] = r + charcodes[int(gid)] = CharCode(c) + charcodeMap[CharCode(c)] = GlyphIndex(gid) + if _, has := cmap[r]; !has { // Avoid overwrite, if get same twice, use the earlier entry. 
cmap[r] = GlyphIndex(gid) @@ -322,12 +329,14 @@ func (f *font) parseCmapSubtableFormat4(r *byteReader, platformID, encodingID in } return &cmapSubtable{ - format: 4, - platformID: platformID, - encodingID: encodingID, - cmap: cmap, - runes: runes, - ctx: st, + format: 4, + platformID: platformID, + encodingID: encodingID, + cmap: cmap, + charcodes: charcodes, + charcodeToGID: charcodeMap, + runes: runes, + ctx: st, }, nil } @@ -398,12 +407,16 @@ func (f *font) parseCmapSubtableFormat6(r *byteReader, platformID, encodingID in cmap := map[rune]GlyphIndex{} runes := make([]rune, st.entryCount) + charcodes := make([]CharCode, st.entryCount) + charcodeMap := make(map[CharCode]GlyphIndex, st.entryCount) for i := 0; i < int(st.entryCount); i++ { gid := GlyphIndex(st.glyphIDArray[i]) code := st.firstCode + uint16(i) b := runeDecoder.ToBytes(uint32(code)) r := runeDecoder.DecodeRune(b) runes[i] = r + charcodes[i] = CharCode(code) + charcodeMap[CharCode(code)] = gid if _, has := cmap[r]; !has { // Avoid ovewriting (stick to first gid). 
cmap[r] = gid @@ -411,12 +424,14 @@ func (f *font) parseCmapSubtableFormat6(r *byteReader, platformID, encodingID in } return &cmapSubtable{ - format: 6, - platformID: platformID, - encodingID: encodingID, - cmap: cmap, - runes: runes, - ctx: st, + format: 6, + platformID: platformID, + encodingID: encodingID, + cmap: cmap, + runes: runes, + charcodes: charcodes, + charcodeToGID: charcodeMap, + ctx: st, }, nil } @@ -475,6 +490,8 @@ func (f *font) parseCmapSubtableFormat12(r *byteReader, platformID, encodingID i cmap := map[rune]GlyphIndex{} runes := make([]rune, f.maxp.numGlyphs) + charcodes := make([]CharCode, f.maxp.numGlyphs) + charcodeMap := make(map[CharCode]GlyphIndex, f.maxp.numGlyphs) for _, group := range st.groups { gid := GlyphIndex(group.startGlyphID) if int(gid) >= int(f.maxp.numGlyphs) { @@ -483,9 +500,14 @@ func (f *font) parseCmapSubtableFormat12(r *byteReader, platformID, encodingID i return nil, errRangeCheck } for charcode := group.startCharCode; charcode <= group.endCharCode; charcode++ { + if int(gid) >= int(f.maxp.numGlyphs) { + break + } b := runeDecoder.ToBytes(charcode) r := runeDecoder.DecodeRune(b) runes[gid] = r + charcodes[gid] = CharCode(charcode) + charcodeMap[CharCode(charcode)] = gid if _, has := cmap[r]; !has { // Avoid overwrite, if get same twice, use the earlier entry. 
cmap[r] = gid @@ -495,12 +517,14 @@ func (f *font) parseCmapSubtableFormat12(r *byteReader, platformID, encodingID i } return &cmapSubtable{ - format: 12, - ctx: st, - platformID: platformID, - encodingID: encodingID, - cmap: cmap, - runes: runes, + format: 12, + ctx: st, + platformID: platformID, + encodingID: encodingID, + cmap: cmap, + runes: runes, + charcodes: charcodes, + charcodeToGID: charcodeMap, }, nil } From 0356ac29eaaaabe024dd98363a49896f4890781e Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Sun, 24 May 2020 23:26:21 +0000 Subject: [PATCH 3/4] Add needed read type --- byte_reader.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/byte_reader.go b/byte_reader.go index 55df850..48c5d43 100644 --- a/byte_reader.go +++ b/byte_reader.go @@ -117,6 +117,12 @@ func (r *byteReader) readSlice(slice interface{}, length int) error { func (r byteReader) read(fields ...interface{}) error { for _, f := range fields { switch t := f.(type) { + case **f2dot14: + val, err := r.readF2dot14() + if err != nil { + return err + } + *t = &val case *f2dot14: val, err := r.readF2dot14() if err != nil { From 77d42b645b0270a0a7b5f401c0eeaf5a7c165785 Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Sun, 24 May 2020 23:26:39 +0000 Subject: [PATCH 4/4] Add more debug info --- font.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/font.go b/font.go index 754911d..c8b6bff 100644 --- a/font.go +++ b/font.go @@ -525,6 +525,9 @@ func (f *font) TableInfo(table string) string { for _, k := range f.cmap.subtableKeys { subt := f.cmap.subtables[k] b.WriteString(fmt.Sprintf("cmap subtable: %s: runes: %d\n", k, len(subt.runes))) + for i := range subt.charcodes { + b.WriteString(fmt.Sprintf("\t%d - Charcode %d (0x%X) - rune % X\n", i, subt.charcodes[i], subt.charcodes[i], subt.runes[i])) + } } case "loca": if f.loca == nil {