Skip to content

Commit

Permalink
Merge pull request #2 from unidoc/subsetting-cmap-fixes
Browse files Browse the repository at this point in the history
Subsetting and cmap fixes
  • Loading branch information
gunnsth authored May 25, 2020
2 parents 5c5bbee + 77d42b6 commit 2591bc0
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 57 deletions.
6 changes: 6 additions & 0 deletions byte_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ func (r *byteReader) readSlice(slice interface{}, length int) error {
func (r byteReader) read(fields ...interface{}) error {
for _, f := range fields {
switch t := f.(type) {
case **f2dot14:
val, err := r.readF2dot14()
if err != nil {
return err
}
*t = &val
case *f2dot14:
val, err := r.readF2dot14()
if err != nil {
Expand Down
112 changes: 89 additions & 23 deletions export.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ package unitype
import (
"bytes"
"errors"
"fmt"
"io"
"math"
"os"
"sort"

"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -93,12 +93,16 @@ func (f *Font) GetCmap(platformID, encodingID int) map[rune]GlyphIndex {
return nil
}

// SubsetKeepRunes prunes data for all GIDs except the ones corresponding to `runes`. The GIDs are
// maintained. Typically reduces glyf table size significantly.
func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
// LookupRunes looks up each rune in `runes` and returns a matching slice of glyph indices.
// When a rune is not found, a GID of 0 is used (notdef).
func (f *Font) LookupRunes(runes []rune) []GlyphIndex {
var maps []map[rune]GlyphIndex
// Search order (3,1), (1,0), (0,3).
maps = append(maps, f.GetCmap(3, 1), f.GetCmap(1, 0), f.GetCmap(0, 3))
maps = append(maps,
f.GetCmap(3, 1),
f.GetCmap(1, 0),
f.GetCmap(0, 3),
)

var indices []GlyphIndex
for _, r := range runes {
Expand All @@ -110,13 +114,17 @@ func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
break
}
}
if index == 0 {
return nil, fmt.Errorf("rune not found: %v", r)
}
indices = append(indices, index)
}
logrus.Debugf("Runes: %+v %s", runes, string(runes))
logrus.Debugf("GIDs: %+v", indices)
return indices
}

// SubsetKeepRunes returns a subset of the font in which data is pruned for every GID
// except those that `runes` resolve to. The runes are first translated to glyph indices
// with LookupRunes and the result is handed to SubsetKeepIndices, so the GIDs are
// maintained. Typically reduces glyf table size significantly.
func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
	return f.SubsetKeepIndices(f.LookupRunes(runes))
}

Expand All @@ -126,6 +134,8 @@ func (f *Font) SubsetKeepRunes(runes []rune) (*Font, error) {
func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
newfnt := font{}

// Expand the set of indices if any of the indices are composite
// glyphs depending on other glyphs.
gidIncludedMap := make(map[GlyphIndex]struct{}, len(indices))
for _, gid := range indices {
gidIncludedMap[gid] = struct{}{}
Expand All @@ -142,6 +152,7 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
for _, gid := range toscan {
components, err := f.glyf.GetComponents(gid)
if err != nil {
logrus.Debugf("Error getting components for %d", gid)
return nil, err
}
for _, gid := range components {
Expand Down Expand Up @@ -237,10 +248,12 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
newfnt.os2 = &os2Table{}
*newfnt.os2 = *f.font.os2
}

if f.font.post != nil {
newfnt.post = &postTable{}
*newfnt.post = *f.font.post
}

if f.font.cmap != nil {
newfnt.cmap = &cmapTable{}
*newfnt.cmap = *f.font.cmap
Expand All @@ -258,6 +271,7 @@ func (f *Font) SubsetKeepIndices(indices []GlyphIndex) (*Font, error) {
maxgid = gid
}
}
// Trim font down to only maximum needed glyphs without changing order.
maxNeededNum := int(maxgid) + 1
return subfnt.SubsetFirst(maxNeededNum)
}
Expand Down Expand Up @@ -337,6 +351,21 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
}
}

if f.font.prep != nil {
newfnt.prep = &prepTable{}
*newfnt.prep = *f.font.prep
}

if f.font.cvt != nil {
newfnt.cvt = &cvtTable{}
*newfnt.cvt = *f.font.cvt
}

if f.font.fpgm != nil {
newfnt.fpgm = &fpgmTable{}
*newfnt.fpgm = *f.font.fpgm
}

if f.font.name != nil {
newfnt.name = &nameTable{}
*newfnt.name = *f.font.name
Expand All @@ -358,13 +387,13 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
newfnt.post.glyphNameIndex = newfnt.post.glyphNameIndex[0:numGlyphs]
}
if len(newfnt.post.offsets) > numGlyphs {
// TODO: Not sure if this is updated here or generated on the fly?
newfnt.post.offsets = newfnt.post.offsets[0:numGlyphs]
}
if len(newfnt.post.glyphNames) > numGlyphs {
newfnt.post.glyphNames = newfnt.post.glyphNames[0:numGlyphs]
}
}

if f.font.cmap != nil {
newfnt.cmap = &cmapTable{
version: f.cmap.version,
Expand All @@ -386,26 +415,48 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
// Makes continuous entries with deltas.
// Does not use glyphIDData, but only the deltas. Can lead to many segments, but should not
// be too bad (especially since subsetting).
charcodes := make([]CharCode, 0, len(subt.charcodeToGID))
for cc, gid := range subt.charcodeToGID {
if int(gid) >= numGlyphs {
continue
}
charcodes = append(charcodes, cc)
}
sort.Slice(charcodes, func(i, j int) bool {
return charcodes[i] < charcodes[j]
})

segments := 0
i := 0
for i < numGlyphs {
for i < len(charcodes) {
j := i + 1
for ; j < numGlyphs; j++ {
if int(subt.runes[j]-subt.runes[i]) != j-i {
for ; j < len(charcodes); j++ {
if int(charcodes[j]-charcodes[i]) != j-i ||
int(subt.charcodeToGID[charcodes[j]]-subt.charcodeToGID[charcodes[i]]) != j-i {
break
}
}
// from i:j-1 maps to subt.runes[i]:subt.runes[i]+j-i-1
startCode := uint16(subt.runes[i])
endCode := uint16(subt.runes[i]) + uint16(j-i-1)
idDelta := uint16(uint16(i) - startCode)
// charcodes[i:j] is a run of consecutive charcodes (charcodes[i] .. charcodes[i]+j-i-1) mapping to consecutive GIDs.
startCode := uint16(charcodes[i])
endCode := uint16(charcodes[i]) + uint16(j-i-1)
idDelta := uint16(subt.charcodeToGID[charcodes[i]]) - uint16(charcodes[i])

newt.startCode = append(newt.startCode, startCode)
newt.endCode = append(newt.endCode, endCode)
newt.idDelta = append(newt.idDelta, idDelta)
newt.idRangeOffset = append(newt.idRangeOffset, 0)
segments++
i = j
}

if segments > 0 && newt.endCode[segments-1] < 65535 {
newt.endCode = append(newt.endCode, 65535)
newt.startCode = append(newt.startCode, 65535)
newt.idDelta = append(newt.idDelta, 1)
newt.idRangeOffset = append(newt.idRangeOffset, 0)
segments++
}

newt.length = uint16(2*8 + 2*4*segments)
newt.language = t.language
newt.segCountX2 = uint16(segments * 2)
Expand All @@ -423,17 +474,30 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
newt := cmapSubtableFormat12{}
groups := 0

for i := 0; i < numGlyphs; i++ {
charcodes := make([]CharCode, 0, len(subt.charcodeToGID))
for cc, gid := range subt.charcodeToGID {
if int(gid) >= numGlyphs {
continue
}
charcodes = append(charcodes, cc)
}
sort.Slice(charcodes, func(i, j int) bool {
return charcodes[i] < charcodes[j]
})

i := 0
for i < len(charcodes) {
j := i + 1
for ; j < numGlyphs; j++ {
if int(subt.runes[j]-subt.runes[i]) != j-i {
for ; j < len(charcodes); j++ {
if int(charcodes[j]-charcodes[i]) != j-i ||
int(subt.charcodeToGID[charcodes[j]]-subt.charcodeToGID[charcodes[i]]) != j-i {
break
}
}
// from i:j-1 maps to subt.runes[i]:subt.runes[i]+j-i-1
startCharCode := uint32(subt.runes[i])
endCharCode := uint32(subt.runes[i]) + uint32(j-i-1)
startGlyphID := uint32(i)
// charcodes[i:j] is a run of consecutive charcodes (charcodes[i] .. charcodes[i]+j-i-1) mapping to consecutive GIDs.
startCharCode := uint32(charcodes[i])
endCharCode := uint32(charcodes[i]) + uint32(j-i-1)
startGlyphID := uint32(subt.charcodeToGID[charcodes[i]])

group := sequentialMapGroup{
startCharCode: startCharCode,
Expand All @@ -442,7 +506,9 @@ func (f *Font) SubsetFirst(numGlyphs int) (*Font, error) {
}
newt.groups = append(newt.groups, group)
groups++
i = j
}

newt.length = uint32(2*2 + 3*4 + groups*3*4)
newt.language = t.language
newt.numGroups = uint32(groups)
Expand Down
3 changes: 3 additions & 0 deletions font.go
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,9 @@ func (f *font) TableInfo(table string) string {
for _, k := range f.cmap.subtableKeys {
subt := f.cmap.subtables[k]
b.WriteString(fmt.Sprintf("cmap subtable: %s: runes: %d\n", k, len(subt.runes)))
for i := range subt.charcodes {
b.WriteString(fmt.Sprintf("\t%d - Charcode %d (0x%X) - rune % X\n", i, subt.charcodes[i], subt.charcodes[i], subt.runes[i]))
}
}
case "loca":
if f.loca == nil {
Expand Down
Loading

0 comments on commit 2591bc0

Please sign in to comment.