diff options
Diffstat (limited to 'pdf/xpdf/GfxFont.cc')
-rw-r--r-- | pdf/xpdf/GfxFont.cc | 317 |
1 files changed, 240 insertions, 77 deletions
diff --git a/pdf/xpdf/GfxFont.cc b/pdf/xpdf/GfxFont.cc index d687588..6f83676 100644 --- a/pdf/xpdf/GfxFont.cc +++ b/pdf/xpdf/GfxFont.cc @@ -17,6 +17,7 @@ #include <string.h> #include <ctype.h> #include "gmem.h" +#include "GHash.h" #include "Error.h" #include "Object.h" #include "Dict.h" @@ -136,12 +137,16 @@ GfxFont::GfxFont(char *tagA, Ref idA, GString *nameA) { tag = new GString(tagA); id = idA; name = nameA; + origName = nameA; embFontName = NULL; extFontFile = NULL; } GfxFont::~GfxFont() { delete tag; + if (origName && origName != name) { + delete origName; + } if (name) { delete name; } @@ -286,8 +291,8 @@ void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) { obj1.free(); } -CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits) { - CharCodeToUnicode *ctu; +CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits, + CharCodeToUnicode *ctu) { GString *buf; Object obj1; int c; @@ -303,7 +308,11 @@ CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits) { } obj1.streamClose(); obj1.free(); - ctu = CharCodeToUnicode::parseCMap(buf, nBits); + if (ctu) { + ctu->mergeCMap(buf, nBits); + } else { + ctu = CharCodeToUnicode::parseCMap(buf, nBits); + } delete buf; return ctu; } @@ -334,7 +343,8 @@ char *GfxFont::readExtFontFile(int *len) { fseek(f, 0, SEEK_SET); buf = (char *)gmalloc(*len); if ((int)fread(buf, 1, *len, f) != *len) { - error(-1, "Error reading external font file '%s'", extFontFile); + error(-1, "Error reading external font file '%s'", + extFontFile->getCString()); } fclose(f); return buf; @@ -395,6 +405,8 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, char *charName; GBool missing, hex; Unicode toUnicode[256]; + CharCodeToUnicode *utu, *ctu2; + Unicode uBuf[8]; double mul; int firstChar, lastChar; Gushort w; @@ -419,7 +431,6 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, } } if (!name->cmp(stdFontMap[a].altName)) { - delete name; name = new GString(stdFontMap[a].properName); } } @@ -504,6 +515,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, // check FontDict for base encoding hasEncoding = gFalse; + usesMacRomanEnc = gFalse; baseEnc = NULL; baseEncFromFontFile = gFalse; fontDict->lookup("Encoding", &obj1); @@ -511,6 +523,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, obj1.dictLookup("BaseEncoding", &obj2); if (obj2.isName("MacRomanEncoding")) { hasEncoding = gTrue; + usesMacRomanEnc = gTrue; baseEnc = macRomanEncoding; } else if (obj2.isName("MacExpertEncoding")) { hasEncoding = gTrue; @@ -525,6 +538,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, obj2.free(); } else if (obj1.isName("MacRomanEncoding")) { hasEncoding = gTrue; + usesMacRomanEnc = gTrue; baseEnc = macRomanEncoding; } else if (obj1.isName("MacExpertEncoding")) { hasEncoding = gTrue; @@ -609,7 +623,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (obj3.isInt()) { code = obj3.getInt(); } else if (obj3.isName()) { - if (code < 256) { + if (code >= 0 && code < 256) { if (encFree[code]) { gfree(enc[code]); } @@ -633,76 +647,97 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, //----- build the mapping to Unicode ----- - // look for a ToUnicode CMap - if (!(ctu = readToUnicodeCMap(fontDict, 8))) { - - // no ToUnicode CMap, so use the char names + // pass 1: use the name-to-Unicode mapping table + missing = hex = gFalse; + for (code = 0; code < 256; ++code) { + if ((charName = enc[code])) { + if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) && + strcmp(charName, ".notdef")) { + // if it wasn't in the name-to-Unicode table, check for a + // name that looks like 'Axx' or 'xx', where 'A' is any letter + // and 'xx' is two hex digits + if ((strlen(charName) == 3 && + isalpha(charName[0]) && + isxdigit(charName[1]) && isxdigit(charName[2]) && + ((charName[1] >= 'a' && charName[1] <= 'f') || + (charName[1] >= 'A' && charName[1] <= 'F') || + (charName[2] >= 'a' && charName[2] <= 'f') || + (charName[2] >= 'A' && charName[2] <= 'F'))) || + (strlen(charName) == 2 && + isxdigit(charName[0]) && isxdigit(charName[1]) && + ((charName[0] >= 'a' && charName[0] <= 'f') || + (charName[0] >= 'A' && charName[0] <= 'F') || + (charName[1] >= 'a' && charName[1] <= 'f') || + (charName[1] >= 'A' && charName[1] <= 'F')))) { + hex = gTrue; + } + missing = gTrue; + } + } else { + toUnicode[code] = 0; + } + } - // pass 1: use the name-to-Unicode mapping table - missing = hex = gFalse; + // pass 2: try to fill in the missing chars, looking for names of + // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' + // are any letters, 'xx' is two hex digits, and 'nn' is 2-4 + // decimal digits + if (missing && globalParams->getMapNumericCharNames()) { for (code = 0; code < 256; ++code) { - if ((charName = enc[code])) { - if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) && - strcmp(charName, ".notdef")) { - // if it wasn't in the name-to-Unicode table, check for a - // name that looks like 'Axx' or 'xx', where 'A' is any letter - // and 'xx' is two hex digits - if ((strlen(charName) == 3 && - isalpha(charName[0]) && - isxdigit(charName[1]) && isxdigit(charName[2]) && - ((charName[1] >= 'a' && charName[1] <= 'f') || - (charName[1] >= 'A' && charName[1] <= 'F') || - (charName[2] >= 'a' && charName[2] <= 'f') || - (charName[2] >= 'A' && charName[2] <= 'F'))) || - (strlen(charName) == 2 && - isxdigit(charName[0]) && isxdigit(charName[1]) && - ((charName[0] >= 'a' && charName[0] <= 'f') || - (charName[0] >= 'A' && charName[0] <= 'F') || - (charName[1] >= 'a' && charName[1] <= 'f') || - (charName[1] >= 'A' && charName[1] <= 'F')))) { - hex = gTrue; - } - missing = gTrue; + if ((charName = enc[code]) && !toUnicode[code] && + strcmp(charName, ".notdef")) { + n = strlen(charName); + code2 = -1; + if (hex && n == 3 && isalpha(charName[0]) && + isxdigit(charName[1]) && isxdigit(charName[2])) { + sscanf(charName+1, "%x", &code2); + } else if (hex && n == 2 && + isxdigit(charName[0]) && isxdigit(charName[1])) { + sscanf(charName, "%x", &code2); + } else if (!hex && n >= 2 && n <= 4 && + isdigit(charName[0]) && isdigit(charName[1])) { + code2 = atoi(charName); + } else if (n >= 3 && n <= 5 && + isdigit(charName[1]) && isdigit(charName[2])) { + code2 = atoi(charName+1); + } else if (n >= 4 && n <= 6 && + isdigit(charName[2]) && isdigit(charName[3])) { + code2 = atoi(charName+2); + } + if (code2 >= 0 && code2 <= 0xff) { + toUnicode[code] = (Unicode)code2; } - } else { - toUnicode[code] = 0; } } + } - // pass 2: try to fill in the missing chars, looking for names of - // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' - // are any letters, 'xx' is two hex digits, and 'nn' is 2-4 - // decimal digits - if (missing && globalParams->getMapNumericCharNames()) { - for (code = 0; code < 256; ++code) { - if ((charName = enc[code]) && !toUnicode[code] && - strcmp(charName, ".notdef")) { - n = strlen(charName); - code2 = -1; - if (hex && n == 3 && isalpha(charName[0]) && - isxdigit(charName[1]) && isxdigit(charName[2])) { - sscanf(charName+1, "%x", &code2); - } else if (hex && n == 2 && - isxdigit(charName[0]) && isxdigit(charName[1])) { - sscanf(charName, "%x", &code2); - } else if (!hex && n >= 2 && n <= 4 && - isdigit(charName[0]) && isdigit(charName[1])) { - code2 = atoi(charName); - } else if (n >= 3 && n <= 5 && - isdigit(charName[1]) && isdigit(charName[2])) { - code2 = atoi(charName+1); - } else if (n >= 4 && n <= 6 && - isdigit(charName[2]) && isdigit(charName[3])) { - code2 = atoi(charName+2); - } - if (code2 >= 0 && code2 <= 0xff) { - toUnicode[code] = (Unicode)code2; - } + // construct the char code -> Unicode mapping object + ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode); + + // merge in a ToUnicode CMap, if there is one -- this overwrites + // existing entries in ctu, i.e., the ToUnicode CMap takes + // precedence, but the other encoding info is allowed to fill in any + // holes + readToUnicodeCMap(fontDict, 8, ctu); + + // look for a Unicode-to-Unicode mapping + if (name && (utu = globalParams->getUnicodeToUnicode(name))) { + for (i = 0; i < 256; ++i) { + toUnicode[i] = 0; + } + ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode); + for (i = 0; i < 256; ++i) { + n = ctu->mapToUnicode((CharCode)i, uBuf, 8); + if (n >= 1) { + n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8); + if (n >= 1) { + ctu2->setMapping((CharCode)i, uBuf, n); } } } - - ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode); + utu->decRefCnt(); + delete ctu; + ctu = ctu2; } //----- get the character widths ----- @@ -716,9 +751,15 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA, fontDict->lookup("FirstChar", &obj1); firstChar = obj1.isInt() ? obj1.getInt() : 0; obj1.free(); + if (firstChar < 0 || firstChar > 255) { + firstChar = 0; + } fontDict->lookup("LastChar", &obj1); lastChar = obj1.isInt() ? obj1.getInt() : 255; obj1.free(); + if (lastChar < 0 || lastChar > 255) { + lastChar = 255; + } mul = (type == fontType3) ? fontMat[0] : 0.001; fontDict->lookup("Widths", &obj1); if (obj1.isArray()) { @@ -819,6 +860,124 @@ CharCodeToUnicode *Gfx8BitFont::getToUnicode() { return ctu; } +Gushort *Gfx8BitFont::getCodeToGIDMap(TrueTypeFontFile *ff) { + Gushort *map; + int cmapPlatform, cmapEncoding; + int unicodeCmap, macRomanCmap, msSymbolCmap, cmap; + GBool useMacRoman, useUnicode; + GHash *nameToGID; + char *charName; + Unicode u; + int code, i, n; + + map = (Gushort *)gmalloc(256 * sizeof(Gushort)); + for (i = 0; i < 256; ++i) { + map[i] = 0; + } + + // To match up with the Adobe-defined behaviour, we choose a cmap + // like this: + // 1. If the PDF font has an encoding: + // 1a. If the PDF font specified MacRomanEncoding and the + // TrueType font has a Macintosh Roman cmap, use it, and + // reverse map the char names through MacRomanEncoding to + // get char codes. + // 1b. If the TrueType font has a Microsoft Unicode cmap or a + // non-Microsoft Unicode cmap, use it, and use the Unicode + // indexes, not the char codes. + // 1c. If the PDF font is symbolic and the TrueType font has a + // Microsoft Symbol cmap, use it, and use char codes + // directly (possibly with an offset of 0xf000). + // 1d. If the TrueType font has a Macintosh Roman cmap, use it, + // as in case 1a. + // 2. If the PDF font does not have an encoding: + // 2a. If the TrueType font has a Macintosh Roman cmap, use it, + // and use char codes directly (possibly with an offset of + // 0xf000). + // 2b. If the TrueType font has a Microsoft Symbol cmap, use it, + // and use char codes directly (possible with an offset of + // 0xf000). + // 3. If none of these rules apply, use the first cmap and hope for + // the best (this shouldn't happen). + unicodeCmap = macRomanCmap = msSymbolCmap = -1; + for (i = 0; i < ff->getNumCmaps(); ++i) { + cmapPlatform = ff->getCmapPlatform(i); + cmapEncoding = ff->getCmapEncoding(i); + if ((cmapPlatform == 3 && cmapEncoding == 1) || + cmapPlatform == 0) { + unicodeCmap = i; + } else if (cmapPlatform == 1 && cmapEncoding == 0) { + macRomanCmap = i; + } else if (cmapPlatform == 3 && cmapEncoding == 0) { + msSymbolCmap = i; + } + } + cmap = 0; + useMacRoman = gFalse; + useUnicode = gFalse; + if (hasEncoding) { + if (usesMacRomanEnc && macRomanCmap >= 0) { + cmap = macRomanCmap; + useMacRoman = gTrue; + } else if (unicodeCmap >= 0) { + cmap = unicodeCmap; + useUnicode = gTrue; + } else if ((flags & fontSymbolic) && msSymbolCmap >= 0) { + cmap = msSymbolCmap; + } else if (macRomanCmap >= 0) { + cmap = macRomanCmap; + useMacRoman = gTrue; + } + } else { + if (macRomanCmap >= 0) { + cmap = macRomanCmap; + } else if (msSymbolCmap >= 0) { + cmap = msSymbolCmap; + } + } + + // reverse map the char names through MacRomanEncoding, then map the + // char codes through the cmap + if (useMacRoman) { + for (i = 0; i < 256; ++i) { + if ((charName = enc[i])) { + if ((code = globalParams->getMacRomanCharCode(charName))) { + map[i] = ff->mapCodeToGID(cmap, code); + } + } + } + + // map Unicode through the cmap + } else if (useUnicode) { + for (i = 0; i < 256; ++i) { + if ((n = ctu->mapToUnicode((CharCode)i, &u, 1))) { + map[i] = ff->mapCodeToGID(cmap, u); + } + } + + // map the char codes through the cmap, possibly with an offset of + // 0xf000 + } else { + for (i = 0; i < 256; ++i) { + if (!(map[i] = ff->mapCodeToGID(cmap, i))) { + map[i] = ff->mapCodeToGID(cmap, 0xf000 + i); + } + } + } + + // try the TrueType 'post' table to handle any unmapped characters + if ((nameToGID = ff->getNameToGID())) { + for (i = 0; i < 256; ++i) { + if (!map[i] && (charName = enc[i])) { + map[i] = (Gushort)(int)nameToGID->lookup(charName); + } + } + delete nameToGID; + } + + return map; +} + Dict *Gfx8BitFont::getCharProcs() { return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL; } @@ -930,7 +1089,7 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, obj1.free(); // look for a ToUnicode CMap - if (!(ctu = readToUnicodeCMap(fontDict, 16))) { + if (!(ctu = readToUnicodeCMap(fontDict, 16, NULL))) { // the "Adobe-Identity" and "Adobe-UCS" collections don't have // cidToUnicode files @@ -1058,11 +1217,11 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, if (desFontDict->lookup("DW2", &obj1)->isArray() && obj1.arrayGetLength() == 2) { if (obj1.arrayGet(0, &obj2)->isNum()) { - widths.defVY = obj1.getNum() * 0.001; + widths.defVY = obj2.getNum() * 0.001; } obj2.free(); if (obj1.arrayGet(1, &obj2)->isNum()) { - widths.defHeight = obj1.getNum() * 0.001; + widths.defHeight = obj2.getNum() * 0.001; } obj2.free(); } @@ -1073,8 +1232,8 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, excepsSize = 0; i = 0; while (i + 1 < obj1.arrayGetLength()) { - obj1.arrayGet(0, &obj2); - obj2.arrayGet(0, &obj3); + obj1.arrayGet(i, &obj2); + obj1.arrayGet(i+ 1, &obj3); if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) { if (obj1.arrayGet(i + 2, &obj4)->isNum() && obj1.arrayGet(i + 3, &obj5)->isNum() && @@ -1107,10 +1266,10 @@ GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA, excepsSize * sizeof(GfxFontCIDWidthExcepV)); } j = obj2.getInt(); - for (k = 0; k < obj3.arrayGetLength(); ++k) { + for (k = 0; k < obj3.arrayGetLength(); k += 3) { if (obj3.arrayGet(k, &obj4)->isNum() && - obj3.arrayGet(k, &obj5)->isNum() && - obj3.arrayGet(k, &obj6)->isNum()) { + obj3.arrayGet(k+1, &obj5)->isNum() && + obj3.arrayGet(k+2, &obj6)->isNum()) { widths.excepsV[widths.nExceps].first = j; widths.excepsV[widths.nExceps].last = j; widths.excepsV[widths.nExceps].height = obj4.getNum() * 0.001; @@ -1259,7 +1418,7 @@ GString *GfxCIDFont::getCollection() { // GfxFontDict //------------------------------------------------------------------------ -GfxFontDict::GfxFontDict(XRef *xref, Dict *fontDict) { +GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) { int i; Object obj1, obj2; Ref r; @@ -1277,7 +1436,11 @@ GfxFontDict::GfxFontDict(XRef *xref, Dict *fontDict) { // (legal generation numbers are five digits, so any 6-digit // number would be safe) r.num = i; - r.gen = 999999; + if (fontDictRef) { + r.gen = 100000 + fontDictRef->num; + } else { + r.gen = 999999; + } } fonts[i] = GfxFont::makeFont(xref, fontDict->getKey(i), r, obj2.getDict()); |