Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/pdf/xpdf/CharCodeToUnicode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/xpdf/CharCodeToUnicode.cc')
-rw-r--r-- pdf/xpdf/CharCodeToUnicode.cc | 363
1 files changed, 252 insertions, 111 deletions
diff --git a/pdf/xpdf/CharCodeToUnicode.cc b/pdf/xpdf/CharCodeToUnicode.cc
index a374b1b..2e2ad47 100644
--- a/pdf/xpdf/CharCodeToUnicode.cc
+++ b/pdf/xpdf/CharCodeToUnicode.cc
@@ -54,7 +54,8 @@ static int getCharFromFile(void *data) {
//------------------------------------------------------------------------
-CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
+CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
+ GString *collection) {
FILE *f;
Unicode *mapA;
CharCode size, mapLenA;
@@ -62,9 +63,9 @@ CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
Unicode u;
CharCodeToUnicode *ctu;
- if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) {
- error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
- collectionA->getCString());
+ if (!(f = fopen(fileName->getCString(), "r"))) {
+ error(-1, "Couldn't open cidToUnicode file '%s'",
+ fileName->getCString());
return NULL;
}
@@ -80,22 +81,110 @@ CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
if (sscanf(buf, "%x", &u) == 1) {
mapA[mapLenA] = u;
} else {
- error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
- (int)(mapLenA + 1), collectionA->getCString());
+ error(-1, "Bad line (%d) in cidToUnicode file '%s'",
+ (int)(mapLenA + 1), fileName->getCString());
mapA[mapLenA] = 0;
}
++mapLenA;
}
fclose(f);
- ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue,
- NULL, 0);
+ ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
+ NULL, 0, 0);
+ gfree(mapA);
+ return ctu;
+}
+
+// Builds a CharCodeToUnicode from a unicodeToUnicode text file.  Each line
+// holds a source value in hex followed by one to maxUnicodeString
+// replacement values in hex, separated by spaces or tabs.  Lines with no
+// usable source value or no replacement value are reported through error()
+// and skipped; a line whose later token is bad keeps the values read before
+// it.  Returns NULL when the file cannot be opened, otherwise a new object
+// tagged with a copy of fileName.
+CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
+    GString *fileName) {
+  FILE *f;
+  Unicode *mapA;
+  CharCodeToUnicodeString *sMapA;
+  CharCode size, oldSize, len, sMapSizeA, sMapLenA;
+  char buf[256];
+  char *tok;
+  Unicode u0;
+  Unicode uBuf[maxUnicodeString];
+  CharCodeToUnicode *ctu;
+  int line, n, i;
+
+  if (!(f = fopen(fileName->getCString(), "r"))) {
+    error(-1, "Couldn't open unicodeToUnicode file '%s'",
+          fileName->getCString());
+    return NULL;
+  }
+
+  // direct table indexed by source value; starts zero-filled and is doubled
+  // on demand below
+  size = 4096;
+  mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
+  memset(mapA, 0, size * sizeof(Unicode));
+  len = 0;
+  sMapA = NULL;
+  sMapSizeA = sMapLenA = 0;
+
+  line = 0;
+  while (getLine(buf, sizeof(buf), f)) {
+    ++line;
+    if (!(tok = strtok(buf, " \t\r\n")) ||
+        sscanf(tok, "%x", &u0) != 1) {
+      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
+            line, fileName->getCString());
+      continue;
+    }
+    // U+10FFFF is the last Unicode code point.  Rejecting anything larger
+    // also keeps the doubling loop below from wrapping size to zero (which
+    // would never terminate) and from requesting an enormous table.
+    if (u0 > 0x10ffff) {
+      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
+            line, fileName->getCString());
+      continue;
+    }
+    // collect the replacement values; tokens beyond maxUnicodeString are
+    // left unread
+    n = 0;
+    while (n < maxUnicodeString) {
+      if (!(tok = strtok(NULL, " \t\r\n"))) {
+        break;
+      }
+      if (sscanf(tok, "%x", &uBuf[n]) != 1) {
+        error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
+              line, fileName->getCString());
+        break;
+      }
+      ++n;
+    }
+    if (n < 1) {
+      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
+            line, fileName->getCString());
+      continue;
+    }
+    if (u0 >= size) {
+      oldSize = size;
+      while (u0 >= size) {
+        size *= 2;
+      }
+      mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
+      memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
+    }
+    if (n == 1) {
+      mapA[u0] = uBuf[0];
+    } else {
+      // multi-value replacement: zero the direct slot and append an entry
+      // to the string map instead
+      mapA[u0] = 0;
+      if (sMapLenA == sMapSizeA) {
+        sMapSizeA += 16;
+        sMapA = (CharCodeToUnicodeString *)
+            grealloc(sMapA, sMapSizeA * sizeof(CharCodeToUnicodeString));
+      }
+      sMapA[sMapLenA].c = u0;
+      for (i = 0; i < n; ++i) {
+        sMapA[sMapLenA].u[i] = uBuf[i];
+      }
+      sMapA[sMapLenA].len = n;
+      ++sMapLenA;
+    }
+    // len ends up one past the highest source value seen
+    if (u0 >= len) {
+      len = u0 + 1;
+    }
+  }
+  fclose(f);
+
+  // copyMap is gTrue, so the constructor copies mapA and the local table is
+  // freed below; sMapA is handed over as-is
+  ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
+                              sMapA, sMapLenA, sMapSizeA);
gfree(mapA);
return ctu;
}
CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
- return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
+ return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0); // no tag; 256-entry map with copyMap set; no string map (length 0, size 0)
}
CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
@@ -108,16 +197,20 @@ CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
return ctu;
}
+// Runs the ToUnicode CMap text held in buf through parseCMap1 on this
+// object, so the mappings it defines are added to the existing ones.
+void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
+  // parseCMap1 is given the address of this cursor as its opaque data
+  // pointer
+  char *cursor = buf->getCString();
+
+  parseCMap1(&getCharFromString, &cursor, nBits);
+}
+
void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
int nBits) {
PSTokenizer *pst;
char tok1[256], tok2[256], tok3[256];
int nDigits, n1, n2, n3;
- CharCode oldLen, i;
+ CharCode i;
CharCode code1, code2;
- Unicode u;
- char uHex[5];
- int j;
GString *name;
FILE *f;
@@ -158,38 +251,7 @@ void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
continue;
}
- if (code1 >= mapLen) {
- oldLen = mapLen;
- mapLen = (code1 + 256) & ~255;
- map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
- for (i = oldLen; i < mapLen; ++i) {
- map[i] = 0;
- }
- }
- if (n2 == 6) {
- if (sscanf(tok2 + 1, "%x", &u) != 1) {
- error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
- continue;
- }
- map[code1] = u;
- } else {
- map[code1] = 0;
- if (sMapLen == sMapSize) {
- sMapSize += 8;
- sMap = (CharCodeToUnicodeString *)
- grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
- }
- sMap[sMapLen].c = code1;
- sMap[sMapLen].len = (n2 - 2) / 4;
- for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
- strncpy(uHex, tok2 + 1 + j*4, 4);
- uHex[4] = '\0';
- if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
- error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
- }
- }
- ++sMapLen;
- }
+ addMapping(code1, tok2 + 1, n2 - 1, 0);
}
pst->getToken(tok1, sizeof(tok1), &n1);
} else if (!strcmp(tok2, "beginbfrange")) {
@@ -205,53 +267,39 @@ void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
break;
}
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
- n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
- tok3[0] == '<' && tok3[n3 - 1] == '>')) {
+ n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
continue;
}
- tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
+ tok1[n1 - 1] = tok2[n2 - 1] = '\0';
if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
sscanf(tok2 + 1, "%x", &code2) != 1) {
error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
continue;
}
- if (code2 >= mapLen) {
- oldLen = mapLen;
- mapLen = (code2 + 256) & ~255;
- map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
- for (i = oldLen; i < mapLen; ++i) {
- map[i] = 0;
- }
- }
- if (n3 <= 6) {
- if (sscanf(tok3 + 1, "%x", &u) != 1) {
- error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
- continue;
- }
- for (; code1 <= code2; ++code1) {
- map[code1] = u++;
- }
- } else {
- if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) {
- sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7;
- sMap = (CharCodeToUnicodeString *)
- grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
+ if (!strcmp(tok3, "[")) {
+ i = 0;
+ while (pst->getToken(tok1, sizeof(tok1), &n1) &&
+ code1 + i <= code2) {
+ if (!strcmp(tok1, "]")) {
+ break;
+ }
+ if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
+ tok1[n1 - 1] = '\0';
+ addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
+ } else {
+ error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
+ }
+ ++i;
}
+ } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
+ tok3[n3 - 1] = '\0';
for (i = 0; code1 <= code2; ++code1, ++i) {
- map[code1] = 0;
- sMap[sMapLen].c = code1;
- sMap[sMapLen].len = (n3 - 2) / 4;
- for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
- strncpy(uHex, tok3 + 1 + j*4, 4);
- uHex[4] = '\0';
- if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
- error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
- }
- }
- sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
- ++sMapLen;
+ addMapping(code1, tok3 + 1, n3 - 2, i);
}
+
+ } else {
+ error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
}
}
pst->getToken(tok1, sizeof(tok1), &n1);
@@ -262,10 +310,52 @@ void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
delete pst;
}
-CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
+// Stores the Unicode translation of one character code.  uStr holds n hex
+// digits (without the surrounding angle brackets).  Up to four digits are
+// stored as a single value in map[]; longer strings are split into 4-digit
+// groups and stored as one multi-value entry in sMap[], with map[code] set
+// to 0.  offset is added to the single value, or to the last value of a
+// multi-value entry.  map[] is grown (zero-filled) when code lies beyond it.
+void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
+                                   int offset) {
+  CharCode oldLen, i;
+  Unicode u;
+  char uHex[5];
+  int len, j;
+
+  if (code >= mapLen) {
+    oldLen = mapLen;
+    // round up to the next multiple of 256 above code
+    mapLen = (code + 256) & ~255;
+    map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
+    for (i = oldLen; i < mapLen; ++i) {
+      map[i] = 0;
+    }
+  }
+  if (n <= 4) {
+    if (sscanf(uStr, "%x", &u) != 1) {
+      error(-1, "Illegal entry in ToUnicode CMap");
+      return;
+    }
+    map[code] = u + offset;
+  } else {
+    if (sMapLen >= sMapSize) {
+      sMapSize = sMapSize + 16;
+      sMap = (CharCodeToUnicodeString *)
+          grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
+    }
+    map[code] = 0;
+    // Clamp the stored length to the number of values actually kept, so
+    // that neither the offset adjustment below nor later readers of len
+    // index past the values filled in here.
+    len = n / 4;
+    if (len > maxUnicodeString) {
+      len = maxUnicodeString;
+    }
+    sMap[sMapLen].c = code;
+    sMap[sMapLen].len = len;
+    for (j = 0; j < len; ++j) {
+      strncpy(uHex, uStr + j*4, 4);
+      uHex[4] = '\0';
+      if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
+        error(-1, "Illegal entry in ToUnicode CMap");
+        // keep the slot defined instead of leaving it uninitialized
+        sMap[sMapLen].u[j] = 0;
+      }
+    }
+    // n > 4 in this branch, so len >= 1 and the index is valid
+    sMap[sMapLen].u[len - 1] += offset;
+    ++sMapLen;
+  }
+}
+
+CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
CharCode i;
- collection = collectionA;
+ tag = tagA;
mapLen = 256;
map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
for (i = 0; i < mapLen; ++i) {
@@ -274,13 +364,16 @@ CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
sMap = NULL;
sMapLen = sMapSize = 0;
refCnt = 1;
+#if MULTITHREADED
+ gInitMutex(&mutex);
+#endif
}
-CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
+CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
CharCode mapLenA, GBool copyMap,
CharCodeToUnicodeString *sMapA,
- int sMapLenA) {
- collection = collectionA;
+ int sMapLenA, int sMapSizeA) {
+ tag = tagA;
mapLen = mapLenA;
if (copyMap) {
map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
@@ -289,32 +382,75 @@ CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
map = mapA;
}
sMap = sMapA;
- sMapLen = sMapSize = sMapLenA;
+ sMapLen = sMapLenA;
+ sMapSize = sMapSizeA;
refCnt = 1;
+#if MULTITHREADED
+ gInitMutex(&mutex);
+#endif
}
CharCodeToUnicode::~CharCodeToUnicode() {
- if (collection) {
- delete collection;
+ if (tag) { // tag may be NULL (make8BitToUnicode passes NULL)
+ delete tag;
}
gfree(map);
if (sMap) {
gfree(sMap);
}
+#if MULTITHREADED
+ gDestroyMutex(&mutex); // pairs with the gInitMutex call made in the constructors
+#endif
}
void CharCodeToUnicode::incRefCnt() {
+#if MULTITHREADED
+ gLockMutex(&mutex); // hold the mutex around the refCnt update in MULTITHREADED builds
+#endif
++refCnt;
+#if MULTITHREADED
+ gUnlockMutex(&mutex);
+#endif
}
void CharCodeToUnicode::decRefCnt() {
- if (--refCnt == 0) {
+ GBool done;
+
+#if MULTITHREADED
+ gLockMutex(&mutex);
+#endif
+ done = --refCnt == 0; // record, while the mutex is held, whether this was the last reference
+#if MULTITHREADED
+ gUnlockMutex(&mutex);
+#endif
+ if (done) { // delete only after unlocking: the destructor destroys the mutex
delete this;
}
}
-GBool CharCodeToUnicode::match(GString *collectionA) {
- return collection && !collection->cmp(collectionA);
+GBool CharCodeToUnicode::match(GString *tagA) {
+ return tag && !tag->cmp(tagA); // true only when this object has a tag and tag->cmp(tagA) returns zero
+}
+
+// Sets the mapping for character code c to the len Unicode values in u.  A
+// single value goes straight into map[]; anything else is appended to
+// sMap[] with map[c] zeroed.  A code beyond the current map is ignored, and
+// at most maxUnicodeString values are recorded, so nothing is written or
+// later read out of bounds.
+void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
+  int i, n;
+
+  if (c >= mapLen) {
+    // map[] is not grown here, so an out-of-range code cannot be stored
+    return;
+  }
+  if (len == 1) {
+    map[c] = u[0];
+  } else {
+    map[c] = 0;
+    if (sMapLen == sMapSize) {
+      sMapSize += 8;
+      sMap = (CharCodeToUnicodeString *)
+          grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
+    }
+    // store the number of values actually copied, not the requested len,
+    // so the recorded length never exceeds what the entry holds
+    n = len < maxUnicodeString ? len : maxUnicodeString;
+    sMap[sMapLen].c = c;
+    sMap[sMapLen].len = n;
+    for (i = 0; i < n; ++i) {
+      sMap[sMapLen].u[i] = u[i];
+    }
+    ++sMapLen;
+  }
}
int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
@@ -340,34 +476,37 @@ int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
//------------------------------------------------------------------------
-CIDToUnicodeCache::CIDToUnicodeCache() {
+CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
int i;
- for (i = 0; i < cidToUnicodeCacheSize; ++i) {
+ size = sizeA; // number of slots in the cache array allocated on the next line
+ cache = (CharCodeToUnicode **)gmalloc(size * sizeof(CharCodeToUnicode *));
+ for (i = 0; i < size; ++i) { // every slot starts out empty
cache[i] = NULL;
}
}
-CIDToUnicodeCache::~CIDToUnicodeCache() {
+CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
int i;
- for (i = 0; i < cidToUnicodeCacheSize; ++i) {
+ for (i = 0; i < size; ++i) { // drop the cache's reference on each occupied slot
if (cache[i]) {
cache[i]->decRefCnt();
}
}
+ gfree(cache); // free the slot array obtained from gmalloc in the constructor
}
-CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
+CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
CharCodeToUnicode *ctu;
int i, j;
- if (cache[0] && cache[0]->match(collection)) {
+ if (cache[0] && cache[0]->match(tag)) {
cache[0]->incRefCnt();
return cache[0];
}
- for (i = 1; i < cidToUnicodeCacheSize; ++i) {
- if (cache[i] && cache[i]->match(collection)) {
+ for (i = 1; i < size; ++i) {
+ if (cache[i] && cache[i]->match(tag)) {
ctu = cache[i];
for (j = i; j >= 1; --j) {
cache[j] = cache[j - 1];
@@ -377,16 +516,18 @@ CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
return ctu;
}
}
- if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) {
- if (cache[cidToUnicodeCacheSize - 1]) {
- cache[cidToUnicodeCacheSize - 1]->decRefCnt();
- }
- for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) {
- cache[j] = cache[j - 1];
- }
- cache[0] = ctu;
- ctu->incRefCnt();
- return ctu;
- }
return NULL;
}
+
+// Inserts ctu into the first cache slot and takes a reference on it.  The
+// entry in the last slot, if any, loses the cache's reference, and the
+// remaining entries move back by one slot.  Does nothing when the cache has
+// no slots or ctu is NULL.
+void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
+  int i;
+
+  if (size < 1 || !ctu) {
+    // with no slots, cache[size - 1] would index before the array; a NULL
+    // ctu cannot be referenced
+    return;
+  }
+  if (cache[size - 1]) {
+    cache[size - 1]->decRefCnt();
+  }
+  for (i = size - 1; i >= 1; --i) {
+    cache[i] = cache[i - 1];
+  }
+  cache[0] = ctu;
+  ctu->incRefCnt();
+}