//======================================================================== // // CharCodeToUnicode.cc // // Copyright 2001-2003 Glyph & Cog, LLC // //======================================================================== #include #ifdef USE_GCC_PRAGMAS #pragma implementation #endif #include #include #include "gmem.h" #include "gfile.h" #include "GString.h" #include "Error.h" #include "GlobalParams.h" #include "PSTokenizer.h" #include "CharCodeToUnicode.h" //------------------------------------------------------------------------ #define maxUnicodeString 8 struct CharCodeToUnicodeString { CharCode c; Unicode u[maxUnicodeString]; int len; }; //------------------------------------------------------------------------ static int getCharFromString(void *data) { char *p; int c; p = *(char **)data; if (*p) { c = *p++; *(char **)data = p; } else { c = EOF; } return c; } static int getCharFromFile(void *data) { return fgetc((FILE *)data); } //------------------------------------------------------------------------ CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName, GString *collection) { FILE *f; Unicode *mapA; CharCode size, mapLenA; char buf[64]; Unicode u; CharCodeToUnicode *ctu; if (!(f = fopen(fileName->getCString(), "r"))) { error(-1, "Couldn't open cidToUnicode file '%s'", fileName->getCString()); return NULL; } size = 32768; mapA = (Unicode *)gmalloc(size * sizeof(Unicode)); mapLenA = 0; while (getLine(buf, sizeof(buf), f)) { if (mapLenA == size) { size *= 2; mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode)); } if (sscanf(buf, "%x", &u) == 1) { mapA[mapLenA] = u; } else { error(-1, "Bad line (%d) in cidToUnicode file '%s'", (int)(mapLenA + 1), fileName->getCString()); mapA[mapLenA] = 0; } ++mapLenA; } fclose(f); ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue, NULL, 0, 0); gfree(mapA); return ctu; } CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode( GString *fileName) { FILE *f; Unicode *mapA; CharCodeToUnicodeString *sMapA; CharCode size, oldSize, len, sMapSizeA, sMapLenA; char buf[256]; char *tok; Unicode u0; Unicode uBuf[maxUnicodeString]; CharCodeToUnicode *ctu; int line, n, i; if (!(f = fopen(fileName->getCString(), "r"))) { error(-1, "Couldn't open unicodeToUnicode file '%s'", fileName->getCString()); return NULL; } size = 4096; mapA = (Unicode *)gmalloc(size * sizeof(Unicode)); memset(mapA, 0, size * sizeof(Unicode)); len = 0; sMapA = NULL; sMapSizeA = sMapLenA = 0; line = 0; while (getLine(buf, sizeof(buf), f)) { ++line; if (!(tok = strtok(buf, " \t\r\n")) || sscanf(tok, "%x", &u0) != 1) { error(-1, "Bad line (%d) in unicodeToUnicode file '%s'", line, fileName->getCString()); continue; } n = 0; while (n < maxUnicodeString) { if (!(tok = strtok(NULL, " \t\r\n"))) { break; } if (sscanf(tok, "%x", &uBuf[n]) != 1) { error(-1, "Bad line (%d) in unicodeToUnicode file '%s'", line, fileName->getCString()); break; } ++n; } if (n < 1) { error(-1, "Bad line (%d) in unicodeToUnicode file '%s'", line, fileName->getCString()); continue; } if (u0 >= size) { oldSize = size; while (u0 >= size) { size *= 2; } mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode)); memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode)); } if (n == 1) { mapA[u0] = uBuf[0]; } else { mapA[u0] = 0; if (sMapLenA == sMapSizeA) { sMapSizeA += 16; sMapA = (CharCodeToUnicodeString *) grealloc(sMapA, sMapSizeA * sizeof(CharCodeToUnicodeString)); } sMapA[sMapLenA].c = u0; for (i = 0; i < n; ++i) { sMapA[sMapLenA].u[i] = uBuf[i]; } sMapA[sMapLenA].len = n; ++sMapLenA; } if (u0 >= len) { len = u0 + 1; } } fclose(f); ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue, sMapA, sMapLenA, sMapSizeA); gfree(mapA); return ctu; } CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0); } CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { CharCodeToUnicode *ctu; char *p; ctu = new CharCodeToUnicode(NULL); p = buf->getCString(); ctu->parseCMap1(&getCharFromString, &p, nBits); return ctu; } void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) { char *p; p = buf->getCString(); parseCMap1(&getCharFromString, &p, nBits); } void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, int nBits) { PSTokenizer *pst; char tok1[256], tok2[256], tok3[256]; int nDigits, n1, n2, n3; CharCode i; CharCode code1, code2; GString *name; FILE *f; nDigits = nBits / 4; pst = new PSTokenizer(getCharFunc, data); pst->getToken(tok1, sizeof(tok1), &n1); while (pst->getToken(tok2, sizeof(tok2), &n2)) { if (!strcmp(tok2, "usecmap")) { if (tok1[0] == '/') { name = new GString(tok1 + 1); if ((f = globalParams->findToUnicodeFile(name))) { parseCMap1(&getCharFromFile, f, nBits); fclose(f); } else { error(-1, "Couldn't find ToUnicode CMap file for '%s'", name->getCString()); } delete name; } pst->getToken(tok1, sizeof(tok1), &n1); } else if (!strcmp(tok2, "beginbfchar")) { while (pst->getToken(tok1, sizeof(tok1), &n1)) { if (!strcmp(tok1, "endbfchar")) { break; } if (!pst->getToken(tok2, sizeof(tok2), &n2) || !strcmp(tok2, "endbfchar")) { error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); break; } if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && tok2[0] == '<' && tok2[n2 - 1] == '>')) { error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); continue; } tok1[n1 - 1] = tok2[n2 - 1] = '\0'; if (sscanf(tok1 + 1, "%x", &code1) != 1) { error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); continue; } addMapping(code1, tok2 + 1, n2 - 1, 0); } pst->getToken(tok1, sizeof(tok1), &n1); } else if (!strcmp(tok2, "beginbfrange")) { while (pst->getToken(tok1, sizeof(tok1), &n1)) { if (!strcmp(tok1, "endbfrange")) { break; } if (!pst->getToken(tok2, sizeof(tok2), &n2) || !strcmp(tok2, "endbfrange") || !pst->getToken(tok3, sizeof(tok3), &n3) || !strcmp(tok3, "endbfrange")) { error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); break; } if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) { error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); continue; } tok1[n1 - 1] = tok2[n2 - 1] = '\0'; if (sscanf(tok1 + 1, "%x", &code1) != 1 || sscanf(tok2 + 1, "%x", &code2) != 1) { error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); continue; } if (!strcmp(tok3, "[")) { i = 0; while (pst->getToken(tok1, sizeof(tok1), &n1) && code1 + i <= code2) { if (!strcmp(tok1, "]")) { break; } if (tok1[0] == '<' && tok1[n1 - 1] == '>') { tok1[n1 - 1] = '\0'; addMapping(code1 + i, tok1 + 1, n1 - 2, 0); } else { error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); } ++i; } } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') { tok3[n3 - 1] = '\0'; for (i = 0; code1 <= code2; ++code1, ++i) { addMapping(code1, tok3 + 1, n3 - 2, i); } } else { error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); } } pst->getToken(tok1, sizeof(tok1), &n1); } else { strcpy(tok1, tok2); } } delete pst; } void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n, int offset) { CharCode oldLen, i; Unicode u; char uHex[5]; int j; if (code >= mapLen) { oldLen = mapLen; mapLen = (code + 256) & ~255; map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); for (i = oldLen; i < mapLen; ++i) { map[i] = 0; } } if (n <= 4) { if (sscanf(uStr, "%x", &u) != 1) { error(-1, "Illegal entry in ToUnicode CMap"); return; } map[code] = u + offset; } else { if (sMapLen >= sMapSize) { sMapSize = sMapSize + 16; sMap = (CharCodeToUnicodeString *) grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); } map[code] = 0; sMap[sMapLen].c = code; sMap[sMapLen].len = n / 4; for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { strncpy(uHex, uStr + j*4, 4); uHex[4] = '\0'; if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { error(-1, "Illegal entry in ToUnicode CMap"); } } sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset; ++sMapLen; } } CharCodeToUnicode::CharCodeToUnicode(GString *tagA) { CharCode i; tag = tagA; mapLen = 256; map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); for (i = 0; i < mapLen; ++i) { map[i] = 0; } sMap = NULL; sMapLen = sMapSize = 0; refCnt = 1; #if MULTITHREADED gInitMutex(&mutex); #endif } CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA, CharCode mapLenA, GBool copyMap, CharCodeToUnicodeString *sMapA, int sMapLenA, int sMapSizeA) { tag = tagA; mapLen = mapLenA; if (copyMap) { map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); memcpy(map, mapA, mapLen * sizeof(Unicode)); } else { map = mapA; } sMap = sMapA; sMapLen = sMapLenA; sMapSize = sMapSizeA; refCnt = 1; #if MULTITHREADED gInitMutex(&mutex); #endif } CharCodeToUnicode::~CharCodeToUnicode() { if (tag) { delete tag; } gfree(map); if (sMap) { gfree(sMap); } #if MULTITHREADED gDestroyMutex(&mutex); #endif } void CharCodeToUnicode::incRefCnt() { #if MULTITHREADED gLockMutex(&mutex); #endif ++refCnt; #if MULTITHREADED gUnlockMutex(&mutex); #endif } void CharCodeToUnicode::decRefCnt() { GBool done; #if MULTITHREADED gLockMutex(&mutex); #endif done = --refCnt == 0; #if MULTITHREADED gUnlockMutex(&mutex); #endif if (done) { delete this; } } GBool CharCodeToUnicode::match(GString *tagA) { return tag && !tag->cmp(tagA); } void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) { int i; if (len == 1) { map[c] = u[0]; } else { map[c] = 0; if (sMapLen == sMapSize) { sMapSize += 8; sMap = (CharCodeToUnicodeString *) grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); } sMap[sMapLen].c = c; sMap[sMapLen].len = len; for (i = 0; i < len && i < maxUnicodeString; ++i) { sMap[sMapLen].u[i] = u[i]; } ++sMapLen; } } int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) { int i, j; if (c >= mapLen) { return 0; } if (map[c]) { u[0] = map[c]; return 1; } for (i = 0; i < sMapLen; ++i) { if (sMap[i].c == c) { for (j = 0; j < sMap[i].len && j < size; ++j) { u[j] = sMap[i].u[j]; } return j; } } return 0; } //------------------------------------------------------------------------ CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) { int i; size = sizeA; cache = (CharCodeToUnicode **)gmalloc(size * sizeof(CharCodeToUnicode *)); for (i = 0; i < size; ++i) { cache[i] = NULL; } } CharCodeToUnicodeCache::~CharCodeToUnicodeCache() { int i; for (i = 0; i < size; ++i) { if (cache[i]) { cache[i]->decRefCnt(); } } gfree(cache); } CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) { CharCodeToUnicode *ctu; int i, j; if (cache[0] && cache[0]->match(tag)) { cache[0]->incRefCnt(); return cache[0]; } for (i = 1; i < size; ++i) { if (cache[i] && cache[i]->match(tag)) { ctu = cache[i]; for (j = i; j >= 1; --j) { cache[j] = cache[j - 1]; } cache[0] = ctu; ctu->incRefCnt(); return ctu; } } return NULL; } void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) { int i; if (cache[size - 1]) { cache[size - 1]->decRefCnt(); } for (i = size - 1; i >= 1; --i) { cache[i] = cache[i - 1]; } cache[0] = ctu; ctu->incRefCnt(); }