From 02eb16fef45712a91e24f6471b9e2f31249c888e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 01 Mar 2005 22:24:10 +0000 Subject: Use poppler instead of including xpdf source code. Poppler is a fork of 2005-03-01 Kristian Høgsberg Use poppler instead of including xpdf source code. Poppler is a fork of xpdf to build it as a shared library. See http://freedesktop.org/wiki/Software/poppler. * pdf/xpdf/*, pdf/goo/*, pdf/splash/*, pdf/fofi/*: Remove included xpdf fork. * pdf/Makefile.am: Build libpdfdocument.a here. * pdf/GDKSplashOutputDev.cc: * pdf/GDKSplashOutputDev.h: * pdf/GnomeVFSStream.cc: * pdf/GnomeVFSStream.h: * pdf-document.cc: * pdf-document.h: * test-gdk-output-dev.cc * Thumb.cc: * Thumb.h: Pull these files out of pdf/xpdf and adjust to compile against poppler. --- (limited to 'pdf/xpdf/TextOutputDev.cc') diff --git a/pdf/xpdf/TextOutputDev.cc b/pdf/xpdf/TextOutputDev.cc deleted file mode 100644 index a492e7f..0000000 --- a/pdf/xpdf/TextOutputDev.cc +++ /dev/null @@ -1,3529 +0,0 @@ -//======================================================================== -// -// TextOutputDev.cc -// -// Copyright 1997-2003 Glyph & Cog, LLC -// -//======================================================================== - -#include - -#ifdef USE_GCC_PRAGMAS -#pragma implementation -#endif - -#include -#include -#include -#include -#include -#ifdef WIN32 -#include // for O_BINARY -#include // for setmode -#endif -#include "gmem.h" -#include "GString.h" -#include "GList.h" -#include "xpdfconfig.h" -#include "Error.h" -#include "GlobalParams.h" -#include "UnicodeMap.h" -#include "UnicodeTypeTable.h" -#include "GfxState.h" -#include "TextOutputDev.h" - -#ifdef MACOS -// needed for setting type/creator of MacOS files -#include "ICSupport.h" -#endif - -//------------------------------------------------------------------------ -// parameters -//------------------------------------------------------------------------ - -// Each bucket in a text pool includes baselines within a range of -// this many points. -#define textPoolStep 4 - -// Inter-character space width which will cause addChar to start a new -// word. -#define minWordBreakSpace 0.1 - -// Negative inter-character space width, i.e., overlap, which will -// cause addChar to start a new word. -#define minDupBreakOverlap 0.2 - -// Max distance between baselines of two lines within a block, as a -// fraction of the font size. -#define maxLineSpacingDelta 1.5 - -// Max difference in primary font sizes on two lines in the same -// block. Delta1 is used when examining new lines above and below the -// current block; delta2 is used when examining text that overlaps the -// current block; delta3 is used when examining text to the left and -// right of the current block. -#define maxBlockFontSizeDelta1 0.05 -#define maxBlockFontSizeDelta2 0.6 -#define maxBlockFontSizeDelta3 0.2 - -// Max difference in font sizes inside a word. -#define maxWordFontSizeDelta 0.05 - -// Maximum distance between baselines of two words on the same line, -// e.g., distance between subscript or superscript and the primary -// baseline, as a fraction of the font size. -#define maxIntraLineDelta 0.5 - -// Minimum inter-word spacing, as a fraction of the font size. (Only -// used for raw ordering.) -#define minWordSpacing 0.15 - -// Maximum inter-word spacing, as a fraction of the font size. -#define maxWordSpacing 1.5 - -// Maximum horizontal spacing which will allow a word to be pulled -// into a block. -#define minColSpacing1 0.3 - -// Minimum spacing between columns, as a fraction of the font size. -#define minColSpacing2 1.0 - -// Maximum vertical spacing between blocks within a flow, as a -// multiple of the font size. -#define maxBlockSpacing 2.5 - -// Minimum spacing between characters within a word, as a fraction of -// the font size. -#define minCharSpacing -0.2 - -// Maximum spacing between characters within a word, as a fraction of -// the font size, when there is no obvious extra-wide character -// spacing. -#define maxCharSpacing 0.03 - -// When extra-wide character spacing is detected, the inter-character -// space threshold is set to the minimum inter-character space -// multiplied by this constant. -#define maxWideCharSpacingMul 1.3 - -// Max difference in primary,secondary coordinates (as a fraction of -// the font size) allowed for duplicated text (fake boldface, drop -// shadows) which is to be discarded. -#define dupMaxPriDelta 0.1 -#define dupMaxSecDelta 0.2 - -//------------------------------------------------------------------------ -// TextFontInfo -//------------------------------------------------------------------------ - -TextFontInfo::TextFontInfo(GfxState *state) { - gfxFont = state->getFont(); -#if TEXTOUT_WORD_LIST - fontName = (gfxFont && gfxFont->getOrigName()) - ? gfxFont->getOrigName()->copy() - : (GString *)NULL; -#endif -} - -TextFontInfo::~TextFontInfo() { -#if TEXTOUT_WORD_LIST - if (fontName) { - delete fontName; - } -#endif -} - -GBool TextFontInfo::matches(GfxState *state) { - return state->getFont() == gfxFont; -} - -//------------------------------------------------------------------------ -// TextWord -//------------------------------------------------------------------------ - -TextWord::TextWord(GfxState *state, int rotA, double x0, double y0, - int charPosA, TextFontInfo *fontA, double fontSizeA) { - GfxFont *gfxFont; - double x, y, ascent, descent; - - rot = rotA; - charPos = charPosA; - charLen = 0; - font = fontA; - fontSize = fontSizeA; - state->transform(x0, y0, &x, &y); - if ((gfxFont = font->gfxFont)) { - ascent = gfxFont->getAscent() * fontSize; - descent = gfxFont->getDescent() * fontSize; - } else { - // this means that the PDF file draws text without a current font, - // which should never happen - ascent = 0.95 * fontSize; - descent = -0.35 * fontSize; - } - switch (rot) { - case 0: - yMin = y - ascent; - yMax = y - descent; - if (yMin == yMax) { - // this is a sanity check for a case that shouldn't happen -- but - // if it does happen, we want to avoid dividing by zero later - yMin = y; - yMax = y + 1; - } - base = y; - break; - case 1: - xMin = x + descent; - xMax = x + ascent; - if (xMin == xMax) { - // this is a sanity check for a case that shouldn't happen -- but - // if it does happen, we want to avoid dividing by zero later - xMin = x; - xMax = x + 1; - } - base = x; - break; - case 2: - yMin = y + descent; - yMax = y + ascent; - if (yMin == yMax) { - // this is a sanity check for a case that shouldn't happen -- but - // if it does happen, we want to avoid dividing by zero later - yMin = y; - yMax = y + 1; - } - base = y; - break; - case 3: - xMin = x - ascent; - xMax = x - descent; - if (xMin == xMax) { - // this is a sanity check for a case that shouldn't happen -- but - // if it does happen, we want to avoid dividing by zero later - xMin = x; - xMax = x + 1; - } - base = x; - break; - } - text = NULL; - edge = NULL; - len = size = 0; - spaceAfter = gFalse; - next = NULL; - -#if TEXTOUT_WORD_LIST - GfxRGB rgb; - - if ((state->getRender() & 3) == 1) { - state->getStrokeRGB(&rgb); - } else { - state->getFillRGB(&rgb); - } - colorR = rgb.r; - colorG = rgb.g; - colorB = rgb.b; -#endif -} - -TextWord::~TextWord() { - gfree(text); - gfree(edge); -} - -void TextWord::addChar(GfxState *state, double x, double y, - double dx, double dy, Unicode u) { - if (len == size) { - size += 16; - text = (Unicode *)grealloc(text, size * sizeof(Unicode)); - edge = (double *)grealloc(edge, (size + 1) * sizeof(double)); - } - text[len] = u; - switch (rot) { - case 0: - if (len == 0) { - xMin = x; - } - edge[len] = x; - xMax = edge[len+1] = x + dx; - break; - case 1: - if (len == 0) { - yMin = y; - } - edge[len] = y; - yMax = edge[len+1] = y + dy; - break; - case 2: - if (len == 0) { - xMax = x; - } - edge[len] = x; - xMin = edge[len+1] = x + dx; - break; - case 3: - if (len == 0) { - yMax = y; - } - edge[len] = y; - yMin = edge[len+1] = y + dy; - break; - } - ++len; -} - -void TextWord::merge(TextWord *word) { - int i; - - if (word->xMin < xMin) { - xMin = word->xMin; - } - if (word->yMin < yMin) { - yMin = word->yMin; - } - if (word->xMax > xMax) { - xMax = word->xMax; - } - if (word->yMax > yMax) { - yMax = word->yMax; - } - if (len + word->len > size) { - size = len + word->len; - text = (Unicode *)grealloc(text, size * sizeof(Unicode)); - edge = (double *)grealloc(edge, (size + 1) * sizeof(double)); - } - for (i = 0; i < word->len; ++i) { - text[len + i] = word->text[i]; - edge[len + i] = word->edge[i]; - } - edge[len + word->len] = word->edge[word->len]; - len += word->len; - charLen += word->charLen; -} - -inline int TextWord::primaryCmp(TextWord *word) { - double cmp; - - cmp = 0; // make gcc happy - switch (rot) { - case 0: - cmp = xMin - word->xMin; - break; - case 1: - cmp = yMin - word->yMin; - break; - case 2: - cmp = word->xMax - xMax; - break; - case 3: - cmp = word->yMax - yMax; - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -double TextWord::primaryDelta(TextWord *word) { - double delta; - - delta = 0; // make gcc happy - switch (rot) { - case 0: - delta = word->xMin - xMax; - break; - case 1: - delta = word->yMin - yMax; - break; - case 2: - delta = xMin - word->xMax; - break; - case 3: - delta = yMin - word->yMax; - break; - } - return delta; -} - -int TextWord::cmpYX(const void *p1, const void *p2) { - TextWord *word1 = *(TextWord **)p1; - TextWord *word2 = *(TextWord **)p2; - double cmp; - - cmp = word1->yMin - word2->yMin; - if (cmp == 0) { - cmp = word1->xMin - word2->xMin; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -#if TEXTOUT_WORD_LIST - -GString *TextWord::getText() { - GString *s; - UnicodeMap *uMap; - char buf[8]; - int n, i; - - s = new GString(); - if (!(uMap = globalParams->getTextEncoding())) { - return s; - } - for (i = 0; i < len; ++i) { - n = uMap->mapUnicode(text[i], buf, sizeof(buf)); - s->append(buf, n); - } - uMap->decRefCnt(); - return s; -} - -#endif // TEXTOUT_WORD_LIST - -//------------------------------------------------------------------------ -// TextPool -//------------------------------------------------------------------------ - -TextPool::TextPool() { - minBaseIdx = 0; - maxBaseIdx = -1; - pool = NULL; - cursor = NULL; - cursorBaseIdx = -1; -} - -TextPool::~TextPool() { - int baseIdx; - TextWord *word, *word2; - - for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { - for (word = pool[baseIdx - minBaseIdx]; word; word = word2) { - word2 = word->next; - delete word; - } - } - gfree(pool); -} - -int TextPool::getBaseIdx(double base) { - int baseIdx; - - baseIdx = (int)(base / textPoolStep); - if (baseIdx < minBaseIdx) { - return minBaseIdx; - } - if (baseIdx > maxBaseIdx) { - return maxBaseIdx; - } - return baseIdx; -} - -void TextPool::addWord(TextWord *word) { - TextWord **newPool; - int wordBaseIdx, newMinBaseIdx, newMaxBaseIdx, baseIdx; - TextWord *w0, *w1; - - // expand the array if needed - wordBaseIdx = (int)(word->base / textPoolStep); - if (minBaseIdx > maxBaseIdx) { - minBaseIdx = wordBaseIdx - 128; - maxBaseIdx = wordBaseIdx + 128; - pool = (TextWord **)gmalloc((maxBaseIdx - minBaseIdx + 1) * - sizeof(TextWord *)); - for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { - pool[baseIdx - minBaseIdx] = NULL; - } - } else if (wordBaseIdx < minBaseIdx) { - newMinBaseIdx = wordBaseIdx - 128; - newPool = (TextWord **)gmalloc((maxBaseIdx - newMinBaseIdx + 1) * - sizeof(TextWord *)); - for (baseIdx = newMinBaseIdx; baseIdx < minBaseIdx; ++baseIdx) { - newPool[baseIdx - newMinBaseIdx] = NULL; - } - memcpy(&newPool[minBaseIdx - newMinBaseIdx], pool, - (maxBaseIdx - minBaseIdx + 1) * sizeof(TextWord *)); - gfree(pool); - pool = newPool; - minBaseIdx = newMinBaseIdx; - } else if (wordBaseIdx > maxBaseIdx) { - newMaxBaseIdx = wordBaseIdx + 128; - pool = (TextWord **)grealloc(pool, (newMaxBaseIdx - minBaseIdx + 1) * - sizeof(TextWord *)); - for (baseIdx = maxBaseIdx + 1; baseIdx <= newMaxBaseIdx; ++baseIdx) { - pool[baseIdx - minBaseIdx] = NULL; - } - maxBaseIdx = newMaxBaseIdx; - } - - // insert the new word - if (cursor && wordBaseIdx == cursorBaseIdx && - word->primaryCmp(cursor) > 0) { - w0 = cursor; - w1 = cursor->next; - } else { - w0 = NULL; - w1 = pool[wordBaseIdx - minBaseIdx]; - } - for (; w1 && word->primaryCmp(w1) > 0; w0 = w1, w1 = w1->next) ; - word->next = w1; - if (w0) { - w0->next = word; - } else { - pool[wordBaseIdx - minBaseIdx] = word; - } - cursor = word; - cursorBaseIdx = wordBaseIdx; -} - -//------------------------------------------------------------------------ -// TextLine -//------------------------------------------------------------------------ - -TextLine::TextLine(TextBlock *blkA, int rotA, double baseA) { - blk = blkA; - rot = rotA; - xMin = yMin = 0; - xMax = yMax = -1; - base = baseA; - words = lastWord = NULL; - text = NULL; - edge = NULL; - col = NULL; - len = 0; - convertedLen = 0; - hyphenated = gFalse; - next = NULL; -} - -TextLine::~TextLine() { - TextWord *word; - - while (words) { - word = words; - words = words->next; - delete word; - } - gfree(text); - gfree(edge); - gfree(col); -} - -void TextLine::addWord(TextWord *word) { - if (lastWord) { - lastWord->next = word; - } else { - words = word; - } - lastWord = word; - - if (xMin > xMax) { - xMin = word->xMin; - xMax = word->xMax; - yMin = word->yMin; - yMax = word->yMax; - } else { - if (word->xMin < xMin) { - xMin = word->xMin; - } - if (word->xMax > xMax) { - xMax = word->xMax; - } - if (word->yMin < yMin) { - yMin = word->yMin; - } - if (word->yMax > yMax) { - yMax = word->yMax; - } - } -} - -double TextLine::primaryDelta(TextLine *line) { - double delta; - - delta = 0; // make gcc happy - switch (rot) { - case 0: - delta = line->xMin - xMax; - break; - case 1: - delta = line->yMin - yMax; - break; - case 2: - delta = xMin - line->xMax; - break; - case 3: - delta = yMin - line->yMax; - break; - } - return delta; -} - -int TextLine::primaryCmp(TextLine *line) { - double cmp; - - cmp = 0; // make gcc happy - switch (rot) { - case 0: - cmp = xMin - line->xMin; - break; - case 1: - cmp = yMin - line->yMin; - break; - case 2: - cmp = line->xMax - xMax; - break; - case 3: - cmp = line->yMax - yMax; - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -int TextLine::secondaryCmp(TextLine *line) { - double cmp; - - cmp = (rot == 0 || rot == 3) ? base - line->base : line->base - base; - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -int TextLine::cmpYX(TextLine *line) { - int cmp; - - if ((cmp = secondaryCmp(line))) { - return cmp; - } - return primaryCmp(line); -} - -int TextLine::cmpXY(const void *p1, const void *p2) { - TextLine *line1 = *(TextLine **)p1; - TextLine *line2 = *(TextLine **)p2; - int cmp; - - if ((cmp = line1->primaryCmp(line2))) { - return cmp; - } - return line1->secondaryCmp(line2); -} - -void TextLine::coalesce(UnicodeMap *uMap) { - TextWord *word0, *word1; - double space, delta, minSpace; - GBool isUnicode; - char buf[8]; - int i, j; - - if (words->next) { - - // compute the inter-word space threshold - if (words->len > 1 || words->next->len > 1) { - minSpace = 0; - } else { - minSpace = words->primaryDelta(words->next); - for (word0 = words->next, word1 = word0->next; - word1 && minSpace > 0; - word0 = word1, word1 = word0->next) { - if (word1->len > 1) { - minSpace = 0; - } - delta = word0->primaryDelta(word1); - if (delta < minSpace) { - minSpace = delta; - } - } - } - if (minSpace <= 0) { - space = maxCharSpacing * words->fontSize; - } else { - space = maxWideCharSpacingMul * minSpace; - } - - // merge words - word0 = words; - word1 = words->next; - while (word1) { - if (word0->primaryDelta(word1) >= space) { - word0->spaceAfter = gTrue; - word0 = word1; - word1 = word1->next; - } else if (word0->font == word1->font && - fabs(word0->fontSize - word1->fontSize) < - maxWordFontSizeDelta * words->fontSize && - word1->charPos == word0->charPos + word0->charLen) { - word0->merge(word1); - word0->next = word1->next; - delete word1; - word1 = word0->next; - } else { - word0 = word1; - word1 = word1->next; - } - } - } - - // build the line text - isUnicode = uMap ? uMap->isUnicode() : gFalse; - len = 0; - for (word1 = words; word1; word1 = word1->next) { - len += word1->len; - if (word1->spaceAfter) { - ++len; - } - } - text = (Unicode *)gmalloc(len * sizeof(Unicode)); - edge = (double *)gmalloc((len + 1) * sizeof(double)); - i = 0; - for (word1 = words; word1; word1 = word1->next) { - for (j = 0; j < word1->len; ++j) { - text[i] = word1->text[j]; - edge[i] = word1->edge[j]; - ++i; - } - edge[i] = word1->edge[word1->len]; - if (word1->spaceAfter) { - text[i] = (Unicode)0x0020; - ++i; - } - } - - // compute convertedLen and set up the col array - col = (int *)gmalloc((len + 1) * sizeof(int)); - convertedLen = 0; - for (i = 0; i < len; ++i) { - col[i] = convertedLen; - if (isUnicode) { - ++convertedLen; - } else if (uMap) { - convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf)); - } - } - col[len] = convertedLen; - - // check for hyphen at end of line - //~ need to check for other chars used as hyphens - hyphenated = text[len - 1] == (Unicode)'-'; -} - -//------------------------------------------------------------------------ -// TextLineFrag -//------------------------------------------------------------------------ - -class TextLineFrag { -public: - - TextLine *line; // the line object - int start, len; // offset and length of this fragment - // (in Unicode chars) - double xMin, xMax; // bounding box coordinates - double yMin, yMax; - double base; // baseline virtual coordinate - int col; // first column - - void init(TextLine *lineA, int startA, int lenA); - void computeCoords(GBool oneRot); - - static int cmpYXPrimaryRot(const void *p1, const void *p2); - static int cmpYXLineRot(const void *p1, const void *p2); - static int cmpXYLineRot(const void *p1, const void *p2); -}; - -void TextLineFrag::init(TextLine *lineA, int startA, int lenA) { - line = lineA; - start = startA; - len = lenA; - col = line->col[start]; -} - -void TextLineFrag::computeCoords(GBool oneRot) { - TextBlock *blk; - double d0, d1, d2, d3, d4; - - if (oneRot) { - - switch (line->rot) { - case 0: - xMin = line->edge[start]; - xMax = line->edge[start + len]; - yMin = line->yMin; - yMax = line->yMax; - break; - case 1: - xMin = line->xMin; - xMax = line->xMax; - yMin = line->edge[start]; - yMax = line->edge[start + len]; - break; - case 2: - xMin = line->edge[start + len]; - xMax = line->edge[start]; - yMin = line->yMin; - yMax = line->yMax; - break; - case 3: - xMin = line->xMin; - xMax = line->xMax; - yMin = line->edge[start + len]; - yMax = line->edge[start]; - break; - } - base = line->base; - - } else { - - if (line->rot == 0 && line->blk->page->primaryRot == 0) { - - xMin = line->edge[start]; - xMax = line->edge[start + len]; - yMin = line->yMin; - yMax = line->yMax; - base = line->base; - - } else { - - blk = line->blk; - d0 = line->edge[start]; - d1 = line->edge[start + len]; - d2 = d3 = d4 = 0; // make gcc happy - - switch (line->rot) { - case 0: - d2 = line->yMin; - d3 = line->yMax; - d4 = line->base; - d0 = (d0 - blk->xMin) / (blk->xMax - blk->xMin); - d1 = (d1 - blk->xMin) / (blk->xMax - blk->xMin); - d2 = (d2 - blk->yMin) / (blk->yMax - blk->yMin); - d3 = (d3 - blk->yMin) / (blk->yMax - blk->yMin); - d4 = (d4 - blk->yMin) / (blk->yMax - blk->yMin); - break; - case 1: - d2 = line->xMax; - d3 = line->xMin; - d4 = line->base; - d0 = (d0 - blk->yMin) / (blk->yMax - blk->yMin); - d1 = (d1 - blk->yMin) / (blk->yMax - blk->yMin); - d2 = (blk->xMax - d2) / (blk->xMax - blk->xMin); - d3 = (blk->xMax - d3) / (blk->xMax - blk->xMin); - d4 = (blk->xMax - d4) / (blk->xMax - blk->xMin); - break; - case 2: - d2 = line->yMax; - d3 = line->yMin; - d4 = line->base; - d0 = (blk->xMax - d0) / (blk->xMax - blk->xMin); - d1 = (blk->xMax - d1) / (blk->xMax - blk->xMin); - d2 = (blk->yMax - d2) / (blk->yMax - blk->yMin); - d3 = (blk->yMax - d3) / (blk->yMax - blk->yMin); - d4 = (blk->yMax - d4) / (blk->yMax - blk->yMin); - break; - case 3: - d2 = line->xMin; - d3 = line->xMax; - d4 = line->base; - d0 = (blk->yMax - d0) / (blk->yMax - blk->yMin); - d1 = (blk->yMax - d1) / (blk->yMax - blk->yMin); - d2 = (d2 - blk->xMin) / (blk->xMax - blk->xMin); - d3 = (d3 - blk->xMin) / (blk->xMax - blk->xMin); - d4 = (d4 - blk->xMin) / (blk->xMax - blk->xMin); - break; - } - - switch (line->blk->page->primaryRot) { - case 0: - xMin = blk->xMin + d0 * (blk->xMax - blk->xMin); - xMax = blk->xMin + d1 * (blk->xMax - blk->xMin); - yMin = blk->yMin + d2 * (blk->yMax - blk->yMin); - yMax = blk->yMin + d3 * (blk->yMax - blk->yMin); - base = blk->yMin + base * (blk->yMax - blk->yMin); - break; - case 1: - xMin = blk->xMax - d3 * (blk->xMax - blk->xMin); - xMax = blk->xMax - d2 * (blk->xMax - blk->xMin); - yMin = blk->yMin + d0 * (blk->yMax - blk->yMin); - yMax = blk->yMin + d1 * (blk->yMax - blk->yMin); - base = blk->xMax - d4 * (blk->xMax - blk->xMin); - break; - case 2: - xMin = blk->xMax - d1 * (blk->xMax - blk->xMin); - xMax = blk->xMax - d0 * (blk->xMax - blk->xMin); - yMin = blk->yMax - d3 * (blk->yMax - blk->yMin); - yMax = blk->yMax - d2 * (blk->yMax - blk->yMin); - base = blk->yMax - d4 * (blk->yMax - blk->yMin); - break; - case 3: - xMin = blk->xMin + d2 * (blk->xMax - blk->xMin); - xMax = blk->xMin + d3 * (blk->xMax - blk->xMin); - yMin = blk->yMax - d1 * (blk->yMax - blk->yMin); - yMax = blk->yMax - d0 * (blk->yMax - blk->yMin); - base = blk->xMin + d4 * (blk->xMax - blk->xMin); - break; - } - - } - } -} - -int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2) { - TextLineFrag *frag1 = (TextLineFrag *)p1; - TextLineFrag *frag2 = (TextLineFrag *)p2; - double cmp; - - cmp = 0; // make gcc happy - switch (frag1->line->blk->page->primaryRot) { - case 0: - if ((cmp = frag1->yMin - frag2->yMin) == 0) { - cmp = frag1->xMin - frag2->xMin; - } - break; - case 1: - if ((cmp = frag2->xMax - frag1->xMax) == 0) { - cmp = frag1->yMin - frag2->yMin; - } - break; - case 2: - if ((cmp = frag2->yMin - frag1->yMin) == 0) { - cmp = frag2->xMax - frag1->xMax; - } - break; - case 3: - if ((cmp = frag1->xMax - frag2->xMax) == 0) { - cmp = frag2->yMax - frag1->yMax; - } - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2) { - TextLineFrag *frag1 = (TextLineFrag *)p1; - TextLineFrag *frag2 = (TextLineFrag *)p2; - double cmp; - - cmp = 0; // make gcc happy - switch (frag1->line->rot) { - case 0: - if ((cmp = frag1->yMin - frag2->yMin) == 0) { - cmp = frag1->xMin - frag2->xMin; - } - break; - case 1: - if ((cmp = frag2->xMax - frag1->xMax) == 0) { - cmp = frag1->yMin - frag2->yMin; - } - break; - case 2: - if ((cmp = frag2->yMin - frag1->yMin) == 0) { - cmp = frag2->xMax - frag1->xMax; - } - break; - case 3: - if ((cmp = frag1->xMax - frag2->xMax) == 0) { - cmp = frag2->yMax - frag1->yMax; - } - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2) { - TextLineFrag *frag1 = (TextLineFrag *)p1; - TextLineFrag *frag2 = (TextLineFrag *)p2; - double cmp; - - cmp = 0; // make gcc happy - switch (frag1->line->rot) { - case 0: - if ((cmp = frag1->xMin - frag2->xMin) == 0) { - cmp = frag1->yMin - frag2->yMin; - } - break; - case 1: - if ((cmp = frag1->yMin - frag2->yMin) == 0) { - cmp = frag2->xMax - frag1->xMax; - } - break; - case 2: - if ((cmp = frag2->xMax - frag1->xMax) == 0) { - cmp = frag2->yMin - frag1->yMin; - } - break; - case 3: - if ((cmp = frag2->yMax - frag1->yMax) == 0) { - cmp = frag1->xMax - frag2->xMax; - } - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -//------------------------------------------------------------------------ -// TextBlock -//------------------------------------------------------------------------ - -TextBlock::TextBlock(TextPage *pageA, int rotA) { - page = pageA; - rot = rotA; - xMin = yMin = 0; - xMax = yMax = -1; - priMin = 0; - priMax = page->pageWidth; - pool = new TextPool(); - lines = NULL; - curLine = NULL; - next = NULL; - stackNext = NULL; -} - -TextBlock::~TextBlock() { - TextLine *line; - - delete pool; - while (lines) { - line = lines; - lines = lines->next; - delete line; - } -} - -void TextBlock::addWord(TextWord *word) { - pool->addWord(word); - if (xMin > xMax) { - xMin = word->xMin; - xMax = word->xMax; - yMin = word->yMin; - yMax = word->yMax; - } else { - if (word->xMin < xMin) { - xMin = word->xMin; - } - if (word->xMax > xMax) { - xMax = word->xMax; - } - if (word->yMin < yMin) { - yMin = word->yMin; - } - if (word->yMax > yMax) { - yMax = word->yMax; - } - } -} - -void TextBlock::coalesce(UnicodeMap *uMap) { - TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord; - TextLine *line, *line0, *line1; - int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx; - int baseIdx, bestWordBaseIdx, idx0, idx1; - double minBase, maxBase; - double fontSize, delta, priDelta, secDelta; - TextLine **lineArray; - GBool found; - int col1, col2; - int i, j, k; - - // discard duplicated text (fake boldface, drop shadows) - for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) { - word0 = pool->getPool(idx0); - while (word0) { - priDelta = dupMaxPriDelta * word0->fontSize; - secDelta = dupMaxSecDelta * word0->fontSize; - if (rot == 0 || rot == 3) { - maxBaseIdx = pool->getBaseIdx(word0->base + secDelta); - } else { - maxBaseIdx = pool->getBaseIdx(word0->base - secDelta); - } - found = gFalse; - word1 = word2 = NULL; // make gcc happy - for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) { - if (idx1 == idx0) { - word1 = word0; - word2 = word0->next; - } else { - word1 = NULL; - word2 = pool->getPool(idx1); - } - for (; word2; word1 = word2, word2 = word2->next) { - if (word2->len == word0->len && - !memcmp(word2->text, word0->text, - word0->len * sizeof(Unicode))) { - switch (rot) { - case 0: - case 2: - found = fabs(word0->xMin - word2->xMin) < priDelta && - fabs(word0->xMax - word2->xMax) < priDelta && - fabs(word0->yMin - word2->yMin) < secDelta && - fabs(word0->yMax - word2->yMax) < secDelta; - break; - case 1: - case 3: - found = fabs(word0->xMin - word2->xMin) < secDelta && - fabs(word0->xMax - word2->xMax) < secDelta && - fabs(word0->yMin - word2->yMin) < priDelta && - fabs(word0->yMax - word2->yMax) < priDelta; - break; - } - } - if (found) { - break; - } - } - if (found) { - break; - } - } - if (found) { - if (word1) { - word1->next = word2->next; - } else { - pool->setPool(idx1, word2->next); - } - delete word2; - } else { - word0 = word0->next; - } - } - } - - // build the lines - curLine = NULL; - poolMinBaseIdx = pool->minBaseIdx; - charCount = 0; - nLines = 0; - while (1) { - - // find the first non-empty line in the pool - for (; - poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx); - ++poolMinBaseIdx) ; - if (poolMinBaseIdx > pool->maxBaseIdx) { - break; - } - - // look for the left-most word in the first four lines of the - // pool -- this avoids starting with a superscript word - startBaseIdx = poolMinBaseIdx; - for (baseIdx = poolMinBaseIdx + 1; - baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; - ++baseIdx) { - if (!pool->getPool(baseIdx)) { - continue; - } - if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) - < 0) { - startBaseIdx = baseIdx; - } - } - - // create a new line - word0 = pool->getPool(startBaseIdx); - pool->setPool(startBaseIdx, word0->next); - word0->next = NULL; - line = new TextLine(this, word0->rot, word0->base); - line->addWord(word0); - lastWord = word0; - - // compute the search range - fontSize = word0->fontSize; - minBase = word0->base - maxIntraLineDelta * fontSize; - maxBase = word0->base + maxIntraLineDelta * fontSize; - minBaseIdx = pool->getBaseIdx(minBase); - maxBaseIdx = pool->getBaseIdx(maxBase); - - // find the rest of the words in this line - while (1) { - - // find the left-most word whose baseline is in the range for - // this line - bestWordBaseIdx = 0; - bestWord0 = bestWord1 = NULL; - for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { - for (word0 = NULL, word1 = pool->getPool(baseIdx); - word1; - word0 = word1, word1 = word1->next) { - if (word1->base >= minBase && - word1->base <= maxBase && - (delta = lastWord->primaryDelta(word1)) >= - minCharSpacing * fontSize) { - if (delta < maxWordSpacing * fontSize && - (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) { - bestWordBaseIdx = baseIdx; - bestWord0 = word0; - bestWord1 = word1; - } - break; - } - } - } - if (!bestWord1) { - break; - } - - // remove it from the pool, and add it to the line - if (bestWord0) { - bestWord0->next = bestWord1->next; - } else { - pool->setPool(bestWordBaseIdx, bestWord1->next); - } - bestWord1->next = NULL; - line->addWord(bestWord1); - lastWord = bestWord1; - } - - // add the line - if (curLine && line->cmpYX(curLine) > 0) { - line0 = curLine; - line1 = curLine->next; - } else { - line0 = NULL; - line1 = lines; - } - for (; - line1 && line->cmpYX(line1) > 0; - line0 = line1, line1 = line1->next) ; - if (line0) { - line0->next = line; - } else { - lines = line; - } - line->next = line1; - curLine = line; - line->coalesce(uMap); - charCount += line->len; - ++nLines; - } - - // sort lines into xy order for column assignment - lineArray = (TextLine **)gmalloc(nLines * sizeof(TextLine *)); - for (line = lines, i = 0; line; line = line->next, ++i) { - lineArray[i] = line; - } - qsort(lineArray, nLines, sizeof(TextLine *), &TextLine::cmpXY); - - // column assignment - nColumns = 0; - for (i = 0; i < nLines; ++i) { - line0 = lineArray[i]; - col1 = 0; - for (j = 0; j < i; ++j) { - line1 = lineArray[j]; - if (line1->primaryDelta(line0) >= 0) { - col2 = line1->col[line1->len] + 1; - } else { - k = 0; // make gcc happy - switch (rot) { - case 0: - for (k = 0; - k < line1->len && - line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); - ++k) ; - break; - case 1: - for (k = 0; - k < line1->len && - line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); - ++k) ; - break; - case 2: - for (k = 0; - k < line1->len && - line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); - ++k) ; - break; - case 3: - for (k = 0; - k < line1->len && - line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); - ++k) ; - break; - } - col2 = line1->col[k]; - } - if (col2 > col1) { - col1 = col2; - } - } - for (k = 0; k <= line0->len; ++k) { - line0->col[k] += col1; - } - if (line0->col[line0->len] > nColumns) { - nColumns = line0->col[line0->len]; - } - } - gfree(lineArray); -} - -void TextBlock::updatePriMinMax(TextBlock *blk) { - double newPriMin, newPriMax; - GBool gotPriMin, gotPriMax; - - gotPriMin = gotPriMax = gFalse; - newPriMin = newPriMax = 0; // make gcc happy - switch (page->primaryRot) { - case 0: - case 2: - if (blk->yMin < yMax && blk->yMax > yMin) { - if (blk->xMin < xMin) { - newPriMin = blk->xMax; - gotPriMin = gTrue; - } - if (blk->xMax > xMax) { - newPriMax = blk->xMin; - gotPriMax = gTrue; - } - } - break; - case 1: - case 3: - if (blk->xMin < xMax && blk->xMax > xMin) { - if (blk->yMin < yMin) { - newPriMin = blk->yMax; - gotPriMin = gTrue; - } - if (blk->yMax > yMax) { - newPriMax = blk->yMin; - gotPriMax = gTrue; - } - } - break; - } - if (gotPriMin) { - if (newPriMin > xMin) { - newPriMin = xMin; - } - if (newPriMin > priMin) { - priMin = newPriMin; - } - } - if (gotPriMax) { - if (newPriMax < xMax) { - newPriMax = xMax; - } - if (newPriMax < priMax) { - priMax = newPriMax; - } - } -} - -int TextBlock::cmpXYPrimaryRot(const void *p1, const void *p2) { - TextBlock *blk1 = *(TextBlock **)p1; - TextBlock *blk2 = *(TextBlock **)p2; - double cmp; - - cmp = 0; // make gcc happy - switch (blk1->page->primaryRot) { - case 0: - if ((cmp = blk1->xMin - blk2->xMin) == 0) { - cmp = blk1->yMin - blk2->yMin; - } - break; - case 1: - if ((cmp = blk1->yMin - blk2->yMin) == 0) { - cmp = blk2->xMax - blk1->xMax; - } - break; - case 2: - if ((cmp = blk2->xMax - blk1->xMax) == 0) { - cmp = blk2->yMin - blk1->yMin; - } - break; - case 3: - if ((cmp = blk2->yMax - blk1->yMax) == 0) { - cmp = blk1->xMax - blk2->xMax; - } - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -int TextBlock::cmpYXPrimaryRot(const void *p1, const void *p2) { - TextBlock *blk1 = *(TextBlock **)p1; - TextBlock *blk2 = *(TextBlock **)p2; - double cmp; - - cmp = 0; // make gcc happy - switch (blk1->page->primaryRot) { - case 0: - if ((cmp = blk1->yMin - blk2->yMin) == 0) { - cmp = blk1->xMin - blk2->xMin; - } - break; - case 1: - if ((cmp = blk2->xMax - blk1->xMax) == 0) { - cmp = blk1->yMin - blk2->yMin; - } - break; - case 2: - if ((cmp = blk2->yMin - blk1->yMin) == 0) { - cmp = blk2->xMax - blk1->xMax; - } - break; - case 3: - if ((cmp = blk1->xMax - blk2->xMax) == 0) { - cmp = blk2->yMax - blk1->yMax; - } - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -int TextBlock::primaryCmp(TextBlock *blk) { - double cmp; - - cmp = 0; // make gcc happy - switch (rot) { - case 0: - cmp = xMin - blk->xMin; - break; - case 1: - cmp = yMin - blk->yMin; - break; - case 2: - cmp = blk->xMax - xMax; - break; - case 3: - cmp = blk->yMax - yMax; - break; - } - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} - -double TextBlock::secondaryDelta(TextBlock *blk) { - double delta; - - delta = 0; // make gcc happy - switch (rot) { - case 0: - delta = blk->yMin - yMax; - break; - case 1: - delta = xMin - blk->xMax; - break; - case 2: - delta = yMin - blk->yMax; - break; - case 3: - delta = blk->xMin - xMax; - break; - } - return delta; -} - -GBool TextBlock::isBelow(TextBlock *blk) { - GBool below; - - below = gFalse; // make gcc happy - switch (page->primaryRot) { - case 0: - below = xMin >= blk->priMin && xMax <= blk->priMax && - yMin > blk->yMin; - break; - case 1: - below = yMin >= blk->priMin && yMax <= blk->priMax && - xMax < blk->xMax; - break; - case 2: - below = xMin >= blk->priMin && xMax <= blk->priMax && - yMax < blk->yMax; - break; - case 3: - below = yMin >= blk->priMin && yMax <= blk->priMax && - xMin > blk->xMin; - break; - } - - return below; -} - -//------------------------------------------------------------------------ -// TextFlow -//------------------------------------------------------------------------ - -TextFlow::TextFlow(TextPage *pageA, TextBlock *blk) { - page = pageA; - xMin = blk->xMin; - xMax = blk->xMax; - yMin = blk->yMin; - yMax = blk->yMax; - priMin = blk->priMin; - priMax = blk->priMax; - blocks = lastBlk = blk; - next = NULL; -} - -TextFlow::~TextFlow() { - TextBlock *blk; - - while (blocks) { - blk = blocks; - blocks = blocks->next; - delete blk; - } -} - -void TextFlow::addBlock(TextBlock *blk) { - if (lastBlk) { - lastBlk->next = blk; - } else { - blocks = blk; - } - lastBlk = blk; - if (blk->xMin < xMin) { - xMin = blk->xMin; - } - if (blk->xMax > xMax) { - xMax = blk->xMax; - } - if (blk->yMin < yMin) { - yMin = blk->yMin; - } - if (blk->yMax > yMax) { - yMax = blk->yMax; - } -} - -GBool TextFlow::blockFits(TextBlock *blk, TextBlock *prevBlk) { - GBool fits; - - // lower blocks must use smaller fonts - if (blk->lines->words->fontSize > lastBlk->lines->words->fontSize) { - return gFalse; - } - - fits = gFalse; // make gcc happy - switch (page->primaryRot) { - case 0: - fits = blk->xMin >= priMin && blk->xMax <= priMax; - break; - case 1: - fits = blk->yMin >= priMin && blk->yMax <= priMax; - break; - case 2: - fits = blk->xMin >= priMin && blk->xMax <= priMax; - break; - case 3: - fits = blk->yMin >= priMin && blk->yMax <= priMax; - break; - } - return fits; -} - -#if TEXTOUT_WORD_LIST - -//------------------------------------------------------------------------ -// TextWordList -//------------------------------------------------------------------------ - -TextWordList::TextWordList(TextPage *text, GBool physLayout) { - TextFlow *flow; - TextBlock *blk; - TextLine *line; - TextWord *word; - TextWord **wordArray; - int nWords, i; - - words = new GList(); - - if (text->rawOrder) { - for (word = text->rawWords; word; word = word->next) { - words->append(word); - } - - } else if (physLayout) { - // this is inefficient, but it's also the least useful of these - // three cases - nWords = 0; - for (flow = text->flows; flow; flow = flow->next) { - for (blk = flow->blocks; blk; blk = blk->next) { - for (line = blk->lines; line; line = line->next) { - for (word = line->words; word; word = word->next) { - ++nWords; - } - } - } - } - wordArray = (TextWord **)gmalloc(nWords * sizeof(TextWord *)); - i = 0; - for (flow = text->flows; flow; flow = flow->next) { - for (blk = flow->blocks; blk; blk = blk->next) { - for (line = blk->lines; line; line = line->next) { - for (word = line->words; word; word = word->next) { - wordArray[i++] = word; - } - } - } - } - qsort(wordArray, nWords, sizeof(TextWord *), &TextWord::cmpYX); - for (i = 0; i < nWords; ++i) { - words->append(wordArray[i]); - } - gfree(wordArray); - - } else { - for (flow = text->flows; flow; flow = flow->next) { - for (blk = flow->blocks; blk; blk = blk->next) { - for (line = blk->lines; line; line = line->next) { - for (word = line->words; word; word = word->next) { - words->append(word); - } - } - } - } - } -} - -TextWordList::~TextWordList() { - delete words; -} - -int TextWordList::getLength() { - return words->getLength(); -} - -TextWord *TextWordList::get(int idx) { - if (idx < 0 || idx >= words->getLength()) { - return NULL; - } - return (TextWord *)words->get(idx); -} - -#endif // TEXTOUT_WORD_LIST - -//------------------------------------------------------------------------ -// TextPage -//------------------------------------------------------------------------ - -TextPage::TextPage(GBool rawOrderA) { - int rot; - - rawOrder = rawOrderA; - curWord = NULL; - charPos = 0; - curFont = NULL; - curFontSize = 0; - nest = 0; - nTinyChars = 0; - lastCharOverlap = gFalse; - if (!rawOrder) { - for (rot = 0; rot < 4; ++rot) { - pools[rot] = new TextPool(); - } - } - flows = NULL; - blocks = NULL; - rawWords = NULL; - rawLastWord = NULL; - fonts = new GList(); - lastFindXMin = lastFindYMin = 0; - haveLastFind = gFalse; -} - -TextPage::~TextPage() { - int rot; - - clear(); - if (!rawOrder) { - for (rot = 0; rot < 4; ++rot) { - delete pools[rot]; - } - } - delete fonts; -} - -void TextPage::startPage(GfxState *state) { - clear(); - if (state) { - pageWidth = state->getPageWidth(); - pageHeight = state->getPageHeight(); - } else { - pageWidth = pageHeight = 0; - } -} - -void TextPage::endPage() { - if (curWord) { - endWord(); - } -} - -void TextPage::clear() { - int rot; - TextFlow *flow; - TextWord *word; - - if (curWord) { - delete curWord; - curWord = NULL; - } - if (rawOrder) { - while (rawWords) { - word = rawWords; - rawWords = rawWords->next; - delete word; - } - } else { - for (rot = 0; rot < 4; ++rot) { - delete pools[rot]; - } - while (flows) { - flow = flows; - flows = flows->next; - delete flow; - } - gfree(blocks); - } - deleteGList(fonts, TextFontInfo); - - curWord = NULL; - charPos = 0; - curFont = NULL; - curFontSize = 0; - nest = 0; - nTinyChars = 0; - if (!rawOrder) { - for (rot = 0; rot < 4; ++rot) { - pools[rot] = new TextPool(); - } - } - flows = NULL; - blocks = NULL; - rawWords = NULL; - rawLastWord = NULL; - fonts = new GList(); -} - -void TextPage::updateFont(GfxState *state) { - GfxFont *gfxFont; - double *fm; - char *name; - int code, mCode, letterCode, anyCode; - double w; - int i; - - // get the font info object - curFont = NULL; - for (i = 0; i < fonts->getLength(); ++i) { - curFont = (TextFontInfo *)fonts->get(i); - if (curFont->matches(state)) { - break; - } - curFont = NULL; - } - if (!curFont) { - curFont = new TextFontInfo(state); - fonts->append(curFont); - } - - // adjust the font size - gfxFont = state->getFont(); - curFontSize = state->getTransformedFontSize(); - if (gfxFont && gfxFont->getType() == fontType3) { - // This is a hack which makes it possible to deal with some Type 3 - // fonts. The problem is that it's impossible to know what the - // base coordinate system used in the font is without actually - // rendering the font. This code tries to guess by looking at the - // width of the character 'm' (which breaks if the font is a - // subset that doesn't contain 'm'). - mCode = letterCode = anyCode = -1; - for (code = 0; code < 256; ++code) { - name = ((Gfx8BitFont *)gfxFont)->getCharName(code); - if (name && name[0] == 'm' && name[1] == '\0') { - mCode = code; - } - if (letterCode < 0 && name && name[1] == '\0' && - ((name[0] >= 'A' && name[0] <= 'Z') || - (name[0] >= 'a' && name[0] <= 'z'))) { - letterCode = code; - } - if (anyCode < 0 && name && - ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) { - anyCode = code; - } - } - if (mCode >= 0 && - (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) { - // 0.6 is a generic average 'm' width -- yes, this is a hack - curFontSize *= w / 0.6; - } else if (letterCode >= 0 && - (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) { - // even more of a hack: 0.5 is a generic letter width - curFontSize *= w / 0.5; - } else if (anyCode >= 0 && - (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) { - // better than nothing: 0.5 is a generic character width - curFontSize *= w / 0.5; - } - fm = gfxFont->getFontMatrix(); - if (fm[0] != 0) { - curFontSize *= fabs(fm[3] / fm[0]); - } - } -} - -void TextPage::beginWord(GfxState *state, double x0, double y0) { - double *txtm, *ctm, *fontm; - double m[4], m2[4]; - int rot; - - // This check is needed because Type 3 characters can contain - // text-drawing operations (when TextPage is being used via - // {X,Win}SplashOutputDev rather than TextOutputDev). - if (curWord) { - ++nest; - return; - } - - // compute the rotation - txtm = state->getTextMat(); - ctm = state->getCTM(); - m[0] = txtm[0] * ctm[0] + txtm[1] * ctm[2]; - m[1] = txtm[0] * ctm[1] + txtm[1] * ctm[3]; - m[2] = txtm[2] * ctm[0] + txtm[3] * ctm[2]; - m[3] = txtm[2] * ctm[1] + txtm[3] * ctm[3]; - if (state->getFont()->getType() == fontType3) { - fontm = state->getFont()->getFontMatrix(); - m2[0] = fontm[0] * m[0] + fontm[1] * m[2]; - m2[1] = fontm[0] * m[1] + fontm[1] * m[3]; - m2[2] = fontm[2] * m[0] + fontm[3] * m[2]; - m2[3] = fontm[2] * m[1] + fontm[3] * m[3]; - m[0] = m2[0]; - m[1] = m2[1]; - m[2] = m2[2]; - m[3] = m2[3]; - } - if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) { - rot = (m[3] < 0) ? 0 : 2; - } else { - rot = (m[2] > 0) ? 1 : 3; - } - - curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize); -} - -void TextPage::addChar(GfxState *state, double x, double y, - double dx, double dy, - CharCode c, Unicode *u, int uLen) { - double x1, y1, w1, h1, dx2, dy2, base, sp; - int i; - - // if the previous char was a space, addChar will have called - // endWord, so we need to start a new word - if (!curWord) { - beginWord(state, x, y); - } - - // throw away chars that aren't inside the page bounds - state->transform(x, y, &x1, &y1); - if (x1 < 0 || x1 > pageWidth || - y1 < 0 || y1 > pageHeight) { - return; - } - - // subtract char and word spacing from the dx,dy values - sp = state->getCharSpace(); - if (c == (CharCode)0x20) { - sp += state->getWordSpace(); - } - state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2); - dx -= dx2; - dy -= dy2; - state->transformDelta(dx, dy, &w1, &h1); - - // check the tiny chars limit - if (!globalParams->getTextKeepTinyChars() && - fabs(w1) < 3 && fabs(h1) < 3) { - if (++nTinyChars > 50000) { - return; - } - } - - // break words at space character - if (uLen == 1 && u[0] == (Unicode)0x20) { - ++curWord->charLen; - ++charPos; - endWord(); - return; - } - - // start a new word if: - // (1) this character's baseline doesn't match the current word's - // baseline, or - // (2) there is space between the end of the current word and this - // character, or - // (3) this character overlaps the previous one (duplicated text), or - // (4) the previous character was an overlap (we want each duplicated - // characters to be in a word by itself) - base = sp = 0; // make gcc happy - if (curWord->len > 0) { - switch (curWord->rot) { - case 0: - base = y1; - sp = x1 - curWord->xMax; - break; - case 1: - base = x1; - sp = y1 - curWord->yMax; - break; - case 2: - base = y1; - sp = curWord->xMin - x1; - break; - case 3: - base = x1; - sp = curWord->yMin - y1; - break; - } - if (fabs(base - curWord->base) > 0.5 || - sp > minWordBreakSpace * curWord->fontSize || - sp < -minDupBreakOverlap * curWord->fontSize || - lastCharOverlap) { - lastCharOverlap = gTrue; - endWord(); - beginWord(state, x, y); - } else { - lastCharOverlap = gFalse; - } - } else { - lastCharOverlap = gFalse; - } - - // page rotation and/or transform matrices can cause text to be - // drawn in reverse order -- in this case, swap the begin/end - // coordinates and break text into individual chars - if ((curWord->rot == 0 && w1 < 0) || - (curWord->rot == 1 && h1 < 0) || - (curWord->rot == 2 && w1 > 0) || - (curWord->rot == 3 && h1 > 0)) { - endWord(); - beginWord(state, x + dx, y + dy); - x1 += w1; - y1 += h1; - w1 = -w1; - h1 = -h1; - } - - // add the characters to the current word - if (uLen != 0) { - w1 /= uLen; - h1 /= uLen; - } - for (i = 0; i < uLen; ++i) { - curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); - } - ++curWord->charLen; - ++charPos; -} - -void TextPage::endWord() { - // This check is needed because Type 3 characters can contain - // text-drawing operations (when TextPage is being used via - // {X,Win}SplashOutputDev rather than TextOutputDev). - if (nest > 0) { - --nest; - return; - } - - if (curWord) { - addWord(curWord); - curWord = NULL; - } -} - -void TextPage::addWord(TextWord *word) { - // throw away zero-length words -- they don't have valid xMin/xMax - // values, and they're useless anyway - if (word->len == 0) { - delete word; - return; - } - - if (rawOrder) { - if (rawLastWord) { - rawLastWord->next = word; - } else { - rawWords = word; - } - rawLastWord = word; - } else { - pools[word->rot]->addWord(word); - } -} - -void TextPage::coalesce(GBool physLayout) { - UnicodeMap *uMap; - TextPool *pool; - TextWord *word0, *word1, *word2; - TextLine *line; - TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1; - TextBlock **blkArray; - TextFlow *flow, *lastFlow; - int rot, poolMinBaseIdx, baseIdx, startBaseIdx; - double minBase, maxBase, newMinBase, newMaxBase; - double fontSize, colSpace1, colSpace2, lineSpace, intraLineSpace, blkSpace; - GBool found; - int count[4]; - int lrCount; - int firstBlkIdx, nBlocksLeft; - int col1, col2; - int i, j, n; - - if (rawOrder) { - primaryRot = 0; - primaryLR = gTrue; - return; - } - - uMap = globalParams->getTextEncoding(); - blkList = NULL; - lastBlk = NULL; - nBlocks = 0; - primaryRot = -1; - -#if 0 // for debugging - printf("*** initial words ***\n"); - for (rot = 0; rot < 4; ++rot) { - pool = pools[rot]; - for (baseIdx = pool->minBaseIdx; baseIdx <= pool->maxBaseIdx; ++baseIdx) { - for (word0 = pool->getPool(baseIdx); word0; word0 = word0->next) { - printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f '", - word0->xMin, word0->xMax, word0->yMin, word0->yMax, - word0->base, word0->fontSize); - for (i = 0; i < word0->len; ++i) { - fputc(word0->text[i] & 0xff, stdout); - } - printf("'\n"); - } - } - } - printf("\n"); -#endif - - //----- assemble the blocks - - //~ add an outer loop for writing mode (vertical text) - - // build blocks for each rotation value - for (rot = 0; rot < 4; ++rot) { - pool = pools[rot]; - poolMinBaseIdx = pool->minBaseIdx; - count[rot] = 0; - - // add blocks until no more words are left - while (1) { - - // find the first non-empty line in the pool - for (; - poolMinBaseIdx <= pool->maxBaseIdx && - !pool->getPool(poolMinBaseIdx); - ++poolMinBaseIdx) ; - if (poolMinBaseIdx > pool->maxBaseIdx) { - break; - } - - // look for the left-most word in the first four lines of the - // pool -- this avoids starting with a superscript word - startBaseIdx = poolMinBaseIdx; - for (baseIdx = poolMinBaseIdx + 1; - baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; - ++baseIdx) { - if (!pool->getPool(baseIdx)) { - continue; - } - if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) - < 0) { - startBaseIdx = baseIdx; - } - } - - // create a new block - word0 = pool->getPool(startBaseIdx); - pool->setPool(startBaseIdx, word0->next); - word0->next = NULL; - blk = new TextBlock(this, rot); - blk->addWord(word0); - - fontSize = word0->fontSize; - minBase = maxBase = word0->base; - colSpace1 = minColSpacing1 * fontSize; - colSpace2 = minColSpacing2 * fontSize; - lineSpace = maxLineSpacingDelta * fontSize; - intraLineSpace = maxIntraLineDelta * fontSize; - - // add words to the block - do { - found = gFalse; - - // look for words on the line above the current top edge of - // the block - newMinBase = minBase; - for (baseIdx = pool->getBaseIdx(minBase); - baseIdx >= pool->getBaseIdx(minBase - lineSpace); - --baseIdx) { - word0 = NULL; - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base < minBase && - word1->base >= minBase - lineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) - : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta1 * fontSize) { - word2 = word1; - if (word0) { - word0->next = word1->next; - } else { - pool->setPool(baseIdx, word1->next); - } - word1 = word1->next; - word2->next = NULL; - blk->addWord(word2); - found = gTrue; - newMinBase = word2->base; - } else { - word0 = word1; - word1 = word1->next; - } - } - } - minBase = newMinBase; - - // look for words on the line below the current bottom edge of - // the block - newMaxBase = maxBase; - for (baseIdx = pool->getBaseIdx(maxBase); - baseIdx <= pool->getBaseIdx(maxBase + lineSpace); - ++baseIdx) { - word0 = NULL; - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base > maxBase && - word1->base <= maxBase + lineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) - : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta1 * fontSize) { - word2 = word1; - if (word0) { - word0->next = word1->next; - } else { - pool->setPool(baseIdx, word1->next); - } - word1 = word1->next; - word2->next = NULL; - blk->addWord(word2); - found = gTrue; - newMaxBase = word2->base; - } else { - word0 = word1; - word1 = word1->next; - } - } - } - maxBase = newMaxBase; - - // look for words that are on lines already in the block, and - // that overlap the block horizontally - for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); - baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); - ++baseIdx) { - word0 = NULL; - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base >= minBase - intraLineSpace && - word1->base <= maxBase + intraLineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMin < blk->xMax + colSpace1 && - word1->xMax > blk->xMin - colSpace1) - : (word1->yMin < blk->yMax + colSpace1 && - word1->yMax > blk->yMin - colSpace1)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta2 * fontSize) { - word2 = word1; - if (word0) { - word0->next = word1->next; - } else { - pool->setPool(baseIdx, word1->next); - } - word1 = word1->next; - word2->next = NULL; - blk->addWord(word2); - found = gTrue; - } else { - word0 = word1; - word1 = word1->next; - } - } - } - - // only check for outlying words (the next two chunks of code) - // if we didn't find anything else - if (found) { - continue; - } - - // scan down the left side of the block, looking for words - // that are near (but not overlapping) the block; if there are - // three or fewer, add them to the block - n = 0; - for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); - baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); - ++baseIdx) { - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base >= minBase - intraLineSpace && - word1->base <= maxBase + intraLineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMax <= blk->xMin && - word1->xMax > blk->xMin - colSpace2) - : (word1->yMax <= blk->yMin && - word1->yMax > blk->yMin - colSpace2)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta3 * fontSize) { - ++n; - break; - } - word1 = word1->next; - } - } - if (n > 0 && n <= 3) { - for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); - baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); - ++baseIdx) { - word0 = NULL; - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base >= minBase - intraLineSpace && - word1->base <= maxBase + intraLineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMax <= blk->xMin && - word1->xMax > blk->xMin - colSpace2) - : (word1->yMax <= blk->yMin && - word1->yMax > blk->yMin - colSpace2)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta3 * fontSize) { - word2 = word1; - if (word0) { - word0->next = word1->next; - } else { - pool->setPool(baseIdx, word1->next); - } - word1 = word1->next; - word2->next = NULL; - blk->addWord(word2); - if (word2->base < minBase) { - minBase = word2->base; - } else if (word2->base > maxBase) { - maxBase = word2->base; - } - found = gTrue; - break; - } else { - word0 = word1; - word1 = word1->next; - } - } - } - } - - // scan down the right side of the block, looking for words - // that are near (but not overlapping) the block; if there are - // three or fewer, add them to the block - n = 0; - for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); - baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); - ++baseIdx) { - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base >= minBase - intraLineSpace && - word1->base <= maxBase + intraLineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMin >= blk->xMax && - word1->xMin < blk->xMax + colSpace2) - : (word1->yMin >= blk->yMax && - word1->yMin < blk->yMax + colSpace2)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta3 * fontSize) { - ++n; - break; - } - word1 = word1->next; - } - } - if (n > 0 && n <= 3) { - for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); - baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); - ++baseIdx) { - word0 = NULL; - word1 = pool->getPool(baseIdx); - while (word1) { - if (word1->base >= minBase - intraLineSpace && - word1->base <= maxBase + intraLineSpace && - ((rot == 0 || rot == 2) - ? (word1->xMin >= blk->xMax && - word1->xMin < blk->xMax + colSpace2) - : (word1->yMin >= blk->yMax && - word1->yMin < blk->yMax + colSpace2)) && - fabs(word1->fontSize - fontSize) < - maxBlockFontSizeDelta3 * fontSize) { - word2 = word1; - if (word0) { - word0->next = word1->next; - } else { - pool->setPool(baseIdx, word1->next); - } - word1 = word1->next; - word2->next = NULL; - blk->addWord(word2); - if (word2->base < minBase) { - minBase = word2->base; - } else if (word2->base > maxBase) { - maxBase = word2->base; - } - found = gTrue; - break; - } else { - word0 = word1; - word1 = word1->next; - } - } - } - } - - } while (found); - - //~ need to compute the primary writing mode (horiz/vert) in - //~ addition to primary rotation - - // coalesce the block, and add it to the list - blk->coalesce(uMap); - if (lastBlk) { - lastBlk->next = blk; - } else { - blkList = blk; - } - lastBlk = blk; - count[rot] += blk->charCount; - if (primaryRot < 0 || count[rot] > count[primaryRot]) { - primaryRot = rot; - } - ++nBlocks; - } - } - -#if 0 // for debugging - printf("*** rotation ***\n"); - for (rot = 0; rot < 4; ++rot) { - printf(" %d: %6d\n", rot, count[rot]); - } - printf(" primary rot = %d\n", primaryRot); - printf("\n"); -#endif - -#if 0 // for debugging - printf("*** blocks ***\n"); - for (blk = blkList; blk; blk = blk->next) { - printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f\n", - blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax); - for (line = blk->lines; line; line = line->next) { - printf(" line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f\n", - line->xMin, line->xMax, line->yMin, line->yMax, line->base); - for (word0 = line->words; word0; word0 = word0->next) { - printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", - word0->xMin, word0->xMax, word0->yMin, word0->yMax, - word0->base, word0->fontSize, word0->spaceAfter); - for (i = 0; i < word0->len; ++i) { - fputc(word0->text[i] & 0xff, stdout); - } - printf("'\n"); - } - } - } - printf("\n"); -#endif - - // determine the primary direction - lrCount = 0; - for (blk = blkList; blk; blk = blk->next) { - for (line = blk->lines; line; line = line->next) { - for (word0 = line->words; word0; word0 = word0->next) { - for (i = 0; i < word0->len; ++i) { - if (unicodeTypeL(word0->text[i])) { - ++lrCount; - } else if (unicodeTypeR(word0->text[i])) { - --lrCount; - } - } - } - } - } - primaryLR = lrCount >= 0; - -#if 0 // for debugging - printf("*** direction ***\n"); - printf("lrCount = %d\n", lrCount); - printf("primaryLR = %d\n", primaryLR); -#endif - - //----- column assignment - - // sort blocks into xy order for column assignment - blocks = (TextBlock **)gmalloc(nBlocks * sizeof(TextBlock *)); - for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { - blocks[i] = blk; - } - qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot); - - // column assignment - for (i = 0; i < nBlocks; ++i) { - blk0 = blocks[i]; - col1 = 0; - for (j = 0; j < i; ++j) { - blk1 = blocks[j]; - col2 = 0; // make gcc happy - switch (primaryRot) { - case 0: - if (blk0->xMin > blk1->xMax) { - col2 = blk1->col + blk1->nColumns + 3; - } else { - col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) / - (blk1->xMax - blk1->xMin)) * - blk1->nColumns); - } - break; - case 1: - if (blk0->yMin > blk1->yMax) { - col2 = blk1->col + blk1->nColumns + 3; - } else { - col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) / - (blk1->yMax - blk1->yMin)) * - blk1->nColumns); - } - break; - case 2: - if (blk0->xMax < blk1->xMin) { - col2 = blk1->col + blk1->nColumns + 3; - } else { - col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) / - (blk1->xMin - blk1->xMax)) * - blk1->nColumns); - } - break; - case 3: - if (blk0->yMax < blk1->yMin) { - col2 = blk1->col + blk1->nColumns + 3; - } else { - col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) / - (blk1->yMin - blk1->yMax)) * - blk1->nColumns); - } - break; - } - if (col2 > col1) { - col1 = col2; - } - } - blk0->col = col1; - for (line = blk0->lines; line; line = line->next) { - for (j = 0; j <= line->len; ++j) { - line->col[j] += col1; - } - } - } - -#if 0 // for debugging - printf("*** blocks, after column assignment ***\n"); - for (blk = blkList; blk; blk = blk->next) { - printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f col=%d nCols=%d\n", - blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->col, - blk->nColumns); - for (line = blk->lines; line; line = line->next) { - printf(" line:\n"); - for (word0 = line->words; word0; word0 = word0->next) { - printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", - word0->xMin, word0->xMax, word0->yMin, word0->yMax, - word0->base, word0->fontSize, word0->spaceAfter); - for (i = 0; i < word0->len; ++i) { - fputc(word0->text[i] & 0xff, stdout); - } - printf("'\n"); - } - } - } - printf("\n"); -#endif - - //----- reading order sort - - // sort blocks into yx order (in preparation for reading order sort) - qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot); - - // compute space on left and right sides of each block - for (i = 0; i < nBlocks; ++i) { - blk0 = blocks[i]; - for (j = 0; j < nBlocks; ++j) { - blk1 = blocks[j]; - if (blk1 != blk0) { - blk0->updatePriMinMax(blk1); - } - } - } - -#if 0 // for debugging - printf("*** blocks, after yx sort ***\n"); - for (i = 0; i < nBlocks; ++i) { - blk = blocks[i]; - printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f space=%.2f..%.2f\n", - blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, - blk->priMin, blk->priMax); - for (line = blk->lines; line; line = line->next) { - printf(" line:\n"); - for (word0 = line->words; word0; word0 = word0->next) { - printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", - word0->xMin, word0->xMax, word0->yMin, word0->yMax, - word0->base, word0->fontSize, word0->spaceAfter); - for (j = 0; j < word0->len; ++j) { - fputc(word0->text[j] & 0xff, stdout); - } - printf("'\n"); - } - } - } - printf("\n"); -#endif - - // build the flows - //~ this needs to be adjusted for writing mode (vertical text) - //~ this also needs to account for right-to-left column ordering - blkArray = (TextBlock **)gmalloc(nBlocks * sizeof(TextBlock *)); - memcpy(blkArray, blocks, nBlocks * sizeof(TextBlock *)); - flows = lastFlow = NULL; - firstBlkIdx = 0; - nBlocksLeft = nBlocks; - while (nBlocksLeft > 0) { - - // find the upper-left-most block - for (; !blkArray[firstBlkIdx]; ++firstBlkIdx) ; - i = firstBlkIdx; - blk = blkArray[i]; - for (j = firstBlkIdx + 1; j < nBlocks; ++j) { - blk1 = blkArray[j]; - if (blk1) { - if (blk && blk->secondaryDelta(blk1) > 0) { - break; - } - if (blk1->primaryCmp(blk) < 0) { - i = j; - blk = blk1; - } - } - } - blkArray[i] = NULL; - --nBlocksLeft; - blk->next = NULL; - - // create a new flow, starting with the upper-left-most block - flow = new TextFlow(this, blk); - if (lastFlow) { - lastFlow->next = flow; - } else { - flows = flow; - } - lastFlow = flow; - fontSize = blk->lines->words->fontSize; - - // push the upper-left-most block on the stack - blk->stackNext = NULL; - blkStack = blk; - - // find the other blocks in this flow - while (blkStack) { - - // find the upper-left-most block under (but within - // maxBlockSpacing of) the top block on the stack - blkSpace = maxBlockSpacing * blkStack->lines->words->fontSize; - blk = NULL; - i = -1; - for (j = firstBlkIdx; j < nBlocks; ++j) { - blk1 = blkArray[j]; - if (blk1) { - if (blkStack->secondaryDelta(blk1) > blkSpace) { - break; - } - if (blk && blk->secondaryDelta(blk1) > 0) { - break; - } - if (blk1->isBelow(blkStack) && - (!blk || blk1->primaryCmp(blk) < 0)) { - i = j; - blk = blk1; - } - } - } - - // if a suitable block was found, add it to the flow and push it - // onto the stack - if (blk && flow->blockFits(blk, blkStack)) { - blkArray[i] = NULL; - --nBlocksLeft; - blk->next = NULL; - flow->addBlock(blk); - fontSize = blk->lines->words->fontSize; - blk->stackNext = blkStack; - blkStack = blk; - - // otherwise (if there is no block under the top block or the - // block is not suitable), pop the stack - } else { - blkStack = blkStack->stackNext; - } - } - } - gfree(blkArray); - -#if 0 // for debugging - printf("*** flows ***\n"); - for (flow = flows; flow; flow = flow->next) { - printf("flow: x=%.2f..%.2f y=%.2f..%.2f pri:%.2f..%.2f\n", - flow->xMin, flow->xMax, flow->yMin, flow->yMax, - flow->priMin, flow->priMax); - for (blk = flow->blocks; blk; blk = blk->next) { - printf(" block: rot=%d x=%.2f..%.2f y=%.2f..%.2f pri=%.2f..%.2f\n", - blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, - blk->priMin, blk->priMax); - for (line = blk->lines; line; line = line->next) { - printf(" line:\n"); - for (word0 = line->words; word0; word0 = word0->next) { - printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '", - word0->xMin, word0->xMax, word0->yMin, word0->yMax, - word0->base, word0->fontSize, word0->spaceAfter); - for (i = 0; i < word0->len; ++i) { - fputc(word0->text[i] & 0xff, stdout); - } - printf("'\n"); - } - } - } - } - printf("\n"); -#endif - - if (uMap) { - uMap->decRefCnt(); - } -} - -GBool TextPage::findText(Unicode *s, int len, - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - double *xMin, double *yMin, - double *xMax, double *yMax) { - TextBlock *blk; - TextLine *line; - Unicode *p; - Unicode u1, u2; - int m, i, j, k; - double xStart, yStart, xStop, yStop; - double xMin0, yMin0, xMax0, yMax0; - double xMin1, yMin1, xMax1, yMax1; - GBool found; - - //~ needs to handle right-to-left text - - if (rawOrder) { - return gFalse; - } - - xStart = yStart = xStop = yStop = 0; - if (startAtLast && haveLastFind) { - xStart = lastFindXMin; - yStart = lastFindYMin; - } else if (!startAtTop) { - xStart = *xMin; - yStart = *yMin; - } - if (stopAtLast && haveLastFind) { - xStop = lastFindXMin; - yStop = lastFindYMin; - } else if (!stopAtBottom) { - xStop = *xMax; - yStop = *yMax; - } - - found = gFalse; - xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy - xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy - - for (i = 0; i < nBlocks; ++i) { - blk = blocks[i]; - - // check: is the block above the top limit? - if (!startAtTop && blk->yMax < yStart) { - continue; - } - - // check: is the block below the bottom limit? - if (!stopAtBottom && blk->yMin > yStop) { - break; - } - - for (line = blk->lines; line; line = line->next) { - - // check: is the line above the top limit? - if (!startAtTop && line->yMin < yStart) { - continue; - } - - // check: is the line below the bottom limit? - if (!stopAtBottom && line->yMin > yStop) { - continue; - } - - // search each position in this line - m = line->len; - for (j = 0, p = line->text; j <= m - len; ++j, ++p) { - - // compare the strings - for (k = 0; k < len; ++k) { -#if 1 //~ this lowercases Latin A-Z only -- this will eventually be - //~ extended to handle other character sets - if (p[k] >= 0x41 && p[k] <= 0x5a) { - u1 = p[k] + 0x20; - } else { - u1 = p[k]; - } - if (s[k] >= 0x41 && s[k] <= 0x5a) { - u2 = s[k] + 0x20; - } else { - u2 = s[k]; - } -#endif - if (u1 != u2) { - break; - } - } - - // found it - if (k == len) { - switch (line->rot) { - case 0: - xMin1 = line->edge[j]; - xMax1 = line->edge[j + len]; - yMin1 = line->yMin; - yMax1 = line->yMax; - break; - case 1: - xMin1 = line->xMin; - xMax1 = line->xMax; - yMin1 = line->edge[j]; - yMax1 = line->edge[j + len]; - break; - case 2: - xMin1 = line->edge[j + len]; - xMax1 = line->edge[j]; - yMin1 = line->yMin; - yMax1 = line->yMax; - break; - case 3: - xMin1 = line->xMin; - xMax1 = line->xMax; - yMin1 = line->edge[j + len]; - yMax1 = line->edge[j]; - break; - } - if ((startAtTop || - yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) && - (stopAtBottom || - yMin1 < yStop || (yMin1 == yStop && xMin1 < yStop))) { - if (!found || yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) { - xMin0 = xMin1; - xMax0 = xMax1; - yMin0 = yMin1; - yMax0 = yMax1; - found = gTrue; - } - } - } - } - } - } - - if (found) { - *xMin = xMin0; - *xMax = xMax0; - *yMin = yMin0; - *yMax = yMax0; - lastFindXMin = xMin0; - lastFindYMin = yMin0; - haveLastFind = gTrue; - return gTrue; - } - - return gFalse; -} - -GString *TextPage::getText(double xMin, double yMin, - double xMax, double yMax) { - GString *s; - UnicodeMap *uMap; - GBool isUnicode; - TextBlock *blk; - TextLine *line; - TextLineFrag *frags; - int nFrags, fragsSize; - TextLineFrag *frag; - char space[8], eol[16]; - int spaceLen, eolLen; - int lastRot; - double x, y; - int col, idx0, idx1, i, j; - GBool multiLine, oneRot; - - s = new GString(); - - if (rawOrder) { - return s; - } - - // get the output encoding - if (!(uMap = globalParams->getTextEncoding())) { - return s; - } - isUnicode = uMap->isUnicode(); - spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); - eolLen = 0; // make gcc happy - switch (globalParams->getTextEOL()) { - case eolUnix: - eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); - break; - case eolDOS: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); - break; - case eolMac: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - break; - } - - //~ writing mode (horiz/vert) - - // collect the line fragments that are in the rectangle - fragsSize = 256; - frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag)); - nFrags = 0; - lastRot = -1; - oneRot = gTrue; - for (i = 0; i < nBlocks; ++i) { - blk = blocks[i]; - if (xMin < blk->xMax && blk->xMin < xMax && - yMin < blk->yMax && blk->yMin < yMax) { - for (line = blk->lines; line; line = line->next) { - if (xMin < line->xMax && line->xMin < xMax && - yMin < line->yMax && line->yMin < yMax) { - idx0 = idx1 = -1; - switch (line->rot) { - case 0: - y = 0.5 * (line->yMin + line->yMax); - if (yMin < y && y < yMax) { - j = 0; - while (j < line->len) { - if (0.5 * (line->edge[j] + line->edge[j+1]) > xMin) { - idx0 = j; - break; - } - ++j; - } - j = line->len - 1; - while (j >= 0) { - if (0.5 * (line->edge[j] + line->edge[j+1]) < xMax) { - idx1 = j; - break; - } - --j; - } - } - break; - case 1: - x = 0.5 * (line->xMin + line->xMax); - if (xMin < x && x < xMax) { - j = 0; - while (j < line->len) { - if (0.5 * (line->edge[j] + line->edge[j+1]) > yMin) { - idx0 = j; - break; - } - ++j; - } - j = line->len - 1; - while (j >= 0) { - if (0.5 * (line->edge[j] + line->edge[j+1]) < yMax) { - idx1 = j; - break; - } - --j; - } - } - break; - case 2: - y = 0.5 * (line->yMin + line->yMax); - if (yMin < y && y < yMax) { - j = 0; - while (j < line->len) { - if (0.5 * (line->edge[j] + line->edge[j+1]) < xMax) { - idx0 = j; - break; - } - ++j; - } - j = line->len - 1; - while (j >= 0) { - if (0.5 * (line->edge[j] + line->edge[j+1]) > xMin) { - idx1 = j; - break; - } - --j; - } - } - break; - case 3: - x = 0.5 * (line->xMin + line->xMax); - if (xMin < x && x < xMax) { - j = 0; - while (j < line->len) { - if (0.5 * (line->edge[j] + line->edge[j+1]) < yMax) { - idx0 = j; - break; - } - ++j; - } - j = line->len - 1; - while (j >= 0) { - if (0.5 * (line->edge[j] + line->edge[j+1]) > yMin) { - idx1 = j; - break; - } - --j; - } - } - break; - } - if (idx0 >= 0 && idx1 >= 0) { - if (nFrags == fragsSize) { - fragsSize *= 2; - frags = (TextLineFrag *) - grealloc(frags, fragsSize * sizeof(TextLineFrag)); - } - frags[nFrags].init(line, idx0, idx1 - idx0 + 1); - ++nFrags; - if (lastRot >= 0 && line->rot != lastRot) { - oneRot = gFalse; - } - lastRot = line->rot; - } - } - } - } - } - - // sort the fragments and generate the string - if (nFrags > 0) { - - for (i = 0; i < nFrags; ++i) { - frags[i].computeCoords(oneRot); - } - assignColumns(frags, nFrags, oneRot); - - // if all lines in the region have the same rotation, use it; - // otherwise, use the page's primary rotation - if (oneRot) { - qsort(frags, nFrags, sizeof(TextLineFrag), - &TextLineFrag::cmpYXLineRot); - } else { - qsort(frags, nFrags, sizeof(TextLineFrag), - &TextLineFrag::cmpYXPrimaryRot); - } - - col = 0; - multiLine = gFalse; - for (i = 0; i < nFrags; ++i) { - frag = &frags[i]; - - // insert a return - if (frag->col < col || - (i > 0 && fabs(frag->base - frags[i-1].base) > - maxIntraLineDelta * frags[i-1].line->words->fontSize)) { - s->append(eol, eolLen); - col = 0; - multiLine = gTrue; - } - - // column alignment - for (; col < frag->col; ++col) { - s->append(space, spaceLen); - } - - // get the fragment text - col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); - } - - if (multiLine) { - s->append(eol, eolLen); - } - } - - gfree(frags); - uMap->decRefCnt(); - - return s; -} - -GBool TextPage::findCharRange(int pos, int length, - double *xMin, double *yMin, - double *xMax, double *yMax) { - TextBlock *blk; - TextLine *line; - TextWord *word; - double xMin0, xMax0, yMin0, yMax0; - double xMin1, xMax1, yMin1, yMax1; - GBool first; - int i, j0, j1; - - if (rawOrder) { - return gFalse; - } - - //~ this doesn't correctly handle: - //~ - ranges split across multiple lines (the highlighted region - //~ is the bounding box of all the parts of the range) - //~ - cases where characters don't convert one-to-one into Unicode - first = gTrue; - xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy - xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy - for (i = 0; i < nBlocks; ++i) { - blk = blocks[i]; - for (line = blk->lines; line; line = line->next) { - for (word = line->words; word; word = word->next) { - if (pos < word->charPos + word->charLen && - word->charPos < pos + length) { - j0 = pos - word->charPos; - if (j0 < 0) { - j0 = 0; - } - j1 = pos + length - 1 - word->charPos; - if (j1 >= word->len) { - j1 = word->len - 1; - } - switch (line->rot) { - case 0: - xMin1 = word->edge[j0]; - xMax1 = word->edge[j1 + 1]; - yMin1 = word->yMin; - yMax1 = word->yMax; - break; - case 1: - xMin1 = word->xMin; - xMax1 = word->xMax; - yMin1 = word->edge[j0]; - yMax1 = word->edge[j1 + 1]; - break; - case 2: - xMin1 = word->edge[j1 + 1]; - xMax1 = word->edge[j0]; - yMin1 = word->yMin; - yMax1 = word->yMax; - break; - case 3: - xMin1 = word->xMin; - xMax1 = word->xMax; - yMin1 = word->edge[j1 + 1]; - yMax1 = word->edge[j0]; - break; - } - if (first || xMin1 < xMin0) { - xMin0 = xMin1; - } - if (first || xMax1 > xMax0) { - xMax0 = xMax1; - } - if (first || yMin1 < yMin0) { - yMin0 = yMin1; - } - if (first || yMax1 > yMax0) { - yMax0 = yMax1; - } - first = gFalse; - } - } - } - } - if (!first) { - *xMin = xMin0; - *xMax = xMax0; - *yMin = yMin0; - *yMax = yMax0; - return gTrue; - } - return gFalse; -} - -void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, - GBool physLayout) { - UnicodeMap *uMap; - TextFlow *flow; - TextBlock *blk; - TextLine *line; - TextLineFrag *frags; - TextWord *word; - int nFrags, fragsSize; - TextLineFrag *frag; - char space[8], eol[16], eop[8]; - int spaceLen, eolLen, eopLen; - GBool pageBreaks; - GString *s; - int col, i, d, n; - - // get the output encoding - if (!(uMap = globalParams->getTextEncoding())) { - return; - } - spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); - eolLen = 0; // make gcc happy - switch (globalParams->getTextEOL()) { - case eolUnix: - eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); - break; - case eolDOS: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); - break; - case eolMac: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - break; - } - eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop)); - pageBreaks = globalParams->getTextPageBreaks(); - - //~ writing mode (horiz/vert) - - // output the page in raw (content stream) order - if (rawOrder) { - - for (word = rawWords; word; word = word->next) { - s = new GString(); - dumpFragment(word->text, word->len, uMap, s); - (*outputFunc)(outputStream, s->getCString(), s->getLength()); - delete s; - if (word->next && - fabs(word->next->base - word->base) < - maxIntraLineDelta * word->fontSize) { - if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) { - (*outputFunc)(outputStream, space, spaceLen); - } - } else { - (*outputFunc)(outputStream, eol, eolLen); - } - } - - // output the page, maintaining the original physical layout - } else if (physLayout) { - - // collect the line fragments for the page and sort them - fragsSize = 256; - frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag)); - nFrags = 0; - for (i = 0; i < nBlocks; ++i) { - blk = blocks[i]; - for (line = blk->lines; line; line = line->next) { - if (nFrags == fragsSize) { - fragsSize *= 2; - frags = (TextLineFrag *)grealloc(frags, - fragsSize * sizeof(TextLineFrag)); - } - frags[nFrags].init(line, 0, line->len); - frags[nFrags].computeCoords(gTrue); - ++nFrags; - } - } - qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpYXPrimaryRot); - - // generate output - col = 0; - for (i = 0; i < nFrags; ++i) { - frag = &frags[i]; - - // column alignment - for (; col < frag->col; ++col) { - (*outputFunc)(outputStream, space, spaceLen); - } - - // print the line - s = new GString(); - col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); - (*outputFunc)(outputStream, s->getCString(), s->getLength()); - delete s; - - // print one or more returns if necessary - if (i == nFrags - 1 || - frags[i+1].col < col || - fabs(frags[i+1].base - frag->base) > - maxIntraLineDelta * frag->line->words->fontSize) { - if (i < nFrags - 1) { - d = (int)((frags[i+1].base - frag->base) / - frag->line->words->fontSize); - if (d < 1) { - d = 1; - } else if (d > 5) { - d = 5; - } - } else { - d = 1; - } - for (; d > 0; --d) { - (*outputFunc)(outputStream, eol, eolLen); - } - col = 0; - } - } - - gfree(frags); - - // output the page, "undoing" the layout - } else { - for (flow = flows; flow; flow = flow->next) { - for (blk = flow->blocks; blk; blk = blk->next) { - for (line = blk->lines; line; line = line->next) { - n = line->len; - if (line->hyphenated && (line->next || blk->next)) { - --n; - } - s = new GString(); - dumpFragment(line->text, n, uMap, s); - (*outputFunc)(outputStream, s->getCString(), s->getLength()); - delete s; - if (!line->hyphenated) { - if (line->next) { - (*outputFunc)(outputStream, space, spaceLen); - } else if (blk->next) { - //~ this is a bit of a kludge - we should really do a more - //~ intelligent determination of paragraphs - if (blk->next->lines->words->fontSize == - blk->lines->words->fontSize) { - (*outputFunc)(outputStream, space, spaceLen); - } else { - (*outputFunc)(outputStream, eol, eolLen); - } - } - } - } - } - (*outputFunc)(outputStream, eol, eolLen); - (*outputFunc)(outputStream, eol, eolLen); - } - } - - // end of page - if (pageBreaks) { - (*outputFunc)(outputStream, eop, eopLen); - (*outputFunc)(outputStream, eol, eolLen); - } - - uMap->decRefCnt(); -} - -void TextPage::assignColumns(TextLineFrag *frags, int nFrags, GBool oneRot) { - TextLineFrag *frag0, *frag1; - int rot, col1, col2, i, j, k; - - // all text in the region has the same rotation -- recompute the - // column numbers based only on the text in the region - if (oneRot) { - qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpXYLineRot); - rot = frags[0].line->rot; - for (i = 0; i < nFrags; ++i) { - frag0 = &frags[i]; - col1 = 0; - for (j = 0; j < i; ++j) { - frag1 = &frags[j]; - col2 = 0; // make gcc happy - switch (rot) { - case 0: - if (frag0->xMin >= frag1->xMax) { - col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - - frag1->line->col[frag1->start]) + 1; - } else { - for (k = frag1->start; - k < frag1->start + frag1->len && - frag0->xMin >= 0.5 * (frag1->line->edge[k] + - frag1->line->edge[k+1]); - ++k) ; - col2 = frag1->col + - frag1->line->col[k] - frag1->line->col[frag1->start]; - } - break; - case 1: - if (frag0->yMin >= frag1->yMax) { - col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - - frag1->line->col[frag1->start]) + 1; - } else { - for (k = frag1->start; - k < frag1->start + frag1->len && - frag0->yMin >= 0.5 * (frag1->line->edge[k] + - frag1->line->edge[k+1]); - ++k) ; - col2 = frag1->col + - frag1->line->col[k] - frag1->line->col[frag1->start]; - } - break; - case 2: - if (frag0->xMax <= frag1->xMin) { - col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - - frag1->line->col[frag1->start]) + 1; - } else { - for (k = frag1->start; - k < frag1->start + frag1->len && - frag0->xMax <= 0.5 * (frag1->line->edge[k] + - frag1->line->edge[k+1]); - ++k) ; - col2 = frag1->col + - frag1->line->col[k] - frag1->line->col[frag1->start]; - } - break; - case 3: - if (frag0->yMax <= frag1->yMin) { - col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - - frag1->line->col[frag1->start]) + 1; - } else { - for (k = frag1->start; - k < frag1->start + frag1->len && - frag0->yMax <= 0.5 * (frag1->line->edge[k] + - frag1->line->edge[k+1]); - ++k) ; - col2 = frag1->col + - frag1->line->col[k] - frag1->line->col[frag1->start]; - } - break; - } - if (col2 > col1) { - col1 = col2; - } - } - frag0->col = col1; - } - - // the region includes text at different rotations -- use the - // globally assigned column numbers, offset by the minimum column - // number (i.e., shift everything over to column 0) - } else { - col1 = frags[0].col; - for (i = 1; i < nFrags; ++i) { - if (frags[i].col < col1) { - col1 = frags[i].col; - } - } - for (i = 0; i < nFrags; ++i) { - frags[i].col -= col1; - } - } -} - -int TextPage::dumpFragment(Unicode *text, int len, UnicodeMap *uMap, - GString *s) { - char lre[8], rle[8], popdf[8], buf[8]; - int lreLen, rleLen, popdfLen, n; - int nCols, i, j, k; - - nCols = 0; - - if (uMap->isUnicode()) { - - lreLen = uMap->mapUnicode(0x202a, lre, sizeof(lre)); - rleLen = uMap->mapUnicode(0x202b, rle, sizeof(rle)); - popdfLen = uMap->mapUnicode(0x202c, popdf, sizeof(popdf)); - - if (primaryLR) { - - i = 0; - while (i < len) { - // output a left-to-right section - for (j = i; j < len && !unicodeTypeR(text[j]); ++j) ; - for (k = i; k < j; ++k) { - n = uMap->mapUnicode(text[k], buf, sizeof(buf)); - s->append(buf, n); - ++nCols; - } - i = j; - // output a right-to-left section - for (j = i; j < len && !unicodeTypeL(text[j]); ++j) ; - if (j > i) { - s->append(rle, rleLen); - for (k = j - 1; k >= i; --k) { - n = uMap->mapUnicode(text[k], buf, sizeof(buf)); - s->append(buf, n); - ++nCols; - } - s->append(popdf, popdfLen); - i = j; - } - } - - } else { - - s->append(rle, rleLen); - i = len - 1; - while (i >= 0) { - // output a right-to-left section - for (j = i; j >= 0 && !unicodeTypeL(text[j]); --j) ; - for (k = i; k > j; --k) { - n = uMap->mapUnicode(text[k], buf, sizeof(buf)); - s->append(buf, n); - ++nCols; - } - i = j; - // output a left-to-right section - for (j = i; j >= 0 && !unicodeTypeR(text[j]); --j) ; - if (j < i) { - s->append(lre, lreLen); - for (k = j + 1; k <= i; ++k) { - n = uMap->mapUnicode(text[k], buf, sizeof(buf)); - s->append(buf, n); - ++nCols; - } - s->append(popdf, popdfLen); - i = j; - } - } - s->append(popdf, popdfLen); - - } - - } else { - for (i = 0; i < len; ++i) { - n = uMap->mapUnicode(text[i], buf, sizeof(buf)); - s->append(buf, n); - nCols += n; - } - } - - return nCols; -} - -#if TEXTOUT_WORD_LIST -TextWordList *TextPage::makeWordList(GBool physLayout) { - return new TextWordList(this, physLayout); -} -#endif - -//------------------------------------------------------------------------ -// TextOutputDev -//------------------------------------------------------------------------ - -static void outputToFile(void *stream, char *text, int len) { - fwrite(text, 1, len, (FILE *)stream); -} - -TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA, - GBool rawOrderA, GBool append) { - text = NULL; - physLayout = physLayoutA; - rawOrder = rawOrderA; - ok = gTrue; - - // open file - needClose = gFalse; - if (fileName) { - if (!strcmp(fileName, "-")) { - outputStream = stdout; -#ifdef WIN32 - // keep DOS from munging the end-of-line characters - setmode(fileno(stdout), O_BINARY); -#endif - } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) { - needClose = gTrue; - } else { - error(-1, "Couldn't open text file '%s'", fileName); - ok = gFalse; - return; - } - outputFunc = &outputToFile; - } else { - outputStream = NULL; - } - - // set up text object - text = new TextPage(rawOrderA); -} - -TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, - GBool physLayoutA, GBool rawOrderA) { - outputFunc = func; - outputStream = stream; - needClose = gFalse; - physLayout = physLayoutA; - rawOrder = rawOrderA; - text = new TextPage(rawOrderA); - ok = gTrue; -} - -TextOutputDev::~TextOutputDev() { - if (needClose) { -#ifdef MACOS - ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle); -#endif - fclose((FILE *)outputStream); - } - if (text) { - delete text; - } -} - -void TextOutputDev::startPage(int pageNum, GfxState *state) { - text->startPage(state); -} - -void TextOutputDev::endPage() { - text->endPage(); - text->coalesce(physLayout); - if (outputStream) { - text->dump(outputStream, outputFunc, physLayout); - } -} - -void TextOutputDev::updateFont(GfxState *state) { - text->updateFont(state); -} - -void TextOutputDev::beginString(GfxState *state, GString *s) { -} - -void TextOutputDev::endString(GfxState *state) { -} - -void TextOutputDev::drawChar(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode c, Unicode *u, int uLen) { - text->addChar(state, x, y, dx, dy, c, u, uLen); -} - -GBool TextOutputDev::findText(Unicode *s, int len, - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - double *xMin, double *yMin, - double *xMax, double *yMax) { - return text->findText(s, len, startAtTop, stopAtBottom, - startAtLast, stopAtLast, xMin, yMin, xMax, yMax); -} - -GString *TextOutputDev::getText(double xMin, double yMin, - double xMax, double yMax) { - return text->getText(xMin, yMin, xMax, yMax); -} - -GBool TextOutputDev::findCharRange(int pos, int length, - double *xMin, double *yMin, - double *xMax, double *yMax) { - return text->findCharRange(pos, length, xMin, yMin, xMax, yMax); -} - -#if TEXTOUT_WORD_LIST -TextWordList *TextOutputDev::makeWordList() { - return text->makeWordList(physLayout); -} -#endif -- cgit v0.9.1