Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/pdf/xpdf/TextOutputDev.cc
diff options
context:
space:
mode:
authorKristian Høgsberg <krh@redhat.com>2005-03-01 22:24:10 (GMT)
committer Kristian Høgsberg <krh@src.gnome.org>2005-03-01 22:24:10 (GMT)
commit02eb16fef45712a91e24f6471b9e2f31249c888e (patch)
treed5579258f1d7fbd5a280fb2f1b97b74004b1dea9 /pdf/xpdf/TextOutputDev.cc
parent332afee84ff4eb7df326d96f07efd6f82a87c0a2 (diff)
Use poppler instead of including xpdf source code. Poppler is a fork of
2005-03-01 Kristian Høgsberg <krh@redhat.com> Use poppler instead of including xpdf source code. Poppler is a fork of xpdf to build it as a shared library. See http://freedesktop.org/wiki/Software/poppler. * pdf/xpdf/*, pdf/goo/*, pdf/splash/*, pdf/fofi/*: Remove included xpdf fork. * pdf/Makefile.am: Build libpdfdocument.a here. * pdf/GDKSplashOutputDev.cc: * pdf/GDKSplashOutputDev.h: * pdf/GnomeVFSStream.cc: * pdf/GnomeVFSStream.h: * pdf-document.cc: * pdf-document.h: * test-gdk-output-dev.cc * Thumb.cc: * Thumb.h: Pull these files out of pdf/xpdf and adjust to compile against poppler.
Diffstat (limited to 'pdf/xpdf/TextOutputDev.cc')
-rw-r--r--pdf/xpdf/TextOutputDev.cc3529
1 files changed, 0 insertions, 3529 deletions
diff --git a/pdf/xpdf/TextOutputDev.cc b/pdf/xpdf/TextOutputDev.cc
deleted file mode 100644
index a492e7f..0000000
--- a/pdf/xpdf/TextOutputDev.cc
+++ /dev/null
@@ -1,3529 +0,0 @@
-//========================================================================
-//
-// TextOutputDev.cc
-//
-// Copyright 1997-2003 Glyph & Cog, LLC
-//
-//========================================================================
-
-#include <aconf.h>
-
-#ifdef USE_GCC_PRAGMAS
-#pragma implementation
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <math.h>
-#include <ctype.h>
-#ifdef WIN32
-#include <fcntl.h> // for O_BINARY
-#include <io.h> // for setmode
-#endif
-#include "gmem.h"
-#include "GString.h"
-#include "GList.h"
-#include "xpdfconfig.h"
-#include "Error.h"
-#include "GlobalParams.h"
-#include "UnicodeMap.h"
-#include "UnicodeTypeTable.h"
-#include "GfxState.h"
-#include "TextOutputDev.h"
-
-#ifdef MACOS
-// needed for setting type/creator of MacOS files
-#include "ICSupport.h"
-#endif
-
-//------------------------------------------------------------------------
-// parameters
-//------------------------------------------------------------------------
-
-// Each bucket in a text pool includes baselines within a range of
-// this many points.
-#define textPoolStep 4
-
-// Inter-character space width which will cause addChar to start a new
-// word.
-#define minWordBreakSpace 0.1
-
-// Negative inter-character space width, i.e., overlap, which will
-// cause addChar to start a new word.
-#define minDupBreakOverlap 0.2
-
-// Max distance between baselines of two lines within a block, as a
-// fraction of the font size.
-#define maxLineSpacingDelta 1.5
-
-// Max difference in primary font sizes on two lines in the same
-// block. Delta1 is used when examining new lines above and below the
-// current block; delta2 is used when examining text that overlaps the
-// current block; delta3 is used when examining text to the left and
-// right of the current block.
-#define maxBlockFontSizeDelta1 0.05
-#define maxBlockFontSizeDelta2 0.6
-#define maxBlockFontSizeDelta3 0.2
-
-// Max difference in font sizes inside a word.
-#define maxWordFontSizeDelta 0.05
-
-// Maximum distance between baselines of two words on the same line,
-// e.g., distance between subscript or superscript and the primary
-// baseline, as a fraction of the font size.
-#define maxIntraLineDelta 0.5
-
-// Minimum inter-word spacing, as a fraction of the font size. (Only
-// used for raw ordering.)
-#define minWordSpacing 0.15
-
-// Maximum inter-word spacing, as a fraction of the font size.
-#define maxWordSpacing 1.5
-
-// Maximum horizontal spacing which will allow a word to be pulled
-// into a block.
-#define minColSpacing1 0.3
-
-// Minimum spacing between columns, as a fraction of the font size.
-#define minColSpacing2 1.0
-
-// Maximum vertical spacing between blocks within a flow, as a
-// multiple of the font size.
-#define maxBlockSpacing 2.5
-
-// Minimum spacing between characters within a word, as a fraction of
-// the font size.
-#define minCharSpacing -0.2
-
-// Maximum spacing between characters within a word, as a fraction of
-// the font size, when there is no obvious extra-wide character
-// spacing.
-#define maxCharSpacing 0.03
-
-// When extra-wide character spacing is detected, the inter-character
-// space threshold is set to the minimum inter-character space
-// multiplied by this constant.
-#define maxWideCharSpacingMul 1.3
-
-// Max difference in primary,secondary coordinates (as a fraction of
-// the font size) allowed for duplicated text (fake boldface, drop
-// shadows) which is to be discarded.
-#define dupMaxPriDelta 0.1
-#define dupMaxSecDelta 0.2
-
-//------------------------------------------------------------------------
-// TextFontInfo
-//------------------------------------------------------------------------
-
-TextFontInfo::TextFontInfo(GfxState *state) {
- gfxFont = state->getFont();
-#if TEXTOUT_WORD_LIST
- fontName = (gfxFont && gfxFont->getOrigName())
- ? gfxFont->getOrigName()->copy()
- : (GString *)NULL;
-#endif
-}
-
-TextFontInfo::~TextFontInfo() {
-#if TEXTOUT_WORD_LIST
- if (fontName) {
- delete fontName;
- }
-#endif
-}
-
-GBool TextFontInfo::matches(GfxState *state) {
- return state->getFont() == gfxFont;
-}
-
-//------------------------------------------------------------------------
-// TextWord
-//------------------------------------------------------------------------
-
-TextWord::TextWord(GfxState *state, int rotA, double x0, double y0,
- int charPosA, TextFontInfo *fontA, double fontSizeA) {
- GfxFont *gfxFont;
- double x, y, ascent, descent;
-
- rot = rotA;
- charPos = charPosA;
- charLen = 0;
- font = fontA;
- fontSize = fontSizeA;
- state->transform(x0, y0, &x, &y);
- if ((gfxFont = font->gfxFont)) {
- ascent = gfxFont->getAscent() * fontSize;
- descent = gfxFont->getDescent() * fontSize;
- } else {
- // this means that the PDF file draws text without a current font,
- // which should never happen
- ascent = 0.95 * fontSize;
- descent = -0.35 * fontSize;
- }
- switch (rot) {
- case 0:
- yMin = y - ascent;
- yMax = y - descent;
- if (yMin == yMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
- yMin = y;
- yMax = y + 1;
- }
- base = y;
- break;
- case 1:
- xMin = x + descent;
- xMax = x + ascent;
- if (xMin == xMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
- xMin = x;
- xMax = x + 1;
- }
- base = x;
- break;
- case 2:
- yMin = y + descent;
- yMax = y + ascent;
- if (yMin == yMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
- yMin = y;
- yMax = y + 1;
- }
- base = y;
- break;
- case 3:
- xMin = x - ascent;
- xMax = x - descent;
- if (xMin == xMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
- xMin = x;
- xMax = x + 1;
- }
- base = x;
- break;
- }
- text = NULL;
- edge = NULL;
- len = size = 0;
- spaceAfter = gFalse;
- next = NULL;
-
-#if TEXTOUT_WORD_LIST
- GfxRGB rgb;
-
- if ((state->getRender() & 3) == 1) {
- state->getStrokeRGB(&rgb);
- } else {
- state->getFillRGB(&rgb);
- }
- colorR = rgb.r;
- colorG = rgb.g;
- colorB = rgb.b;
-#endif
-}
-
-TextWord::~TextWord() {
- gfree(text);
- gfree(edge);
-}
-
-void TextWord::addChar(GfxState *state, double x, double y,
- double dx, double dy, Unicode u) {
- if (len == size) {
- size += 16;
- text = (Unicode *)grealloc(text, size * sizeof(Unicode));
- edge = (double *)grealloc(edge, (size + 1) * sizeof(double));
- }
- text[len] = u;
- switch (rot) {
- case 0:
- if (len == 0) {
- xMin = x;
- }
- edge[len] = x;
- xMax = edge[len+1] = x + dx;
- break;
- case 1:
- if (len == 0) {
- yMin = y;
- }
- edge[len] = y;
- yMax = edge[len+1] = y + dy;
- break;
- case 2:
- if (len == 0) {
- xMax = x;
- }
- edge[len] = x;
- xMin = edge[len+1] = x + dx;
- break;
- case 3:
- if (len == 0) {
- yMax = y;
- }
- edge[len] = y;
- yMin = edge[len+1] = y + dy;
- break;
- }
- ++len;
-}
-
-void TextWord::merge(TextWord *word) {
- int i;
-
- if (word->xMin < xMin) {
- xMin = word->xMin;
- }
- if (word->yMin < yMin) {
- yMin = word->yMin;
- }
- if (word->xMax > xMax) {
- xMax = word->xMax;
- }
- if (word->yMax > yMax) {
- yMax = word->yMax;
- }
- if (len + word->len > size) {
- size = len + word->len;
- text = (Unicode *)grealloc(text, size * sizeof(Unicode));
- edge = (double *)grealloc(edge, (size + 1) * sizeof(double));
- }
- for (i = 0; i < word->len; ++i) {
- text[len + i] = word->text[i];
- edge[len + i] = word->edge[i];
- }
- edge[len + word->len] = word->edge[word->len];
- len += word->len;
- charLen += word->charLen;
-}
-
-inline int TextWord::primaryCmp(TextWord *word) {
- double cmp;
-
- cmp = 0; // make gcc happy
- switch (rot) {
- case 0:
- cmp = xMin - word->xMin;
- break;
- case 1:
- cmp = yMin - word->yMin;
- break;
- case 2:
- cmp = word->xMax - xMax;
- break;
- case 3:
- cmp = word->yMax - yMax;
- break;
- }
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-double TextWord::primaryDelta(TextWord *word) {
- double delta;
-
- delta = 0; // make gcc happy
- switch (rot) {
- case 0:
- delta = word->xMin - xMax;
- break;
- case 1:
- delta = word->yMin - yMax;
- break;
- case 2:
- delta = xMin - word->xMax;
- break;
- case 3:
- delta = yMin - word->yMax;
- break;
- }
- return delta;
-}
-
-int TextWord::cmpYX(const void *p1, const void *p2) {
- TextWord *word1 = *(TextWord **)p1;
- TextWord *word2 = *(TextWord **)p2;
- double cmp;
-
- cmp = word1->yMin - word2->yMin;
- if (cmp == 0) {
- cmp = word1->xMin - word2->xMin;
- }
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-#if TEXTOUT_WORD_LIST
-
-GString *TextWord::getText() {
- GString *s;
- UnicodeMap *uMap;
- char buf[8];
- int n, i;
-
- s = new GString();
- if (!(uMap = globalParams->getTextEncoding())) {
- return s;
- }
- for (i = 0; i < len; ++i) {
- n = uMap->mapUnicode(text[i], buf, sizeof(buf));
- s->append(buf, n);
- }
- uMap->decRefCnt();
- return s;
-}
-
-#endif // TEXTOUT_WORD_LIST
-
-//------------------------------------------------------------------------
-// TextPool
-//------------------------------------------------------------------------
-
-TextPool::TextPool() {
- minBaseIdx = 0;
- maxBaseIdx = -1;
- pool = NULL;
- cursor = NULL;
- cursorBaseIdx = -1;
-}
-
-TextPool::~TextPool() {
- int baseIdx;
- TextWord *word, *word2;
-
- for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
- for (word = pool[baseIdx - minBaseIdx]; word; word = word2) {
- word2 = word->next;
- delete word;
- }
- }
- gfree(pool);
-}
-
-int TextPool::getBaseIdx(double base) {
- int baseIdx;
-
- baseIdx = (int)(base / textPoolStep);
- if (baseIdx < minBaseIdx) {
- return minBaseIdx;
- }
- if (baseIdx > maxBaseIdx) {
- return maxBaseIdx;
- }
- return baseIdx;
-}
-
-void TextPool::addWord(TextWord *word) {
- TextWord **newPool;
- int wordBaseIdx, newMinBaseIdx, newMaxBaseIdx, baseIdx;
- TextWord *w0, *w1;
-
- // expand the array if needed
- wordBaseIdx = (int)(word->base / textPoolStep);
- if (minBaseIdx > maxBaseIdx) {
- minBaseIdx = wordBaseIdx - 128;
- maxBaseIdx = wordBaseIdx + 128;
- pool = (TextWord **)gmalloc((maxBaseIdx - minBaseIdx + 1) *
- sizeof(TextWord *));
- for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
- pool[baseIdx - minBaseIdx] = NULL;
- }
- } else if (wordBaseIdx < minBaseIdx) {
- newMinBaseIdx = wordBaseIdx - 128;
- newPool = (TextWord **)gmalloc((maxBaseIdx - newMinBaseIdx + 1) *
- sizeof(TextWord *));
- for (baseIdx = newMinBaseIdx; baseIdx < minBaseIdx; ++baseIdx) {
- newPool[baseIdx - newMinBaseIdx] = NULL;
- }
- memcpy(&newPool[minBaseIdx - newMinBaseIdx], pool,
- (maxBaseIdx - minBaseIdx + 1) * sizeof(TextWord *));
- gfree(pool);
- pool = newPool;
- minBaseIdx = newMinBaseIdx;
- } else if (wordBaseIdx > maxBaseIdx) {
- newMaxBaseIdx = wordBaseIdx + 128;
- pool = (TextWord **)grealloc(pool, (newMaxBaseIdx - minBaseIdx + 1) *
- sizeof(TextWord *));
- for (baseIdx = maxBaseIdx + 1; baseIdx <= newMaxBaseIdx; ++baseIdx) {
- pool[baseIdx - minBaseIdx] = NULL;
- }
- maxBaseIdx = newMaxBaseIdx;
- }
-
- // insert the new word
- if (cursor && wordBaseIdx == cursorBaseIdx &&
- word->primaryCmp(cursor) > 0) {
- w0 = cursor;
- w1 = cursor->next;
- } else {
- w0 = NULL;
- w1 = pool[wordBaseIdx - minBaseIdx];
- }
- for (; w1 && word->primaryCmp(w1) > 0; w0 = w1, w1 = w1->next) ;
- word->next = w1;
- if (w0) {
- w0->next = word;
- } else {
- pool[wordBaseIdx - minBaseIdx] = word;
- }
- cursor = word;
- cursorBaseIdx = wordBaseIdx;
-}
-
-//------------------------------------------------------------------------
-// TextLine
-//------------------------------------------------------------------------
-
-TextLine::TextLine(TextBlock *blkA, int rotA, double baseA) {
- blk = blkA;
- rot = rotA;
- xMin = yMin = 0;
- xMax = yMax = -1;
- base = baseA;
- words = lastWord = NULL;
- text = NULL;
- edge = NULL;
- col = NULL;
- len = 0;
- convertedLen = 0;
- hyphenated = gFalse;
- next = NULL;
-}
-
-TextLine::~TextLine() {
- TextWord *word;
-
- while (words) {
- word = words;
- words = words->next;
- delete word;
- }
- gfree(text);
- gfree(edge);
- gfree(col);
-}
-
-void TextLine::addWord(TextWord *word) {
- if (lastWord) {
- lastWord->next = word;
- } else {
- words = word;
- }
- lastWord = word;
-
- if (xMin > xMax) {
- xMin = word->xMin;
- xMax = word->xMax;
- yMin = word->yMin;
- yMax = word->yMax;
- } else {
- if (word->xMin < xMin) {
- xMin = word->xMin;
- }
- if (word->xMax > xMax) {
- xMax = word->xMax;
- }
- if (word->yMin < yMin) {
- yMin = word->yMin;
- }
- if (word->yMax > yMax) {
- yMax = word->yMax;
- }
- }
-}
-
-double TextLine::primaryDelta(TextLine *line) {
- double delta;
-
- delta = 0; // make gcc happy
- switch (rot) {
- case 0:
- delta = line->xMin - xMax;
- break;
- case 1:
- delta = line->yMin - yMax;
- break;
- case 2:
- delta = xMin - line->xMax;
- break;
- case 3:
- delta = yMin - line->yMax;
- break;
- }
- return delta;
-}
-
-int TextLine::primaryCmp(TextLine *line) {
- double cmp;
-
- cmp = 0; // make gcc happy
- switch (rot) {
- case 0:
- cmp = xMin - line->xMin;
- break;
- case 1:
- cmp = yMin - line->yMin;
- break;
- case 2:
- cmp = line->xMax - xMax;
- break;
- case 3:
- cmp = line->yMax - yMax;
- break;
- }
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-int TextLine::secondaryCmp(TextLine *line) {
- double cmp;
-
- cmp = (rot == 0 || rot == 3) ? base - line->base : line->base - base;
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-int TextLine::cmpYX(TextLine *line) {
- int cmp;
-
- if ((cmp = secondaryCmp(line))) {
- return cmp;
- }
- return primaryCmp(line);
-}
-
-int TextLine::cmpXY(const void *p1, const void *p2) {
- TextLine *line1 = *(TextLine **)p1;
- TextLine *line2 = *(TextLine **)p2;
- int cmp;
-
- if ((cmp = line1->primaryCmp(line2))) {
- return cmp;
- }
- return line1->secondaryCmp(line2);
-}
-
-void TextLine::coalesce(UnicodeMap *uMap) {
- TextWord *word0, *word1;
- double space, delta, minSpace;
- GBool isUnicode;
- char buf[8];
- int i, j;
-
- if (words->next) {
-
- // compute the inter-word space threshold
- if (words->len > 1 || words->next->len > 1) {
- minSpace = 0;
- } else {
- minSpace = words->primaryDelta(words->next);
- for (word0 = words->next, word1 = word0->next;
- word1 && minSpace > 0;
- word0 = word1, word1 = word0->next) {
- if (word1->len > 1) {
- minSpace = 0;
- }
- delta = word0->primaryDelta(word1);
- if (delta < minSpace) {
- minSpace = delta;
- }
- }
- }
- if (minSpace <= 0) {
- space = maxCharSpacing * words->fontSize;
- } else {
- space = maxWideCharSpacingMul * minSpace;
- }
-
- // merge words
- word0 = words;
- word1 = words->next;
- while (word1) {
- if (word0->primaryDelta(word1) >= space) {
- word0->spaceAfter = gTrue;
- word0 = word1;
- word1 = word1->next;
- } else if (word0->font == word1->font &&
- fabs(word0->fontSize - word1->fontSize) <
- maxWordFontSizeDelta * words->fontSize &&
- word1->charPos == word0->charPos + word0->charLen) {
- word0->merge(word1);
- word0->next = word1->next;
- delete word1;
- word1 = word0->next;
- } else {
- word0 = word1;
- word1 = word1->next;
- }
- }
- }
-
- // build the line text
- isUnicode = uMap ? uMap->isUnicode() : gFalse;
- len = 0;
- for (word1 = words; word1; word1 = word1->next) {
- len += word1->len;
- if (word1->spaceAfter) {
- ++len;
- }
- }
- text = (Unicode *)gmalloc(len * sizeof(Unicode));
- edge = (double *)gmalloc((len + 1) * sizeof(double));
- i = 0;
- for (word1 = words; word1; word1 = word1->next) {
- for (j = 0; j < word1->len; ++j) {
- text[i] = word1->text[j];
- edge[i] = word1->edge[j];
- ++i;
- }
- edge[i] = word1->edge[word1->len];
- if (word1->spaceAfter) {
- text[i] = (Unicode)0x0020;
- ++i;
- }
- }
-
- // compute convertedLen and set up the col array
- col = (int *)gmalloc((len + 1) * sizeof(int));
- convertedLen = 0;
- for (i = 0; i < len; ++i) {
- col[i] = convertedLen;
- if (isUnicode) {
- ++convertedLen;
- } else if (uMap) {
- convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf));
- }
- }
- col[len] = convertedLen;
-
- // check for hyphen at end of line
- //~ need to check for other chars used as hyphens
- hyphenated = text[len - 1] == (Unicode)'-';
-}
-
-//------------------------------------------------------------------------
-// TextLineFrag
-//------------------------------------------------------------------------
-
-class TextLineFrag {
-public:
-
- TextLine *line; // the line object
- int start, len; // offset and length of this fragment
- // (in Unicode chars)
- double xMin, xMax; // bounding box coordinates
- double yMin, yMax;
- double base; // baseline virtual coordinate
- int col; // first column
-
- void init(TextLine *lineA, int startA, int lenA);
- void computeCoords(GBool oneRot);
-
- static int cmpYXPrimaryRot(const void *p1, const void *p2);
- static int cmpYXLineRot(const void *p1, const void *p2);
- static int cmpXYLineRot(const void *p1, const void *p2);
-};
-
-void TextLineFrag::init(TextLine *lineA, int startA, int lenA) {
- line = lineA;
- start = startA;
- len = lenA;
- col = line->col[start];
-}
-
-void TextLineFrag::computeCoords(GBool oneRot) {
- TextBlock *blk;
- double d0, d1, d2, d3, d4;
-
- if (oneRot) {
-
- switch (line->rot) {
- case 0:
- xMin = line->edge[start];
- xMax = line->edge[start + len];
- yMin = line->yMin;
- yMax = line->yMax;
- break;
- case 1:
- xMin = line->xMin;
- xMax = line->xMax;
- yMin = line->edge[start];
- yMax = line->edge[start + len];
- break;
- case 2:
- xMin = line->edge[start + len];
- xMax = line->edge[start];
- yMin = line->yMin;
- yMax = line->yMax;
- break;
- case 3:
- xMin = line->xMin;
- xMax = line->xMax;
- yMin = line->edge[start + len];
- yMax = line->edge[start];
- break;
- }
- base = line->base;
-
- } else {
-
- if (line->rot == 0 && line->blk->page->primaryRot == 0) {
-
- xMin = line->edge[start];
- xMax = line->edge[start + len];
- yMin = line->yMin;
- yMax = line->yMax;
- base = line->base;
-
- } else {
-
- blk = line->blk;
- d0 = line->edge[start];
- d1 = line->edge[start + len];
- d2 = d3 = d4 = 0; // make gcc happy
-
- switch (line->rot) {
- case 0:
- d2 = line->yMin;
- d3 = line->yMax;
- d4 = line->base;
- d0 = (d0 - blk->xMin) / (blk->xMax - blk->xMin);
- d1 = (d1 - blk->xMin) / (blk->xMax - blk->xMin);
- d2 = (d2 - blk->yMin) / (blk->yMax - blk->yMin);
- d3 = (d3 - blk->yMin) / (blk->yMax - blk->yMin);
- d4 = (d4 - blk->yMin) / (blk->yMax - blk->yMin);
- break;
- case 1:
- d2 = line->xMax;
- d3 = line->xMin;
- d4 = line->base;
- d0 = (d0 - blk->yMin) / (blk->yMax - blk->yMin);
- d1 = (d1 - blk->yMin) / (blk->yMax - blk->yMin);
- d2 = (blk->xMax - d2) / (blk->xMax - blk->xMin);
- d3 = (blk->xMax - d3) / (blk->xMax - blk->xMin);
- d4 = (blk->xMax - d4) / (blk->xMax - blk->xMin);
- break;
- case 2:
- d2 = line->yMax;
- d3 = line->yMin;
- d4 = line->base;
- d0 = (blk->xMax - d0) / (blk->xMax - blk->xMin);
- d1 = (blk->xMax - d1) / (blk->xMax - blk->xMin);
- d2 = (blk->yMax - d2) / (blk->yMax - blk->yMin);
- d3 = (blk->yMax - d3) / (blk->yMax - blk->yMin);
- d4 = (blk->yMax - d4) / (blk->yMax - blk->yMin);
- break;
- case 3:
- d2 = line->xMin;
- d3 = line->xMax;
- d4 = line->base;
- d0 = (blk->yMax - d0) / (blk->yMax - blk->yMin);
- d1 = (blk->yMax - d1) / (blk->yMax - blk->yMin);
- d2 = (d2 - blk->xMin) / (blk->xMax - blk->xMin);
- d3 = (d3 - blk->xMin) / (blk->xMax - blk->xMin);
- d4 = (d4 - blk->xMin) / (blk->xMax - blk->xMin);
- break;
- }
-
- switch (line->blk->page->primaryRot) {
- case 0:
- xMin = blk->xMin + d0 * (blk->xMax - blk->xMin);
- xMax = blk->xMin + d1 * (blk->xMax - blk->xMin);
- yMin = blk->yMin + d2 * (blk->yMax - blk->yMin);
- yMax = blk->yMin + d3 * (blk->yMax - blk->yMin);
- base = blk->yMin + base * (blk->yMax - blk->yMin);
- break;
- case 1:
- xMin = blk->xMax - d3 * (blk->xMax - blk->xMin);
- xMax = blk->xMax - d2 * (blk->xMax - blk->xMin);
- yMin = blk->yMin + d0 * (blk->yMax - blk->yMin);
- yMax = blk->yMin + d1 * (blk->yMax - blk->yMin);
- base = blk->xMax - d4 * (blk->xMax - blk->xMin);
- break;
- case 2:
- xMin = blk->xMax - d1 * (blk->xMax - blk->xMin);
- xMax = blk->xMax - d0 * (blk->xMax - blk->xMin);
- yMin = blk->yMax - d3 * (blk->yMax - blk->yMin);
- yMax = blk->yMax - d2 * (blk->yMax - blk->yMin);
- base = blk->yMax - d4 * (blk->yMax - blk->yMin);
- break;
- case 3:
- xMin = blk->xMin + d2 * (blk->xMax - blk->xMin);
- xMax = blk->xMin + d3 * (blk->xMax - blk->xMin);
- yMin = blk->yMax - d1 * (blk->yMax - blk->yMin);
- yMax = blk->yMax - d0 * (blk->yMax - blk->yMin);
- base = blk->xMin + d4 * (blk->xMax - blk->xMin);
- break;
- }
-
- }
- }
-}
-
-int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2) {
- TextLineFrag *frag1 = (TextLineFrag *)p1;
- TextLineFrag *frag2 = (TextLineFrag *)p2;
- double cmp;
-
- cmp = 0; // make gcc happy
- switch (frag1->line->blk->page->primaryRot) {
- case 0:
- if ((cmp = frag1->yMin - frag2->yMin) == 0) {
- cmp = frag1->xMin - frag2->xMin;
- }
- break;
- case 1:
- if ((cmp = frag2->xMax - frag1->xMax) == 0) {
- cmp = frag1->yMin - frag2->yMin;
- }
- break;
- case 2:
- if ((cmp = frag2->yMin - frag1->yMin) == 0) {
- cmp = frag2->xMax - frag1->xMax;
- }
- break;
- case 3:
- if ((cmp = frag1->xMax - frag2->xMax) == 0) {
- cmp = frag2->yMax - frag1->yMax;
- }
- break;
- }
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2) {
- TextLineFrag *frag1 = (TextLineFrag *)p1;
- TextLineFrag *frag2 = (TextLineFrag *)p2;
- double cmp;
-
- cmp = 0; // make gcc happy
- switch (frag1->line->rot) {
- case 0:
- if ((cmp = frag1->yMin - frag2->yMin) == 0) {
- cmp = frag1->xMin - frag2->xMin;
- }
- break;
- case 1:
- if ((cmp = frag2->xMax - frag1->xMax) == 0) {
- cmp = frag1->yMin - frag2->yMin;
- }
- break;
- case 2:
- if ((cmp = frag2->yMin - frag1->yMin) == 0) {
- cmp = frag2->xMax - frag1->xMax;
- }
- break;
- case 3:
- if ((cmp = frag1->xMax - frag2->xMax) == 0) {
- cmp = frag2->yMax - frag1->yMax;
- }
- break;
- }
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2) {
- TextLineFrag *frag1 = (TextLineFrag *)p1;
- TextLineFrag *frag2 = (TextLineFrag *)p2;
- double cmp;
-
- cmp = 0; // make gcc happy
- switch (frag1->line->rot) {
- case 0:
- if ((cmp = frag1->xMin - frag2->xMin) == 0) {
- cmp = frag1->yMin - frag2->yMin;
- }
- break;
- case 1:
- if ((cmp = frag1->yMin - frag2->yMin) == 0) {
- cmp = frag2->xMax - frag1->xMax;
- }
- break;
- case 2:
- if ((cmp = frag2->xMax - frag1->xMax) == 0) {
- cmp = frag2->yMin - frag1->yMin;
- }
- break;
- case 3:
- if ((cmp = frag2->yMax - frag1->yMax) == 0) {
- cmp = frag1->xMax - frag2->xMax;
- }
- break;
- }
- return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
-}
-
-//------------------------------------------------------------------------
-// TextBlock
-//------------------------------------------------------------------------
-
-TextBlock::TextBlock(TextPage *pageA, int rotA) {
- page = pageA;
- rot = rotA;
- xMin = yMin = 0;
- xMax = yMax = -1;
- priMin = 0;
- priMax = page->pageWidth;
- pool = new TextPool();
- lines = NULL;
- curLine = NULL;
- next = NULL;
- stackNext = NULL;
-}
-
-TextBlock::~TextBlock() {
- TextLine *line;
-
- delete pool;
- while (lines) {
- line = lines;
- lines = lines->next;
- delete line;
- }
-}
-
-void TextBlock::addWord(TextWord *word) {
- pool->addWord(word);
- if (xMin > xMax) {
- xMin = word->xMin;
- xMax = word->xMax;
- yMin = word->yMin;
- yMax = word->yMax;
- } else {
- if (word->xMin < xMin) {
- xMin = word->xMin;
- }
- if (word->xMax > xMax) {
- xMax = word->xMax;
- }
- if (word->yMin < yMin) {
- yMin = word->yMin;
- }
- if (word->yMax > yMax) {
- yMax = word->yMax;
- }
- }
-}
-
-void TextBlock::coalesce(UnicodeMap *uMap) {
- TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord;
- TextLine *line, *line0, *line1;
- int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx;
- int baseIdx, bestWordBaseIdx, idx0, idx1;
- double minBase, maxBase;
- double fontSize, delta, priDelta, secDelta;
- TextLine **lineArray;
- GBool found;
- int col1, col2;
- int i, j, k;
-
- // discard duplicated text (fake boldface, drop shadows)
- for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) {
- word0 = pool->getPool(idx0);
- while (word0) {
- priDelta = dupMaxPriDelta * word0->fontSize;
- secDelta = dupMaxSecDelta * word0->fontSize;
- if (rot == 0 || rot == 3) {
- maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);
- } else {
- maxBaseIdx = pool->getBaseIdx(word0->base - secDelta);
- }
- found = gFalse;
- word1 = word2 = NULL; // make gcc happy
- for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) {
- if (idx1 == idx0) {
- word1 = word0;
- word2 = word0->next;
- } else {
- word1 = NULL;
- word2 = pool->getPool(idx1);
- }
- for (; word2; word1 = word2, word2 = word2->next) {
- if (word2->len == word0->len &&
- !memcmp(word2->text, word0->text,
- word0->len * sizeof(Unicode))) {
- switch (rot) {
- case 0:
- case 2:
- found = fabs(word0->xMin - word2->xMin) < priDelta &&
- fabs(word0->xMax - word2->xMax) < priDelta &&
- fabs(word0->yMin - word2->yMin) < secDelta &&
- fabs(word0->yMax - word2->yMax) < secDelta;
- break;
- case 1:
- case 3:
- found = fabs(word0->xMin - word2->xMin) < secDelta &&
- fabs(word0->xMax - word2->xMax) < secDelta &&
- fabs(word0->yMin - word2->yMin) < priDelta &&
- fabs(word0->yMax - word2->yMax) < priDelta;
- break;
- }
- }
- if (found) {
- break;
- }
- }
- if (found) {
- break;
- }
- }
- if (found) {
- if (word1) {
- word1->next = word2->next;
- } else {
- pool->setPool(idx1, word2->next);
- }
- delete word2;
- } else {
- word0 = word0->next;
- }
- }
- }
-
- // build the lines
- curLine = NULL;
- poolMinBaseIdx = pool->minBaseIdx;
- charCount = 0;
- nLines = 0;
- while (1) {
-
- // find the first non-empty line in the pool
- for (;
- poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx);
- ++poolMinBaseIdx) ;
- if (poolMinBaseIdx > pool->maxBaseIdx) {
- break;
- }
-
- // look for the left-most word in the first four lines of the
- // pool -- this avoids starting with a superscript word
- startBaseIdx = poolMinBaseIdx;
- for (baseIdx = poolMinBaseIdx + 1;
- baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx;
- ++baseIdx) {
- if (!pool->getPool(baseIdx)) {
- continue;
- }
- if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx))
- < 0) {
- startBaseIdx = baseIdx;
- }
- }
-
- // create a new line
- word0 = pool->getPool(startBaseIdx);
- pool->setPool(startBaseIdx, word0->next);
- word0->next = NULL;
- line = new TextLine(this, word0->rot, word0->base);
- line->addWord(word0);
- lastWord = word0;
-
- // compute the search range
- fontSize = word0->fontSize;
- minBase = word0->base - maxIntraLineDelta * fontSize;
- maxBase = word0->base + maxIntraLineDelta * fontSize;
- minBaseIdx = pool->getBaseIdx(minBase);
- maxBaseIdx = pool->getBaseIdx(maxBase);
-
- // find the rest of the words in this line
- while (1) {
-
- // find the left-most word whose baseline is in the range for
- // this line
- bestWordBaseIdx = 0;
- bestWord0 = bestWord1 = NULL;
- for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
- for (word0 = NULL, word1 = pool->getPool(baseIdx);
- word1;
- word0 = word1, word1 = word1->next) {
- if (word1->base >= minBase &&
- word1->base <= maxBase &&
- (delta = lastWord->primaryDelta(word1)) >=
- minCharSpacing * fontSize) {
- if (delta < maxWordSpacing * fontSize &&
- (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) {
- bestWordBaseIdx = baseIdx;
- bestWord0 = word0;
- bestWord1 = word1;
- }
- break;
- }
- }
- }
- if (!bestWord1) {
- break;
- }
-
- // remove it from the pool, and add it to the line
- if (bestWord0) {
- bestWord0->next = bestWord1->next;
- } else {
- pool->setPool(bestWordBaseIdx, bestWord1->next);
- }
- bestWord1->next = NULL;
- line->addWord(bestWord1);
- lastWord = bestWord1;
- }
-
- // add the line
- if (curLine && line->cmpYX(curLine) > 0) {
- line0 = curLine;
- line1 = curLine->next;
- } else {
- line0 = NULL;
- line1 = lines;
- }
- for (;
- line1 && line->cmpYX(line1) > 0;
- line0 = line1, line1 = line1->next) ;
- if (line0) {
- line0->next = line;
- } else {
- lines = line;
- }
- line->next = line1;
- curLine = line;
- line->coalesce(uMap);
- charCount += line->len;
- ++nLines;
- }
-
- // sort lines into xy order for column assignment
- lineArray = (TextLine **)gmalloc(nLines * sizeof(TextLine *));
- for (line = lines, i = 0; line; line = line->next, ++i) {
- lineArray[i] = line;
- }
- qsort(lineArray, nLines, sizeof(TextLine *), &TextLine::cmpXY);
-
- // column assignment
- nColumns = 0;
- for (i = 0; i < nLines; ++i) {
- line0 = lineArray[i];
- col1 = 0;
- for (j = 0; j < i; ++j) {
- line1 = lineArray[j];
- if (line1->primaryDelta(line0) >= 0) {
- col2 = line1->col[line1->len] + 1;
- } else {
- k = 0; // make gcc happy
- switch (rot) {
- case 0:
- for (k = 0;
- k < line1->len &&
- line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
- ++k) ;
- break;
- case 1:
- for (k = 0;
- k < line1->len &&
- line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
- ++k) ;
- break;
- case 2:
- for (k = 0;
- k < line1->len &&
- line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
- ++k) ;
- break;
- case 3:
- for (k = 0;
- k < line1->len &&
- line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
- ++k) ;
- break;
- }
- col2 = line1->col[k];
- }
- if (col2 > col1) {
- col1 = col2;
- }
- }
- for (k = 0; k <= line0->len; ++k) {
- line0->col[k] += col1;
- }
- if (line0->col[line0->len] > nColumns) {
- nColumns = line0->col[line0->len];
- }
- }
- gfree(lineArray);
-}
-
-// Narrow this block's free interval [priMin, priMax] along the page's
-// primary axis so that it stops at the neighbouring block <blk>.
-//
-// Only a neighbour that overlaps this block on the other axis is
-// considered.  A neighbour that starts before this block proposes its
-// far edge as a new priMin; a neighbour that ends after this block
-// proposes its near edge as a new priMax.  Each proposal is clamped so
-// that it never cuts into this block's own extent, and is applied only
-// if it makes the interval smaller.
-//
-// For primary rotations 1 and 3 the interval is measured along y (the
-// proposals are y values, and isBelow() / TextFlow::blockFits() compare
-// yMin/yMax against priMin/priMax), so the clamp must use yMin/yMax in
-// that case; xMin/xMax are only correct for rotations 0 and 2.
-void TextBlock::updatePriMinMax(TextBlock *blk) {
-  double newPriMin, newPriMax;
-  double ownMin, ownMax;
-  GBool gotPriMin, gotPriMax;
-
-  gotPriMin = gotPriMax = gFalse;
-  newPriMin = newPriMax = 0; // make gcc happy
-  ownMin = ownMax = 0; // make gcc happy
-  switch (page->primaryRot) {
-  case 0:
-  case 2:
-    // primary axis is x
-    ownMin = xMin;
-    ownMax = xMax;
-    if (blk->yMin < yMax && blk->yMax > yMin) {
-      if (blk->xMin < xMin) {
-        newPriMin = blk->xMax;
-        gotPriMin = gTrue;
-      }
-      if (blk->xMax > xMax) {
-        newPriMax = blk->xMin;
-        gotPriMax = gTrue;
-      }
-    }
-    break;
-  case 1:
-  case 3:
-    // primary axis is y
-    ownMin = yMin;
-    ownMax = yMax;
-    if (blk->xMin < xMax && blk->xMax > xMin) {
-      if (blk->yMin < yMin) {
-        newPriMin = blk->yMax;
-        gotPriMin = gTrue;
-      }
-      if (blk->yMax > yMax) {
-        newPriMax = blk->yMin;
-        gotPriMax = gTrue;
-      }
-    }
-    break;
-  }
-  if (gotPriMin) {
-    // never let the lower limit move inside this block
-    if (newPriMin > ownMin) {
-      newPriMin = ownMin;
-    }
-    if (newPriMin > priMin) {
-      priMin = newPriMin;
-    }
-  }
-  if (gotPriMax) {
-    // never let the upper limit move inside this block
-    if (newPriMax < ownMax) {
-      newPriMax = ownMax;
-    }
-    if (newPriMax < priMax) {
-      priMax = newPriMax;
-    }
-  }
-}
-
-// qsort() comparator over an array of TextBlock pointers.  The sort
-// key depends on the primary rotation of the first block's page: a
-// major difference is tried first and a minor difference breaks exact
-// ties.  Returns -1, 0 or 1.
-int TextBlock::cmpXYPrimaryRot(const void *p1, const void *p2) {
-  TextBlock *a = *(TextBlock **)p1;
-  TextBlock *b = *(TextBlock **)p2;
-  double major, minor, diff;
-
-  major = minor = 0; // unknown rotation: blocks compare equal
-  switch (a->page->primaryRot) {
-  case 0:
-    major = a->xMin - b->xMin;
-    minor = a->yMin - b->yMin;
-    break;
-  case 1:
-    major = a->yMin - b->yMin;
-    minor = b->xMax - a->xMax;
-    break;
-  case 2:
-    major = b->xMax - a->xMax;
-    minor = b->yMin - a->yMin;
-    break;
-  case 3:
-    major = b->yMax - a->yMax;
-    minor = a->xMax - b->xMax;
-    break;
-  }
-  diff = (major == 0) ? minor : major;
-  if (diff < 0) {
-    return -1;
-  }
-  if (diff > 0) {
-    return 1;
-  }
-  return 0;
-}
-
-// qsort() comparator over an array of TextBlock pointers, with the
-// axes swapped relative to cmpXYPrimaryRot(): the major key runs along
-// the other axis and the minor key breaks exact ties.  The key pair is
-// selected by the primary rotation of the first block's page.
-// Returns -1, 0 or 1.
-int TextBlock::cmpYXPrimaryRot(const void *p1, const void *p2) {
-  TextBlock *a = *(TextBlock **)p1;
-  TextBlock *b = *(TextBlock **)p2;
-  double major, minor, diff;
-
-  major = minor = 0; // unknown rotation: blocks compare equal
-  switch (a->page->primaryRot) {
-  case 0:
-    major = a->yMin - b->yMin;
-    minor = a->xMin - b->xMin;
-    break;
-  case 1:
-    major = b->xMax - a->xMax;
-    minor = a->yMin - b->yMin;
-    break;
-  case 2:
-    major = b->yMin - a->yMin;
-    minor = b->xMax - a->xMax;
-    break;
-  case 3:
-    major = a->xMax - b->xMax;
-    minor = b->yMax - a->yMax;
-    break;
-  }
-  diff = (major == 0) ? minor : major;
-  if (diff < 0) {
-    return -1;
-  }
-  if (diff > 0) {
-    return 1;
-  }
-  return 0;
-}
-
-// Three-way comparison of this block against <blk> along the axis
-// selected by this block's own rotation.  Returns -1, 0 or 1; an
-// unrecognised rotation compares equal.
-int TextBlock::primaryCmp(TextBlock *blk) {
-  double diff;
-
-  if (rot == 0) {
-    diff = xMin - blk->xMin;
-  } else if (rot == 1) {
-    diff = yMin - blk->yMin;
-  } else if (rot == 2) {
-    diff = blk->xMax - xMax;
-  } else if (rot == 3) {
-    diff = blk->yMax - yMax;
-  } else {
-    diff = 0;
-  }
-  if (diff < 0) {
-    return -1;
-  }
-  return diff > 0 ? 1 : 0;
-}
-
-// Signed distance between this block and <blk>, measured between the
-// pair of edges selected by this block's rotation.  Returns 0 for an
-// unrecognised rotation.
-double TextBlock::secondaryDelta(TextBlock *blk) {
-  switch (rot) {
-  case 0:
-    return blk->yMin - yMax;
-  case 1:
-    return xMin - blk->xMax;
-  case 2:
-    return yMin - blk->yMax;
-  case 3:
-    return blk->xMin - xMax;
-  }
-  return 0;
-}
-
-// Returns true if this block lies inside <blk>'s [priMin, priMax]
-// interval and is positioned after <blk> on the other axis.  Which
-// coordinates are compared is selected by the page's primary rotation;
-// an unrecognised rotation yields gFalse.
-GBool TextBlock::isBelow(TextBlock *blk) {
-  switch (page->primaryRot) {
-  case 0:
-    return xMin >= blk->priMin && xMax <= blk->priMax &&
-           yMin > blk->yMin;
-  case 1:
-    return yMin >= blk->priMin && yMax <= blk->priMax &&
-           xMax < blk->xMax;
-  case 2:
-    return xMin >= blk->priMin && xMax <= blk->priMax &&
-           yMax < blk->yMax;
-  case 3:
-    return yMin >= blk->priMin && yMax <= blk->priMax &&
-           xMin > blk->xMin;
-  }
-  return gFalse;
-}
-
-//------------------------------------------------------------------------
-// TextFlow
-//------------------------------------------------------------------------
-
-// Start a flow that consists of the single block <blk>: the flow takes
-// over the block's bounding box and its priMin/priMax interval, and
-// the block becomes both head and tail of the flow's block list.
-TextFlow::TextFlow(TextPage *pageA, TextBlock *blk) {
-  page = pageA;
-  next = NULL;
-
-  // single-element block list
-  blocks = blk;
-  lastBlk = blk;
-
-  // geometry copied from the first block
-  xMin = blk->xMin;
-  yMin = blk->yMin;
-  xMax = blk->xMax;
-  yMax = blk->yMax;
-  priMin = blk->priMin;
-  priMax = blk->priMax;
-}
-
-// Delete every block linked into this flow.
-TextFlow::~TextFlow() {
-  TextBlock *doomed;
-
-  for (doomed = blocks; doomed; doomed = blocks) {
-    // advance the head before the node goes away
-    blocks = doomed->next;
-    delete doomed;
-  }
-}
-
-// Append <blk> to the end of the flow's block list and grow the flow's
-// bounding box so that it encloses the new block.
-void TextFlow::addBlock(TextBlock *blk) {
-  // link at the tail (or as the head if the list is empty)
-  if (!lastBlk) {
-    blocks = blk;
-  } else {
-    lastBlk->next = blk;
-  }
-  lastBlk = blk;
-
-  // extend the bounding box
-  xMin = (blk->xMin < xMin) ? blk->xMin : xMin;
-  xMax = (blk->xMax > xMax) ? blk->xMax : xMax;
-  yMin = (blk->yMin < yMin) ? blk->yMin : yMin;
-  yMax = (blk->yMax > yMax) ? blk->yMax : yMax;
-}
-
-// Decide whether <blk> may be appended to this flow.  The block is
-// rejected if the font size of its first word exceeds that of the
-// first word of the flow's last block; otherwise it fits when its
-// extent along the primary axis lies within the flow's
-// [priMin, priMax] interval.  <prevBlk> is not used by this
-// implementation.
-GBool TextFlow::blockFits(TextBlock *blk, TextBlock *prevBlk) {
-  // lower blocks must use smaller fonts
-  if (blk->lines->words->fontSize > lastBlk->lines->words->fontSize) {
-    return gFalse;
-  }
-
-  switch (page->primaryRot) {
-  case 0:
-  case 2:
-    return blk->xMin >= priMin && blk->xMax <= priMax;
-  case 1:
-  case 3:
-    return blk->yMin >= priMin && blk->yMax <= priMax;
-  }
-  return gFalse;
-}
-
-#if TEXTOUT_WORD_LIST
-
-//------------------------------------------------------------------------
-// TextWordList
-//------------------------------------------------------------------------
-
-// Build a flat list of the words on <text>.  The list only stores
-// pointers to the page's TextWord objects.  Three orderings exist:
-//   - raw-order pages: the order of the page's rawWords chain;
-//   - physLayout:      all words sorted with TextWord::cmpYX;
-//   - otherwise:       flow -> block -> line -> word traversal order.
-TextWordList::TextWordList(TextPage *text, GBool physLayout) {
-  TextFlow *flow;
-  TextBlock *blk;
-  TextLine *line;
-  TextWord *word;
-  TextWord **sorted;
-  int total, k;
-
-  words = new GList();
-
-  // raw order: just walk the raw word chain
-  if (text->rawOrder) {
-    word = text->rawWords;
-    while (word) {
-      words->append(word);
-      word = word->next;
-    }
-    return;
-  }
-
-  // reading order: append words as the flows are traversed
-  if (!physLayout) {
-    for (flow = text->flows; flow; flow = flow->next) {
-      for (blk = flow->blocks; blk; blk = blk->next) {
-        for (line = blk->lines; line; line = line->next) {
-          for (word = line->words; word; word = word->next) {
-            words->append(word);
-          }
-        }
-      }
-    }
-    return;
-  }
-
-  // physical layout: this is inefficient, but it's also the least
-  // useful of the three cases -- count the words, gather them into a
-  // temporary array, sort the array, then copy it into the list
-  total = 0;
-  for (flow = text->flows; flow; flow = flow->next) {
-    for (blk = flow->blocks; blk; blk = blk->next) {
-      for (line = blk->lines; line; line = line->next) {
-        for (word = line->words; word; word = word->next) {
-          ++total;
-        }
-      }
-    }
-  }
-  sorted = (TextWord **)gmalloc(total * sizeof(TextWord *));
-  k = 0;
-  for (flow = text->flows; flow; flow = flow->next) {
-    for (blk = flow->blocks; blk; blk = blk->next) {
-      for (line = blk->lines; line; line = line->next) {
-        for (word = line->words; word; word = word->next) {
-          sorted[k] = word;
-          ++k;
-        }
-      }
-    }
-  }
-  qsort(sorted, total, sizeof(TextWord *), &TextWord::cmpYX);
-  for (k = 0; k < total; ++k) {
-    words->append(sorted[k]);
-  }
-  gfree(sorted);
-}
-
-// Release the list container only; the TextWord objects it points to
-// are not deleted here.
-TextWordList::~TextWordList() {
-  delete words;
-}
-
-// Number of words in the list.
-int TextWordList::getLength() {
-  int n = words->getLength();
-
-  return n;
-}
-
-// Return the word at position <idx>, or NULL if <idx> is outside
-// [0, getLength()).
-TextWord *TextWordList::get(int idx) {
-  if (idx >= 0 && idx < words->getLength()) {
-    return (TextWord *)words->get(idx);
-  }
-  return NULL;
-}
-
-#endif // TEXTOUT_WORD_LIST
-
-//------------------------------------------------------------------------
-// TextPage
-//------------------------------------------------------------------------
-
-// Create an empty text page.  In raw-order mode words are kept in a
-// single chain (rawWords); otherwise one TextPool per rotation (0..3)
-// is allocated to collect them.
-//
-// Every member that other methods read is given a defined value here:
-// findText() iterates over blocks[0..nBlocks) and addChar() compares
-// against pageWidth/pageHeight, and either may be called before
-// coalesce() or startPage() has had a chance to set them.
-TextPage::TextPage(GBool rawOrderA) {
-  int rot;
-
-  rawOrder = rawOrderA;
-  curWord = NULL;
-  charPos = 0;
-  curFont = NULL;
-  curFontSize = 0;
-  nest = 0;
-  nTinyChars = 0;
-  lastCharOverlap = gFalse;
-  pageWidth = pageHeight = 0;
-  if (!rawOrder) {
-    for (rot = 0; rot < 4; ++rot) {
-      pools[rot] = new TextPool();
-    }
-  }
-  flows = NULL;
-  blocks = NULL;
-  nBlocks = 0;
-  // same defaults that coalesce() uses for raw-order pages
-  primaryRot = 0;
-  primaryLR = gTrue;
-  rawWords = NULL;
-  rawLastWord = NULL;
-  fonts = new GList();
-  lastFindXMin = lastFindYMin = 0;
-  haveLastFind = gFalse;
-}
-
-// Destroy the page.  clear() releases the page contents and leaves
-// fresh, empty pools and an empty font list behind; those are deleted
-// here.
-TextPage::~TextPage() {
-  clear();
-  if (!rawOrder) {
-    int idx = 0;
-
-    while (idx < 4) {
-      delete pools[idx];
-      ++idx;
-    }
-  }
-  delete fonts;
-}
-
-// Begin a new page: discard the previous contents and take the page
-// size from <state>.  With a NULL <state> the size is set to 0 x 0.
-void TextPage::startPage(GfxState *state) {
-  clear();
-  if (!state) {
-    pageWidth = pageHeight = 0;
-    return;
-  }
-  pageWidth = state->getPageWidth();
-  pageHeight = state->getPageHeight();
-}
-
-// Finish the page: close the word that is still being built, if any.
-void TextPage::endPage() {
-  if (!curWord) {
-    return;
-  }
-  endWord();
-}
-
-// Discard everything collected for the current page and return to the
-// empty state: the pending word, the raw word chain (raw-order mode)
-// or the pools, flows and block array (layout mode), and the font
-// list.  Fresh pools and a fresh font list are allocated afterwards.
-//
-// nBlocks is reset together with the block array: findText() walks
-// blocks[0..nBlocks), so a stale count combined with blocks == NULL
-// would index a null array.
-void TextPage::clear() {
-  int rot;
-  TextFlow *flow;
-  TextWord *word;
-
-  if (curWord) {
-    delete curWord;
-    curWord = NULL;
-  }
-  if (rawOrder) {
-    while (rawWords) {
-      word = rawWords;
-      rawWords = rawWords->next;
-      delete word;
-    }
-  } else {
-    for (rot = 0; rot < 4; ++rot) {
-      delete pools[rot];
-    }
-    // deleting a flow also deletes the blocks linked into it
-    while (flows) {
-      flow = flows;
-      flows = flows->next;
-      delete flow;
-    }
-    // only the pointer array is freed here
-    gfree(blocks);
-  }
-  deleteGList(fonts, TextFontInfo);
-
-  curWord = NULL;
-  charPos = 0;
-  curFont = NULL;
-  curFontSize = 0;
-  nest = 0;
-  nTinyChars = 0;
-  if (!rawOrder) {
-    for (rot = 0; rot < 4; ++rot) {
-      pools[rot] = new TextPool();
-    }
-  }
-  flows = NULL;
-  blocks = NULL;
-  nBlocks = 0;
-  rawWords = NULL;
-  rawLastWord = NULL;
-  fonts = new GList();
-}
-
-// Called when the current font changes: select (or create) the
-// TextFontInfo entry matching <state> and recompute curFontSize.
-//
-// For Type 3 fonts the transformed font size is rescaled using a
-// guessed glyph width, then by |fm[3] / fm[0]| of the font matrix.
-//
-// Glyph names are inspected first character first, so an empty name
-// is never read beyond its terminating NUL.
-void TextPage::updateFont(GfxState *state) {
-  GfxFont *gfxFont;
-  Gfx8BitFont *font8;
-  double *fm;
-  char *name;
-  int code, mCode, letterCode, anyCode;
-  double w;
-  int i;
-
-  // get the font info object
-  curFont = NULL;
-  for (i = 0; i < fonts->getLength(); ++i) {
-    curFont = (TextFontInfo *)fonts->get(i);
-    if (curFont->matches(state)) {
-      break;
-    }
-    curFont = NULL;
-  }
-  if (!curFont) {
-    curFont = new TextFontInfo(state);
-    fonts->append(curFont);
-  }
-
-  // adjust the font size
-  gfxFont = state->getFont();
-  curFontSize = state->getTransformedFontSize();
-  if (gfxFont && gfxFont->getType() == fontType3) {
-    // This is a hack which makes it possible to deal with some Type 3
-    // fonts.  The problem is that it's impossible to know what the
-    // base coordinate system used in the font is without actually
-    // rendering the font.  This code tries to guess by looking at the
-    // width of the character 'm' (which breaks if the font is a
-    // subset that doesn't contain 'm').
-    font8 = (Gfx8BitFont *)gfxFont;
-    mCode = letterCode = anyCode = -1;
-    for (code = 0; code < 256; ++code) {
-      name = font8->getCharName(code);
-      if (!name) {
-        continue;
-      }
-      // the glyph named exactly "m" (the last such code wins)
-      if (name[0] == 'm' && name[1] == '\0') {
-        mCode = code;
-      }
-      // the first glyph whose name is a single ASCII letter; name[0]
-      // is tested before name[1] so that "" is not over-read
-      if (letterCode < 0 &&
-          ((name[0] >= 'A' && name[0] <= 'Z') ||
-           (name[0] >= 'a' && name[0] <= 'z')) &&
-          name[1] == '\0') {
-        letterCode = code;
-      }
-      // the first named glyph with a positive width
-      if (anyCode < 0 && font8->getWidth(code) > 0) {
-        anyCode = code;
-      }
-    }
-    if (mCode >= 0 &&
-        (w = font8->getWidth(mCode)) > 0) {
-      // 0.6 is a generic average 'm' width -- yes, this is a hack
-      curFontSize *= w / 0.6;
-    } else if (letterCode >= 0 &&
-               (w = font8->getWidth(letterCode)) > 0) {
-      // even more of a hack: 0.5 is a generic letter width
-      curFontSize *= w / 0.5;
-    } else if (anyCode >= 0 &&
-               (w = font8->getWidth(anyCode)) > 0) {
-      // better than nothing: 0.5 is a generic character width
-      curFontSize *= w / 0.5;
-    }
-    fm = gfxFont->getFontMatrix();
-    if (fm[0] != 0) {
-      curFontSize *= fabs(fm[3] / fm[0]);
-    }
-  }
-}
-
-// Start a new word at (x0, y0).  The word's rotation (0..3) is derived
-// from the product of the text matrix and the CTM (and, for Type 3
-// fonts, the font matrix as well).
-//
-// If a word is already open the call is only counted in <nest>; the
-// matching endWord() undoes the count.
-//
-// The current font may be NULL (updateFont() guards against that as
-// well), in which case the Type 3 adjustment is skipped.
-void TextPage::beginWord(GfxState *state, double x0, double y0) {
-  GfxFont *gfxFont;
-  double *txtm, *ctm, *fontm;
-  double m[4], m2[4];
-  int rot;
-
-  // This check is needed because Type 3 characters can contain
-  // text-drawing operations (when TextPage is being used via
-  // {X,Win}SplashOutputDev rather than TextOutputDev).
-  if (curWord) {
-    ++nest;
-    return;
-  }
-
-  // compute the rotation
-  txtm = state->getTextMat();
-  ctm = state->getCTM();
-  m[0] = txtm[0] * ctm[0] + txtm[1] * ctm[2];
-  m[1] = txtm[0] * ctm[1] + txtm[1] * ctm[3];
-  m[2] = txtm[2] * ctm[0] + txtm[3] * ctm[2];
-  m[3] = txtm[2] * ctm[1] + txtm[3] * ctm[3];
-  gfxFont = state->getFont();
-  if (gfxFont && gfxFont->getType() == fontType3) {
-    fontm = gfxFont->getFontMatrix();
-    m2[0] = fontm[0] * m[0] + fontm[1] * m[2];
-    m2[1] = fontm[0] * m[1] + fontm[1] * m[3];
-    m2[2] = fontm[2] * m[0] + fontm[3] * m[2];
-    m2[3] = fontm[2] * m[1] + fontm[3] * m[3];
-    m[0] = m2[0];
-    m[1] = m2[1];
-    m[2] = m2[2];
-    m[3] = m2[3];
-  }
-  // diagonal terms dominate: rotation 0 or 2, chosen by the sign of
-  // m[3]; otherwise rotation 1 or 3, chosen by the sign of m[2]
-  if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
-    rot = (m[3] < 0) ? 0 : 2;
-  } else {
-    rot = (m[2] > 0) ? 1 : 3;
-  }
-
-  curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);
-}
-
-// Add one character (character code <c>, mapped to the <uLen> Unicode
-// values in <u>) drawn at (x, y) with advance (dx, dy) to the page.
-//
-// Characters outside the page bounds are dropped, as are tiny
-// characters once more than 50000 of them have been seen (unless the
-// textKeepTinyChars setting is on).  A space ends the current word.
-// Otherwise the character is appended to the current word, after
-// starting a new word where the rules further down require it.
-//
-// lastCharOverlap records whether *this* character overlapped the
-// previous one on the same baseline.  It is not set for ordinary word
-// breaks (gap or baseline change); if it were, every character after
-// the first break would be forced into a word of its own until the
-// next explicit space.
-void TextPage::addChar(GfxState *state, double x, double y,
-                       double dx, double dy,
-                       CharCode c, Unicode *u, int uLen) {
-  double x1, y1, w1, h1, dx2, dy2, base, sp;
-  GBool baseMismatch, overlap;
-  int i;
-
-  // if the previous char was a space, addChar will have called
-  // endWord, so we need to start a new word
-  if (!curWord) {
-    beginWord(state, x, y);
-  }
-
-  // throw away chars that aren't inside the page bounds
-  state->transform(x, y, &x1, &y1);
-  if (x1 < 0 || x1 > pageWidth ||
-      y1 < 0 || y1 > pageHeight) {
-    return;
-  }
-
-  // subtract char and word spacing from the dx,dy values
-  sp = state->getCharSpace();
-  if (c == (CharCode)0x20) {
-    sp += state->getWordSpace();
-  }
-  state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
-  dx -= dx2;
-  dy -= dy2;
-  state->transformDelta(dx, dy, &w1, &h1);
-
-  // check the tiny chars limit
-  if (!globalParams->getTextKeepTinyChars() &&
-      fabs(w1) < 3 && fabs(h1) < 3) {
-    if (++nTinyChars > 50000) {
-      return;
-    }
-  }
-
-  // break words at space character
-  if (uLen == 1 && u[0] == (Unicode)0x20) {
-    ++curWord->charLen;
-    ++charPos;
-    endWord();
-    return;
-  }
-
-  // start a new word if:
-  // (1) this character's baseline doesn't match the current word's
-  //     baseline, or
-  // (2) there is space between the end of the current word and this
-  //     character, or
-  // (3) this character overlaps the previous one (duplicated text), or
-  // (4) the previous character was an overlap (we want each duplicated
-  //     characters to be in a word by itself)
-  base = sp = 0; // make gcc happy
-  if (curWord->len > 0) {
-    switch (curWord->rot) {
-    case 0:
-      base = y1;
-      sp = x1 - curWord->xMax;
-      break;
-    case 1:
-      base = x1;
-      sp = y1 - curWord->yMax;
-      break;
-    case 2:
-      base = y1;
-      sp = curWord->xMin - x1;
-      break;
-    case 3:
-      base = x1;
-      sp = curWord->yMin - y1;
-      break;
-    }
-    // both flags are computed against the word that is still current,
-    // before it is possibly replaced below
-    baseMismatch = fabs(base - curWord->base) > 0.5;
-    overlap = !baseMismatch &&
-              sp < -minDupBreakOverlap * curWord->fontSize;
-    if (baseMismatch ||
-        sp > minWordBreakSpace * curWord->fontSize ||
-        sp < -minDupBreakOverlap * curWord->fontSize ||
-        lastCharOverlap) {
-      endWord();
-      beginWord(state, x, y);
-    }
-    // remember only genuine overlaps for rule (4)
-    lastCharOverlap = overlap;
-  } else {
-    lastCharOverlap = gFalse;
-  }
-
-  // page rotation and/or transform matrices can cause text to be
-  // drawn in reverse order -- in this case, swap the begin/end
-  // coordinates and break text into individual chars
-  if ((curWord->rot == 0 && w1 < 0) ||
-      (curWord->rot == 1 && h1 < 0) ||
-      (curWord->rot == 2 && w1 > 0) ||
-      (curWord->rot == 3 && h1 > 0)) {
-    endWord();
-    beginWord(state, x + dx, y + dy);
-    x1 += w1;
-    y1 += h1;
-    w1 = -w1;
-    h1 = -h1;
-  }
-
-  // add the characters to the current word; the advance is split
-  // evenly between the Unicode values this character maps to
-  if (uLen != 0) {
-    w1 /= uLen;
-    h1 /= uLen;
-  }
-  for (i = 0; i < uLen; ++i) {
-    curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
-  }
-  ++curWord->charLen;
-  ++charPos;
-}
-
-// Close the word that is currently being built and hand it to
-// addWord().  A call that matches a nested beginWord() only decrements
-// the nesting counter.
-void TextPage::endWord() {
-  // The nesting counter is needed because Type 3 characters can
-  // contain text-drawing operations (when TextPage is being used via
-  // {X,Win}SplashOutputDev rather than TextOutputDev).
-  if (nest <= 0) {
-    if (!curWord) {
-      return;
-    }
-    addWord(curWord);
-    curWord = NULL;
-  } else {
-    --nest;
-  }
-}
-
-// Store a finished word.  Empty words are deleted.  In raw-order mode
-// the word is appended to the rawWords chain; otherwise it goes into
-// the pool for its rotation.
-void TextPage::addWord(TextWord *word) {
-  // throw away zero-length words -- they don't have valid xMin/xMax
-  // values, and they're useless anyway
-  if (word->len == 0) {
-    delete word;
-    return;
-  }
-
-  if (!rawOrder) {
-    pools[word->rot]->addWord(word);
-    return;
-  }
-
-  // raw order: append at the tail of the chain
-  if (rawLastWord) {
-    rawLastWord->next = word;
-  } else {
-    rawWords = word;
-  }
-  rawLastWord = word;
-}
-
-void TextPage::coalesce(GBool physLayout) { // Layout analysis: groups the pooled words into blocks, picks the primary rotation/direction, assigns columns and links blocks into flows. physLayout is not referenced in this body.
- UnicodeMap *uMap;
- TextPool *pool;
- TextWord *word0, *word1, *word2;
- TextLine *line;
- TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1;
- TextBlock **blkArray;
- TextFlow *flow, *lastFlow;
- int rot, poolMinBaseIdx, baseIdx, startBaseIdx;
- double minBase, maxBase, newMinBase, newMaxBase;
- double fontSize, colSpace1, colSpace2, lineSpace, intraLineSpace, blkSpace;
- GBool found;
- int count[4]; // per rotation: sum of blk->charCount over the blocks built for it
- int lrCount; // +1 per unicodeTypeL char, -1 per unicodeTypeR char
- int firstBlkIdx, nBlocksLeft;
- int col1, col2;
- int i, j, n;
-
- if (rawOrder) { // raw-order pages get fixed defaults and no layout analysis
- primaryRot = 0;
- primaryLR = gTrue;
- return;
- }
-
- uMap = globalParams->getTextEncoding(); // reference dropped via decRefCnt() at the end of this function
- blkList = NULL;
- lastBlk = NULL;
- nBlocks = 0;
- primaryRot = -1; // -1 until the first block has been built
-
-#if 0 // for debugging
- printf("*** initial words ***\n");
- for (rot = 0; rot < 4; ++rot) {
- pool = pools[rot];
- for (baseIdx = pool->minBaseIdx; baseIdx <= pool->maxBaseIdx; ++baseIdx) {
- for (word0 = pool->getPool(baseIdx); word0; word0 = word0->next) {
- printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f '",
- word0->xMin, word0->xMax, word0->yMin, word0->yMax,
- word0->base, word0->fontSize);
- for (i = 0; i < word0->len; ++i) {
- fputc(word0->text[i] & 0xff, stdout);
- }
- printf("'\n");
- }
- }
- }
- printf("\n");
-#endif
-
- //----- assemble the blocks
-
- //~ add an outer loop for writing mode (vertical text)
-
- // build blocks for each rotation value
- for (rot = 0; rot < 4; ++rot) {
- pool = pools[rot];
- poolMinBaseIdx = pool->minBaseIdx;
- count[rot] = 0;
-
- // add blocks until no more words are left
- while (1) {
-
- // find the first non-empty line in the pool
- for (;
- poolMinBaseIdx <= pool->maxBaseIdx &&
- !pool->getPool(poolMinBaseIdx);
- ++poolMinBaseIdx) ;
- if (poolMinBaseIdx > pool->maxBaseIdx) { // pool exhausted for this rotation
- break;
- }
-
- // look for the left-most word in the first four lines of the
- // pool -- this avoids starting with a superscript word
- startBaseIdx = poolMinBaseIdx;
- for (baseIdx = poolMinBaseIdx + 1;
- baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx;
- ++baseIdx) {
- if (!pool->getPool(baseIdx)) {
- continue;
- }
- if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx))
- < 0) {
- startBaseIdx = baseIdx;
- }
- }
-
- // create a new block
- word0 = pool->getPool(startBaseIdx); // seed word: head of the chosen pool list
- pool->setPool(startBaseIdx, word0->next); // unlink the seed word from the pool
- word0->next = NULL;
- blk = new TextBlock(this, rot);
- blk->addWord(word0);
-
- fontSize = word0->fontSize; // all thresholds below are scaled by the seed word's font size
- minBase = maxBase = word0->base;
- colSpace1 = minColSpacing1 * fontSize;
- colSpace2 = minColSpacing2 * fontSize;
- lineSpace = maxLineSpacingDelta * fontSize;
- intraLineSpace = maxIntraLineDelta * fontSize;
-
- // add words to the block
- do { // repeated until a complete pass adds no word (see the while (found) test)
- found = gFalse;
-
- // look for words on the line above the current top edge of
- // the block
- newMinBase = minBase;
- for (baseIdx = pool->getBaseIdx(minBase);
- baseIdx >= pool->getBaseIdx(minBase - lineSpace);
- --baseIdx) {
- word0 = NULL; // word0 trails word1 as the predecessor in the pool list
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base < minBase &&
- word1->base >= minBase - lineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin)
- : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta1 * fontSize) {
- word2 = word1; // word2 = the word being moved from the pool into the block
- if (word0) {
- word0->next = word1->next;
- } else {
- pool->setPool(baseIdx, word1->next);
- }
- word1 = word1->next;
- word2->next = NULL;
- blk->addWord(word2);
- found = gTrue;
- newMinBase = word2->base;
- } else {
- word0 = word1;
- word1 = word1->next;
- }
- }
- }
- minBase = newMinBase;
-
- // look for words on the line below the current bottom edge of
- // the block
- newMaxBase = maxBase;
- for (baseIdx = pool->getBaseIdx(maxBase);
- baseIdx <= pool->getBaseIdx(maxBase + lineSpace);
- ++baseIdx) {
- word0 = NULL;
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base > maxBase &&
- word1->base <= maxBase + lineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin)
- : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta1 * fontSize) {
- word2 = word1;
- if (word0) {
- word0->next = word1->next;
- } else {
- pool->setPool(baseIdx, word1->next);
- }
- word1 = word1->next;
- word2->next = NULL;
- blk->addWord(word2);
- found = gTrue;
- newMaxBase = word2->base;
- } else {
- word0 = word1;
- word1 = word1->next;
- }
- }
- }
- maxBase = newMaxBase;
-
- // look for words that are on lines already in the block, and
- // that overlap the block horizontally
- for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace);
- baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace);
- ++baseIdx) {
- word0 = NULL;
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base >= minBase - intraLineSpace &&
- word1->base <= maxBase + intraLineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMin < blk->xMax + colSpace1 &&
- word1->xMax > blk->xMin - colSpace1)
- : (word1->yMin < blk->yMax + colSpace1 &&
- word1->yMax > blk->yMin - colSpace1)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta2 * fontSize) {
- word2 = word1;
- if (word0) {
- word0->next = word1->next;
- } else {
- pool->setPool(baseIdx, word1->next);
- }
- word1 = word1->next;
- word2->next = NULL;
- blk->addWord(word2);
- found = gTrue;
- } else {
- word0 = word1;
- word1 = word1->next;
- }
- }
- }
-
- // only check for outlying words (the next two chunks of code)
- // if we didn't find anything else
- if (found) {
- continue; // jumps to the while (found) test, i.e. starts another pass
- }
-
- // scan down the left side of the block, looking for words
- // that are near (but not overlapping) the block; if there are
- // three or fewer, add them to the block
- n = 0;
- for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace);
- baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace);
- ++baseIdx) {
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base >= minBase - intraLineSpace &&
- word1->base <= maxBase + intraLineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMax <= blk->xMin &&
- word1->xMax > blk->xMin - colSpace2)
- : (word1->yMax <= blk->yMin &&
- word1->yMax > blk->yMin - colSpace2)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta3 * fontSize) {
- ++n;
- break; // counts at most one candidate per baseline index
- }
- word1 = word1->next;
- }
- }
- if (n > 0 && n <= 3) {
- for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace);
- baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace);
- ++baseIdx) {
- word0 = NULL;
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base >= minBase - intraLineSpace &&
- word1->base <= maxBase + intraLineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMax <= blk->xMin &&
- word1->xMax > blk->xMin - colSpace2)
- : (word1->yMax <= blk->yMin &&
- word1->yMax > blk->yMin - colSpace2)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta3 * fontSize) {
- word2 = word1;
- if (word0) {
- word0->next = word1->next;
- } else {
- pool->setPool(baseIdx, word1->next);
- }
- word1 = word1->next;
- word2->next = NULL;
- blk->addWord(word2);
- if (word2->base < minBase) {
- minBase = word2->base;
- } else if (word2->base > maxBase) {
- maxBase = word2->base;
- }
- found = gTrue;
- break; // moves at most one word per baseline index
- } else {
- word0 = word1;
- word1 = word1->next;
- }
- }
- }
- }
-
- // scan down the right side of the block, looking for words
- // that are near (but not overlapping) the block; if there are
- // three or fewer, add them to the block
- n = 0;
- for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace);
- baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace);
- ++baseIdx) {
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base >= minBase - intraLineSpace &&
- word1->base <= maxBase + intraLineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMin >= blk->xMax &&
- word1->xMin < blk->xMax + colSpace2)
- : (word1->yMin >= blk->yMax &&
- word1->yMin < blk->yMax + colSpace2)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta3 * fontSize) {
- ++n;
- break;
- }
- word1 = word1->next;
- }
- }
- if (n > 0 && n <= 3) {
- for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace);
- baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace);
- ++baseIdx) {
- word0 = NULL;
- word1 = pool->getPool(baseIdx);
- while (word1) {
- if (word1->base >= minBase - intraLineSpace &&
- word1->base <= maxBase + intraLineSpace &&
- ((rot == 0 || rot == 2)
- ? (word1->xMin >= blk->xMax &&
- word1->xMin < blk->xMax + colSpace2)
- : (word1->yMin >= blk->yMax &&
- word1->yMin < blk->yMax + colSpace2)) &&
- fabs(word1->fontSize - fontSize) <
- maxBlockFontSizeDelta3 * fontSize) {
- word2 = word1;
- if (word0) {
- word0->next = word1->next;
- } else {
- pool->setPool(baseIdx, word1->next);
- }
- word1 = word1->next;
- word2->next = NULL;
- blk->addWord(word2);
- if (word2->base < minBase) {
- minBase = word2->base;
- } else if (word2->base > maxBase) {
- maxBase = word2->base;
- }
- found = gTrue;
- break;
- } else {
- word0 = word1;
- word1 = word1->next;
- }
- }
- }
- }
-
- } while (found);
-
- //~ need to compute the primary writing mode (horiz/vert) in
- //~ addition to primary rotation
-
- // coalesce the block, and add it to the list
- blk->coalesce(uMap);
- if (lastBlk) {
- lastBlk->next = blk;
- } else {
- blkList = blk;
- }
- lastBlk = blk;
- count[rot] += blk->charCount;
- if (primaryRot < 0 || count[rot] > count[primaryRot]) { // primary rotation = rotation with the largest character count so far
- primaryRot = rot;
- }
- ++nBlocks;
- }
- }
-
-#if 0 // for debugging
- printf("*** rotation ***\n");
- for (rot = 0; rot < 4; ++rot) {
- printf(" %d: %6d\n", rot, count[rot]);
- }
- printf(" primary rot = %d\n", primaryRot);
- printf("\n");
-#endif
-
-#if 0 // for debugging
- printf("*** blocks ***\n");
- for (blk = blkList; blk; blk = blk->next) {
- printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f\n",
- blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax);
- for (line = blk->lines; line; line = line->next) {
- printf(" line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f\n",
- line->xMin, line->xMax, line->yMin, line->yMax, line->base);
- for (word0 = line->words; word0; word0 = word0->next) {
- printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
- word0->xMin, word0->xMax, word0->yMin, word0->yMax,
- word0->base, word0->fontSize, word0->spaceAfter);
- for (i = 0; i < word0->len; ++i) {
- fputc(word0->text[i] & 0xff, stdout);
- }
- printf("'\n");
- }
- }
- }
- printf("\n");
-#endif
-
- // determine the primary direction
- lrCount = 0;
- for (blk = blkList; blk; blk = blk->next) {
- for (line = blk->lines; line; line = line->next) {
- for (word0 = line->words; word0; word0 = word0->next) {
- for (i = 0; i < word0->len; ++i) {
- if (unicodeTypeL(word0->text[i])) {
- ++lrCount;
- } else if (unicodeTypeR(word0->text[i])) {
- --lrCount;
- }
- }
- }
- }
- }
- primaryLR = lrCount >= 0; // a tie (including a page with no such characters) counts as left-to-right
-
-#if 0 // for debugging
- printf("*** direction ***\n");
- printf("lrCount = %d\n", lrCount);
- printf("primaryLR = %d\n", primaryLR);
-#endif
-
- //----- column assignment
-
- // sort blocks into xy order for column assignment
- blocks = (TextBlock **)gmalloc(nBlocks * sizeof(TextBlock *)); // pointer array kept in the page; TextPage::clear() releases it with gfree()
- for (blk = blkList, i = 0; blk; blk = blk->next, ++i) {
- blocks[i] = blk;
- }
- qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot);
-
- // column assignment
- for (i = 0; i < nBlocks; ++i) {
- blk0 = blocks[i];
- col1 = 0; // col1 = largest column demanded by any earlier block
- for (j = 0; j < i; ++j) {
- blk1 = blocks[j];
- col2 = 0; // make gcc happy
- switch (primaryRot) {
- case 0:
- if (blk0->xMin > blk1->xMax) { // blk0 starts past blk1: place it after blk1's columns plus 3
- col2 = blk1->col + blk1->nColumns + 3;
- } else { // otherwise scale blk0's offset within blk1 to blk1's column count
- col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) /
- (blk1->xMax - blk1->xMin)) *
- blk1->nColumns);
- }
- break;
- case 1:
- if (blk0->yMin > blk1->yMax) {
- col2 = blk1->col + blk1->nColumns + 3;
- } else {
- col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) /
- (blk1->yMax - blk1->yMin)) *
- blk1->nColumns);
- }
- break;
- case 2:
- if (blk0->xMax < blk1->xMin) {
- col2 = blk1->col + blk1->nColumns + 3;
- } else {
- col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) /
- (blk1->xMin - blk1->xMax)) *
- blk1->nColumns);
- }
- break;
- case 3:
- if (blk0->yMax < blk1->yMin) {
- col2 = blk1->col + blk1->nColumns + 3;
- } else {
- col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) /
- (blk1->yMin - blk1->yMax)) *
- blk1->nColumns);
- }
- break;
- }
- if (col2 > col1) {
- col1 = col2;
- }
- }
- blk0->col = col1;
- for (line = blk0->lines; line; line = line->next) { // shift every line's column table by the block's starting column
- for (j = 0; j <= line->len; ++j) {
- line->col[j] += col1;
- }
- }
- }
-
-#if 0 // for debugging
- printf("*** blocks, after column assignment ***\n");
- for (blk = blkList; blk; blk = blk->next) {
- printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f col=%d nCols=%d\n",
- blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->col,
- blk->nColumns);
- for (line = blk->lines; line; line = line->next) {
- printf(" line:\n");
- for (word0 = line->words; word0; word0 = word0->next) {
- printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
- word0->xMin, word0->xMax, word0->yMin, word0->yMax,
- word0->base, word0->fontSize, word0->spaceAfter);
- for (i = 0; i < word0->len; ++i) {
- fputc(word0->text[i] & 0xff, stdout);
- }
- printf("'\n");
- }
- }
- }
- printf("\n");
-#endif
-
- //----- reading order sort
-
- // sort blocks into yx order (in preparation for reading order sort)
- qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot);
-
- // compute space on left and right sides of each block
- for (i = 0; i < nBlocks; ++i) {
- blk0 = blocks[i];
- for (j = 0; j < nBlocks; ++j) {
- blk1 = blocks[j];
- if (blk1 != blk0) {
- blk0->updatePriMinMax(blk1); // every other block may narrow blk0's [priMin, priMax]
- }
- }
- }
-
-#if 0 // for debugging
- printf("*** blocks, after yx sort ***\n");
- for (i = 0; i < nBlocks; ++i) {
- blk = blocks[i];
- printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f space=%.2f..%.2f\n",
- blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax,
- blk->priMin, blk->priMax);
- for (line = blk->lines; line; line = line->next) {
- printf(" line:\n");
- for (word0 = line->words; word0; word0 = word0->next) {
- printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
- word0->xMin, word0->xMax, word0->yMin, word0->yMax,
- word0->base, word0->fontSize, word0->spaceAfter);
- for (j = 0; j < word0->len; ++j) {
- fputc(word0->text[j] & 0xff, stdout);
- }
- printf("'\n");
- }
- }
- }
- printf("\n");
-#endif
-
- // build the flows
- //~ this needs to be adjusted for writing mode (vertical text)
- //~ this also needs to account for right-to-left column ordering
- blkArray = (TextBlock **)gmalloc(nBlocks * sizeof(TextBlock *)); // scratch copy of blocks[]; an entry is set to NULL once its block joins a flow
- memcpy(blkArray, blocks, nBlocks * sizeof(TextBlock *));
- flows = lastFlow = NULL;
- firstBlkIdx = 0;
- nBlocksLeft = nBlocks;
- while (nBlocksLeft > 0) {
-
- // find the upper-left-most block
- for (; !blkArray[firstBlkIdx]; ++firstBlkIdx) ; // skip entries that were already consumed
- i = firstBlkIdx;
- blk = blkArray[i];
- for (j = firstBlkIdx + 1; j < nBlocks; ++j) {
- blk1 = blkArray[j];
- if (blk1) {
- if (blk && blk->secondaryDelta(blk1) > 0) {
- break;
- }
- if (blk1->primaryCmp(blk) < 0) {
- i = j;
- blk = blk1;
- }
- }
- }
- blkArray[i] = NULL;
- --nBlocksLeft;
- blk->next = NULL; // the block's next link is reused for the flow's block list from here on
-
- // create a new flow, starting with the upper-left-most block
- flow = new TextFlow(this, blk);
- if (lastFlow) {
- lastFlow->next = flow;
- } else {
- flows = flow;
- }
- lastFlow = flow;
- fontSize = blk->lines->words->fontSize; // NOTE(review): fontSize is not read again in the rest of this function
-
- // push the upper-left-most block on the stack
- blk->stackNext = NULL;
- blkStack = blk;
-
- // find the other blocks in this flow
- while (blkStack) {
-
- // find the upper-left-most block under (but within
- // maxBlockSpacing of) the top block on the stack
- blkSpace = maxBlockSpacing * blkStack->lines->words->fontSize;
- blk = NULL;
- i = -1;
- for (j = firstBlkIdx; j < nBlocks; ++j) {
- blk1 = blkArray[j];
- if (blk1) {
- if (blkStack->secondaryDelta(blk1) > blkSpace) {
- break;
- }
- if (blk && blk->secondaryDelta(blk1) > 0) {
- break;
- }
- if (blk1->isBelow(blkStack) &&
- (!blk || blk1->primaryCmp(blk) < 0)) {
- i = j;
- blk = blk1;
- }
- }
- }
-
- // if a suitable block was found, add it to the flow and push it
- // onto the stack
- if (blk && flow->blockFits(blk, blkStack)) {
- blkArray[i] = NULL;
- --nBlocksLeft;
- blk->next = NULL;
- flow->addBlock(blk);
- fontSize = blk->lines->words->fontSize;
- blk->stackNext = blkStack;
- blkStack = blk;
-
- // otherwise (if there is no block under the top block or the
- // block is not suitable), pop the stack
- } else {
- blkStack = blkStack->stackNext;
- }
- }
- }
- gfree(blkArray);
-
-#if 0 // for debugging
- printf("*** flows ***\n");
- for (flow = flows; flow; flow = flow->next) {
- printf("flow: x=%.2f..%.2f y=%.2f..%.2f pri:%.2f..%.2f\n",
- flow->xMin, flow->xMax, flow->yMin, flow->yMax,
- flow->priMin, flow->priMax);
- for (blk = flow->blocks; blk; blk = blk->next) {
- printf(" block: rot=%d x=%.2f..%.2f y=%.2f..%.2f pri=%.2f..%.2f\n",
- blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax,
- blk->priMin, blk->priMax);
- for (line = blk->lines; line; line = line->next) {
- printf(" line:\n");
- for (word0 = line->words; word0; word0 = word0->next) {
- printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
- word0->xMin, word0->xMax, word0->yMin, word0->yMax,
- word0->base, word0->fontSize, word0->spaceAfter);
- for (i = 0; i < word0->len; ++i) {
- fputc(word0->text[i] & 0xff, stdout);
- }
- printf("'\n");
- }
- }
- }
- }
- printf("\n");
-#endif
-
- if (uMap) {
- uMap->decRefCnt();
- }
-}
-
-GBool TextPage::findText(Unicode *s, int len,
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- double *xMin, double *yMin,
- double *xMax, double *yMax) {
- TextBlock *blk;
- TextLine *line;
- Unicode *p;
- Unicode u1, u2;
- int m, i, j, k;
- double xStart, yStart, xStop, yStop;
- double xMin0, yMin0, xMax0, yMax0;
- double xMin1, yMin1, xMax1, yMax1;
- GBool found;
-
- //~ needs to handle right-to-left text
-
- if (rawOrder) {
- return gFalse;
- }
-
- xStart = yStart = xStop = yStop = 0;
- if (startAtLast && haveLastFind) {
- xStart = lastFindXMin;
- yStart = lastFindYMin;
- } else if (!startAtTop) {
- xStart = *xMin;
- yStart = *yMin;
- }
- if (stopAtLast && haveLastFind) {
- xStop = lastFindXMin;
- yStop = lastFindYMin;
- } else if (!stopAtBottom) {
- xStop = *xMax;
- yStop = *yMax;
- }
-
- found = gFalse;
- xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
- xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy
-
- for (i = 0; i < nBlocks; ++i) {
- blk = blocks[i];
-
- // check: is the block above the top limit?
- if (!startAtTop && blk->yMax < yStart) {
- continue;
- }
-
- // check: is the block below the bottom limit?
- if (!stopAtBottom && blk->yMin > yStop) {
- break;
- }
-
- for (line = blk->lines; line; line = line->next) {
-
- // check: is the line above the top limit?
- if (!startAtTop && line->yMin < yStart) {
- continue;
- }
-
- // check: is the line below the bottom limit?
- if (!stopAtBottom && line->yMin > yStop) {
- continue;
- }
-
- // search each position in this line
- m = line->len;
- for (j = 0, p = line->text; j <= m - len; ++j, ++p) {
-
- // compare the strings
- for (k = 0; k < len; ++k) {
-#if 1 //~ this lowercases Latin A-Z only -- this will eventually be
- //~ extended to handle other character sets
- if (p[k] >= 0x41 && p[k] <= 0x5a) {
- u1 = p[k] + 0x20;
- } else {
- u1 = p[k];
- }
- if (s[k] >= 0x41 && s[k] <= 0x5a) {
- u2 = s[k] + 0x20;
- } else {
- u2 = s[k];
- }
-#endif
- if (u1 != u2) {
- break;
- }
- }
-
- // found it
- if (k == len) {
- switch (line->rot) {
- case 0:
- xMin1 = line->edge[j];
- xMax1 = line->edge[j + len];
- yMin1 = line->yMin;
- yMax1 = line->yMax;
- break;
- case 1:
- xMin1 = line->xMin;
- xMax1 = line->xMax;
- yMin1 = line->edge[j];
- yMax1 = line->edge[j + len];
- break;
- case 2:
- xMin1 = line->edge[j + len];
- xMax1 = line->edge[j];
- yMin1 = line->yMin;
- yMax1 = line->yMax;
- break;
- case 3:
- xMin1 = line->xMin;
- xMax1 = line->xMax;
- yMin1 = line->edge[j + len];
- yMax1 = line->edge[j];
- break;
- }
- if ((startAtTop ||
- yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) &&
- (stopAtBottom ||
- yMin1 < yStop || (yMin1 == yStop && xMin1 < yStop))) {
- if (!found || yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
- xMin0 = xMin1;
- xMax0 = xMax1;
- yMin0 = yMin1;
- yMax0 = yMax1;
- found = gTrue;
- }
- }
- }
- }
- }
- }
-
- if (found) {
- *xMin = xMin0;
- *xMax = xMax0;
- *yMin = yMin0;
- *yMax = yMax0;
- lastFindXMin = xMin0;
- lastFindYMin = yMin0;
- haveLastFind = gTrue;
- return gTrue;
- }
-
- return gFalse;
-}
-
-GString *TextPage::getText(double xMin, double yMin,
- double xMax, double yMax) {
- GString *s;
- UnicodeMap *uMap;
- GBool isUnicode;
- TextBlock *blk;
- TextLine *line;
- TextLineFrag *frags;
- int nFrags, fragsSize;
- TextLineFrag *frag;
- char space[8], eol[16];
- int spaceLen, eolLen;
- int lastRot;
- double x, y;
- int col, idx0, idx1, i, j;
- GBool multiLine, oneRot;
-
- s = new GString();
-
- if (rawOrder) {
- return s;
- }
-
- // get the output encoding
- if (!(uMap = globalParams->getTextEncoding())) {
- return s;
- }
- isUnicode = uMap->isUnicode();
- spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
- eolLen = 0; // make gcc happy
- switch (globalParams->getTextEOL()) {
- case eolUnix:
- eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
- break;
- case eolDOS:
- eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
- eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
- break;
- case eolMac:
- eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
- break;
- }
-
- //~ writing mode (horiz/vert)
-
- // collect the line fragments that are in the rectangle
- fragsSize = 256;
- frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag));
- nFrags = 0;
- lastRot = -1;
- oneRot = gTrue;
- for (i = 0; i < nBlocks; ++i) {
- blk = blocks[i];
- if (xMin < blk->xMax && blk->xMin < xMax &&
- yMin < blk->yMax && blk->yMin < yMax) {
- for (line = blk->lines; line; line = line->next) {
- if (xMin < line->xMax && line->xMin < xMax &&
- yMin < line->yMax && line->yMin < yMax) {
- idx0 = idx1 = -1;
- switch (line->rot) {
- case 0:
- y = 0.5 * (line->yMin + line->yMax);
- if (yMin < y && y < yMax) {
- j = 0;
- while (j < line->len) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) > xMin) {
- idx0 = j;
- break;
- }
- ++j;
- }
- j = line->len - 1;
- while (j >= 0) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) < xMax) {
- idx1 = j;
- break;
- }
- --j;
- }
- }
- break;
- case 1:
- x = 0.5 * (line->xMin + line->xMax);
- if (xMin < x && x < xMax) {
- j = 0;
- while (j < line->len) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) > yMin) {
- idx0 = j;
- break;
- }
- ++j;
- }
- j = line->len - 1;
- while (j >= 0) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) < yMax) {
- idx1 = j;
- break;
- }
- --j;
- }
- }
- break;
- case 2:
- y = 0.5 * (line->yMin + line->yMax);
- if (yMin < y && y < yMax) {
- j = 0;
- while (j < line->len) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) < xMax) {
- idx0 = j;
- break;
- }
- ++j;
- }
- j = line->len - 1;
- while (j >= 0) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) > xMin) {
- idx1 = j;
- break;
- }
- --j;
- }
- }
- break;
- case 3:
- x = 0.5 * (line->xMin + line->xMax);
- if (xMin < x && x < xMax) {
- j = 0;
- while (j < line->len) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) < yMax) {
- idx0 = j;
- break;
- }
- ++j;
- }
- j = line->len - 1;
- while (j >= 0) {
- if (0.5 * (line->edge[j] + line->edge[j+1]) > yMin) {
- idx1 = j;
- break;
- }
- --j;
- }
- }
- break;
- }
- if (idx0 >= 0 && idx1 >= 0) {
- if (nFrags == fragsSize) {
- fragsSize *= 2;
- frags = (TextLineFrag *)
- grealloc(frags, fragsSize * sizeof(TextLineFrag));
- }
- frags[nFrags].init(line, idx0, idx1 - idx0 + 1);
- ++nFrags;
- if (lastRot >= 0 && line->rot != lastRot) {
- oneRot = gFalse;
- }
- lastRot = line->rot;
- }
- }
- }
- }
- }
-
- // sort the fragments and generate the string
- if (nFrags > 0) {
-
- for (i = 0; i < nFrags; ++i) {
- frags[i].computeCoords(oneRot);
- }
- assignColumns(frags, nFrags, oneRot);
-
- // if all lines in the region have the same rotation, use it;
- // otherwise, use the page's primary rotation
- if (oneRot) {
- qsort(frags, nFrags, sizeof(TextLineFrag),
- &TextLineFrag::cmpYXLineRot);
- } else {
- qsort(frags, nFrags, sizeof(TextLineFrag),
- &TextLineFrag::cmpYXPrimaryRot);
- }
-
- col = 0;
- multiLine = gFalse;
- for (i = 0; i < nFrags; ++i) {
- frag = &frags[i];
-
- // insert a return
- if (frag->col < col ||
- (i > 0 && fabs(frag->base - frags[i-1].base) >
- maxIntraLineDelta * frags[i-1].line->words->fontSize)) {
- s->append(eol, eolLen);
- col = 0;
- multiLine = gTrue;
- }
-
- // column alignment
- for (; col < frag->col; ++col) {
- s->append(space, spaceLen);
- }
-
- // get the fragment text
- col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
- }
-
- if (multiLine) {
- s->append(eol, eolLen);
- }
- }
-
- gfree(frags);
- uMap->decRefCnt();
-
- return s;
-}
-
-GBool TextPage::findCharRange(int pos, int length,
- double *xMin, double *yMin,
- double *xMax, double *yMax) {
- TextBlock *blk;
- TextLine *line;
- TextWord *word;
- double xMin0, xMax0, yMin0, yMax0;
- double xMin1, xMax1, yMin1, yMax1;
- GBool first;
- int i, j0, j1;
-
- if (rawOrder) {
- return gFalse;
- }
-
- //~ this doesn't correctly handle:
- //~ - ranges split across multiple lines (the highlighted region
- //~ is the bounding box of all the parts of the range)
- //~ - cases where characters don't convert one-to-one into Unicode
- first = gTrue;
- xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
- xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy
- for (i = 0; i < nBlocks; ++i) {
- blk = blocks[i];
- for (line = blk->lines; line; line = line->next) {
- for (word = line->words; word; word = word->next) {
- if (pos < word->charPos + word->charLen &&
- word->charPos < pos + length) {
- j0 = pos - word->charPos;
- if (j0 < 0) {
- j0 = 0;
- }
- j1 = pos + length - 1 - word->charPos;
- if (j1 >= word->len) {
- j1 = word->len - 1;
- }
- switch (line->rot) {
- case 0:
- xMin1 = word->edge[j0];
- xMax1 = word->edge[j1 + 1];
- yMin1 = word->yMin;
- yMax1 = word->yMax;
- break;
- case 1:
- xMin1 = word->xMin;
- xMax1 = word->xMax;
- yMin1 = word->edge[j0];
- yMax1 = word->edge[j1 + 1];
- break;
- case 2:
- xMin1 = word->edge[j1 + 1];
- xMax1 = word->edge[j0];
- yMin1 = word->yMin;
- yMax1 = word->yMax;
- break;
- case 3:
- xMin1 = word->xMin;
- xMax1 = word->xMax;
- yMin1 = word->edge[j1 + 1];
- yMax1 = word->edge[j0];
- break;
- }
- if (first || xMin1 < xMin0) {
- xMin0 = xMin1;
- }
- if (first || xMax1 > xMax0) {
- xMax0 = xMax1;
- }
- if (first || yMin1 < yMin0) {
- yMin0 = yMin1;
- }
- if (first || yMax1 > yMax0) {
- yMax0 = yMax1;
- }
- first = gFalse;
- }
- }
- }
- }
- if (!first) {
- *xMin = xMin0;
- *xMax = xMax0;
- *yMin = yMin0;
- *yMax = yMax0;
- return gTrue;
- }
- return gFalse;
-}
-
-void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
- GBool physLayout) {
- UnicodeMap *uMap;
- TextFlow *flow;
- TextBlock *blk;
- TextLine *line;
- TextLineFrag *frags;
- TextWord *word;
- int nFrags, fragsSize;
- TextLineFrag *frag;
- char space[8], eol[16], eop[8];
- int spaceLen, eolLen, eopLen;
- GBool pageBreaks;
- GString *s;
- int col, i, d, n;
-
- // get the output encoding
- if (!(uMap = globalParams->getTextEncoding())) {
- return;
- }
- spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
- eolLen = 0; // make gcc happy
- switch (globalParams->getTextEOL()) {
- case eolUnix:
- eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
- break;
- case eolDOS:
- eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
- eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
- break;
- case eolMac:
- eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
- break;
- }
- eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
- pageBreaks = globalParams->getTextPageBreaks();
-
- //~ writing mode (horiz/vert)
-
- // output the page in raw (content stream) order
- if (rawOrder) {
-
- for (word = rawWords; word; word = word->next) {
- s = new GString();
- dumpFragment(word->text, word->len, uMap, s);
- (*outputFunc)(outputStream, s->getCString(), s->getLength());
- delete s;
- if (word->next &&
- fabs(word->next->base - word->base) <
- maxIntraLineDelta * word->fontSize) {
- if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) {
- (*outputFunc)(outputStream, space, spaceLen);
- }
- } else {
- (*outputFunc)(outputStream, eol, eolLen);
- }
- }
-
- // output the page, maintaining the original physical layout
- } else if (physLayout) {
-
- // collect the line fragments for the page and sort them
- fragsSize = 256;
- frags = (TextLineFrag *)gmalloc(fragsSize * sizeof(TextLineFrag));
- nFrags = 0;
- for (i = 0; i < nBlocks; ++i) {
- blk = blocks[i];
- for (line = blk->lines; line; line = line->next) {
- if (nFrags == fragsSize) {
- fragsSize *= 2;
- frags = (TextLineFrag *)grealloc(frags,
- fragsSize * sizeof(TextLineFrag));
- }
- frags[nFrags].init(line, 0, line->len);
- frags[nFrags].computeCoords(gTrue);
- ++nFrags;
- }
- }
- qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpYXPrimaryRot);
-
- // generate output
- col = 0;
- for (i = 0; i < nFrags; ++i) {
- frag = &frags[i];
-
- // column alignment
- for (; col < frag->col; ++col) {
- (*outputFunc)(outputStream, space, spaceLen);
- }
-
- // print the line
- s = new GString();
- col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
- (*outputFunc)(outputStream, s->getCString(), s->getLength());
- delete s;
-
- // print one or more returns if necessary
- if (i == nFrags - 1 ||
- frags[i+1].col < col ||
- fabs(frags[i+1].base - frag->base) >
- maxIntraLineDelta * frag->line->words->fontSize) {
- if (i < nFrags - 1) {
- d = (int)((frags[i+1].base - frag->base) /
- frag->line->words->fontSize);
- if (d < 1) {
- d = 1;
- } else if (d > 5) {
- d = 5;
- }
- } else {
- d = 1;
- }
- for (; d > 0; --d) {
- (*outputFunc)(outputStream, eol, eolLen);
- }
- col = 0;
- }
- }
-
- gfree(frags);
-
- // output the page, "undoing" the layout
- } else {
- for (flow = flows; flow; flow = flow->next) {
- for (blk = flow->blocks; blk; blk = blk->next) {
- for (line = blk->lines; line; line = line->next) {
- n = line->len;
- if (line->hyphenated && (line->next || blk->next)) {
- --n;
- }
- s = new GString();
- dumpFragment(line->text, n, uMap, s);
- (*outputFunc)(outputStream, s->getCString(), s->getLength());
- delete s;
- if (!line->hyphenated) {
- if (line->next) {
- (*outputFunc)(outputStream, space, spaceLen);
- } else if (blk->next) {
- //~ this is a bit of a kludge - we should really do a more
- //~ intelligent determination of paragraphs
- if (blk->next->lines->words->fontSize ==
- blk->lines->words->fontSize) {
- (*outputFunc)(outputStream, space, spaceLen);
- } else {
- (*outputFunc)(outputStream, eol, eolLen);
- }
- }
- }
- }
- }
- (*outputFunc)(outputStream, eol, eolLen);
- (*outputFunc)(outputStream, eol, eolLen);
- }
- }
-
- // end of page
- if (pageBreaks) {
- (*outputFunc)(outputStream, eop, eopLen);
- (*outputFunc)(outputStream, eol, eolLen);
- }
-
- uMap->decRefCnt();
-}
-
-void TextPage::assignColumns(TextLineFrag *frags, int nFrags, GBool oneRot) {
- TextLineFrag *frag0, *frag1;
- int rot, col1, col2, i, j, k;
-
- // all text in the region has the same rotation -- recompute the
- // column numbers based only on the text in the region
- if (oneRot) {
- qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpXYLineRot);
- rot = frags[0].line->rot;
- for (i = 0; i < nFrags; ++i) {
- frag0 = &frags[i];
- col1 = 0;
- for (j = 0; j < i; ++j) {
- frag1 = &frags[j];
- col2 = 0; // make gcc happy
- switch (rot) {
- case 0:
- if (frag0->xMin >= frag1->xMax) {
- col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] -
- frag1->line->col[frag1->start]) + 1;
- } else {
- for (k = frag1->start;
- k < frag1->start + frag1->len &&
- frag0->xMin >= 0.5 * (frag1->line->edge[k] +
- frag1->line->edge[k+1]);
- ++k) ;
- col2 = frag1->col +
- frag1->line->col[k] - frag1->line->col[frag1->start];
- }
- break;
- case 1:
- if (frag0->yMin >= frag1->yMax) {
- col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] -
- frag1->line->col[frag1->start]) + 1;
- } else {
- for (k = frag1->start;
- k < frag1->start + frag1->len &&
- frag0->yMin >= 0.5 * (frag1->line->edge[k] +
- frag1->line->edge[k+1]);
- ++k) ;
- col2 = frag1->col +
- frag1->line->col[k] - frag1->line->col[frag1->start];
- }
- break;
- case 2:
- if (frag0->xMax <= frag1->xMin) {
- col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] -
- frag1->line->col[frag1->start]) + 1;
- } else {
- for (k = frag1->start;
- k < frag1->start + frag1->len &&
- frag0->xMax <= 0.5 * (frag1->line->edge[k] +
- frag1->line->edge[k+1]);
- ++k) ;
- col2 = frag1->col +
- frag1->line->col[k] - frag1->line->col[frag1->start];
- }
- break;
- case 3:
- if (frag0->yMax <= frag1->yMin) {
- col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] -
- frag1->line->col[frag1->start]) + 1;
- } else {
- for (k = frag1->start;
- k < frag1->start + frag1->len &&
- frag0->yMax <= 0.5 * (frag1->line->edge[k] +
- frag1->line->edge[k+1]);
- ++k) ;
- col2 = frag1->col +
- frag1->line->col[k] - frag1->line->col[frag1->start];
- }
- break;
- }
- if (col2 > col1) {
- col1 = col2;
- }
- }
- frag0->col = col1;
- }
-
- // the region includes text at different rotations -- use the
- // globally assigned column numbers, offset by the minimum column
- // number (i.e., shift everything over to column 0)
- } else {
- col1 = frags[0].col;
- for (i = 1; i < nFrags; ++i) {
- if (frags[i].col < col1) {
- col1 = frags[i].col;
- }
- }
- for (i = 0; i < nFrags; ++i) {
- frags[i].col -= col1;
- }
- }
-}
-
-int TextPage::dumpFragment(Unicode *text, int len, UnicodeMap *uMap,
- GString *s) {
- char lre[8], rle[8], popdf[8], buf[8];
- int lreLen, rleLen, popdfLen, n;
- int nCols, i, j, k;
-
- nCols = 0;
-
- if (uMap->isUnicode()) {
-
- lreLen = uMap->mapUnicode(0x202a, lre, sizeof(lre));
- rleLen = uMap->mapUnicode(0x202b, rle, sizeof(rle));
- popdfLen = uMap->mapUnicode(0x202c, popdf, sizeof(popdf));
-
- if (primaryLR) {
-
- i = 0;
- while (i < len) {
- // output a left-to-right section
- for (j = i; j < len && !unicodeTypeR(text[j]); ++j) ;
- for (k = i; k < j; ++k) {
- n = uMap->mapUnicode(text[k], buf, sizeof(buf));
- s->append(buf, n);
- ++nCols;
- }
- i = j;
- // output a right-to-left section
- for (j = i; j < len && !unicodeTypeL(text[j]); ++j) ;
- if (j > i) {
- s->append(rle, rleLen);
- for (k = j - 1; k >= i; --k) {
- n = uMap->mapUnicode(text[k], buf, sizeof(buf));
- s->append(buf, n);
- ++nCols;
- }
- s->append(popdf, popdfLen);
- i = j;
- }
- }
-
- } else {
-
- s->append(rle, rleLen);
- i = len - 1;
- while (i >= 0) {
- // output a right-to-left section
- for (j = i; j >= 0 && !unicodeTypeL(text[j]); --j) ;
- for (k = i; k > j; --k) {
- n = uMap->mapUnicode(text[k], buf, sizeof(buf));
- s->append(buf, n);
- ++nCols;
- }
- i = j;
- // output a left-to-right section
- for (j = i; j >= 0 && !unicodeTypeR(text[j]); --j) ;
- if (j < i) {
- s->append(lre, lreLen);
- for (k = j + 1; k <= i; ++k) {
- n = uMap->mapUnicode(text[k], buf, sizeof(buf));
- s->append(buf, n);
- ++nCols;
- }
- s->append(popdf, popdfLen);
- i = j;
- }
- }
- s->append(popdf, popdfLen);
-
- }
-
- } else {
- for (i = 0; i < len; ++i) {
- n = uMap->mapUnicode(text[i], buf, sizeof(buf));
- s->append(buf, n);
- nCols += n;
- }
- }
-
- return nCols;
-}
-
-#if TEXTOUT_WORD_LIST
-TextWordList *TextPage::makeWordList(GBool physLayout) {
- return new TextWordList(this, physLayout);
-}
-#endif
-
-//------------------------------------------------------------------------
-// TextOutputDev
-//------------------------------------------------------------------------
-
// Output callback that writes <len> bytes of <text> to <stream>,
// which must be a FILE pointer.
static void outputToFile(void *stream, char *text, int len) {
  FILE *out = (FILE *)stream;

  fwrite(text, 1, len, out);
}
-
-TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA,
- GBool rawOrderA, GBool append) {
- text = NULL;
- physLayout = physLayoutA;
- rawOrder = rawOrderA;
- ok = gTrue;
-
- // open file
- needClose = gFalse;
- if (fileName) {
- if (!strcmp(fileName, "-")) {
- outputStream = stdout;
-#ifdef WIN32
- // keep DOS from munging the end-of-line characters
- setmode(fileno(stdout), O_BINARY);
-#endif
- } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) {
- needClose = gTrue;
- } else {
- error(-1, "Couldn't open text file '%s'", fileName);
- ok = gFalse;
- return;
- }
- outputFunc = &outputToFile;
- } else {
- outputStream = NULL;
- }
-
- // set up text object
- text = new TextPage(rawOrderA);
-}
-
-TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
- GBool physLayoutA, GBool rawOrderA) {
- outputFunc = func;
- outputStream = stream;
- needClose = gFalse;
- physLayout = physLayoutA;
- rawOrder = rawOrderA;
- text = new TextPage(rawOrderA);
- ok = gTrue;
-}
-
-TextOutputDev::~TextOutputDev() {
- if (needClose) {
-#ifdef MACOS
- ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle);
-#endif
- fclose((FILE *)outputStream);
- }
- if (text) {
- delete text;
- }
-}
-
-void TextOutputDev::startPage(int pageNum, GfxState *state) {
- text->startPage(state);
-}
-
-void TextOutputDev::endPage() {
- text->endPage();
- text->coalesce(physLayout);
- if (outputStream) {
- text->dump(outputStream, outputFunc, physLayout);
- }
-}
-
-void TextOutputDev::updateFont(GfxState *state) {
- text->updateFont(state);
-}
-
-void TextOutputDev::beginString(GfxState *state, GString *s) {
-}
-
-void TextOutputDev::endString(GfxState *state) {
-}
-
-void TextOutputDev::drawChar(GfxState *state, double x, double y,
- double dx, double dy,
- double originX, double originY,
- CharCode c, Unicode *u, int uLen) {
- text->addChar(state, x, y, dx, dy, c, u, uLen);
-}
-
-GBool TextOutputDev::findText(Unicode *s, int len,
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- double *xMin, double *yMin,
- double *xMax, double *yMax) {
- return text->findText(s, len, startAtTop, stopAtBottom,
- startAtLast, stopAtLast, xMin, yMin, xMax, yMax);
-}
-
-GString *TextOutputDev::getText(double xMin, double yMin,
- double xMax, double yMax) {
- return text->getText(xMin, yMin, xMax, yMax);
-}
-
-GBool TextOutputDev::findCharRange(int pos, int length,
- double *xMin, double *yMin,
- double *xMax, double *yMax) {
- return text->findCharRange(pos, length, xMin, yMin, xMax, yMax);
-}
-
-#if TEXTOUT_WORD_LIST
-TextWordList *TextOutputDev::makeWordList() {
- return text->makeWordList(physLayout);
-}
-#endif