Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/pdf/xpdf/TextOutputDev.h
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/xpdf/TextOutputDev.h')
-rw-r--r--pdf/xpdf/TextOutputDev.h570
1 files changed, 0 insertions, 570 deletions
diff --git a/pdf/xpdf/TextOutputDev.h b/pdf/xpdf/TextOutputDev.h
deleted file mode 100644
index 2c62237..0000000
--- a/pdf/xpdf/TextOutputDev.h
+++ /dev/null
@@ -1,570 +0,0 @@
-//========================================================================
-//
-// TextOutputDev.h
-//
-// Copyright 1997-2003 Glyph & Cog, LLC
-//
-//========================================================================
-
-#ifndef TEXTOUTPUTDEV_H
-#define TEXTOUTPUTDEV_H
-
-#include <aconf.h>
-
-#ifdef USE_GCC_PRAGMAS
-#pragma interface
-#endif
-
-#include <stdio.h>
-#include "gtypes.h"
-#include "GfxFont.h"
-#include "OutputDev.h"
-
-class GString;
-class GList;
-class GfxFont;
-class GfxState;
-class UnicodeMap;
-
-//------------------------------------------------------------------------
-
-typedef void (*TextOutputFunc)(void *stream, char *text, int len);
-
-//------------------------------------------------------------------------
-// TextFontInfo
-//------------------------------------------------------------------------
-
-class TextFontInfo {
-public:
-
- TextFontInfo(GfxState *state);
- ~TextFontInfo();
-
- GBool matches(GfxState *state);
-
-private:
-
- GfxFont *gfxFont;
-#if TEXTOUT_WORD_LIST
- GString *fontName;
-#endif
-
- friend class TextWord;
- friend class TextPage;
-};
-
-//------------------------------------------------------------------------
-// TextWord
-//------------------------------------------------------------------------
-
-class TextWord {
-public:
-
- // Constructor.
- TextWord(GfxState *state, int rotA, double x0, double y0,
- int charPosA, TextFontInfo *fontA, double fontSize);
-
- // Destructor.
- ~TextWord();
-
- // Add a character to the word.
- void addChar(GfxState *state, double x, double y,
- double dx, double dy, Unicode u);
-
- // Merge <word> onto the end of <this>.
- void merge(TextWord *word);
-
- // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
- // based on a primary-axis comparison, e.g., x ordering if rot=0.
- int primaryCmp(TextWord *word);
-
- // Return the distance along the primary axis between <this> and
- // <word>.
- double primaryDelta(TextWord *word);
-
- static int cmpYX(const void *p1, const void *p2);
-
-#if TEXTOUT_WORD_LIST
- int getLength() { return len; }
- Unicode getChar(int idx) { return text[idx]; }
- GString *getText();
- GString *getFontName() { return font->fontName; }
- void getColor(double *r, double *g, double *b)
- { *r = colorR; *g = colorG; *b = colorB; }
- void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
- { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
- int getCharPos() { return charPos; }
- int getCharLen() { return charLen; }
-#endif
-
-private:
-
- int rot; // rotation, multiple of 90 degrees
- // (0, 1, 2, or 3)
- double xMin, xMax; // bounding box x coordinates
- double yMin, yMax; // bounding box y coordinates
- double base; // baseline x or y coordinate
- Unicode *text; // the text
- double *edge; // "near" edge x or y coord of each char
- // (plus one extra entry for the last char)
- int len; // length of text and edge arrays
- int size; // size of text and edge arrays
- int charPos; // character position (within content stream)
- int charLen; // number of content stream characters in
- // this word
- TextFontInfo *font; // font information
- double fontSize; // font size
- GBool spaceAfter; // set if there is a space between this
- // word and the next word on the line
- TextWord *next; // next word in line
-
-#if TEXTOUT_WORD_LIST
- double colorR, // word color
- colorG,
- colorB;
-#endif
-
- friend class TextPool;
- friend class TextLine;
- friend class TextBlock;
- friend class TextFlow;
- friend class TextWordList;
- friend class TextPage;
-};
-
-//------------------------------------------------------------------------
-// TextPool
-//------------------------------------------------------------------------
-
-class TextPool {
-public:
-
- TextPool();
- ~TextPool();
-
- TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
- void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }
-
- int getBaseIdx(double base);
-
- void addWord(TextWord *word);
-
-private:
-
- int minBaseIdx; // min baseline bucket index
- int maxBaseIdx; // max baseline bucket index
- TextWord **pool; // array of linked lists, one for each
- // baseline value (multiple of 4 pts)
- TextWord *cursor; // pointer to last-accessed word
- int cursorBaseIdx; // baseline bucket index of last-accessed word
-
- friend class TextBlock;
- friend class TextPage;
-};
-
-//------------------------------------------------------------------------
-// TextLine
-//------------------------------------------------------------------------
-
-class TextLine {
-public:
-
- TextLine(TextBlock *blkA, int rotA, double baseA);
- ~TextLine();
-
- void addWord(TextWord *word);
-
- // Return the distance along the primary axis between <this> and
- // <line>.
- double primaryDelta(TextLine *line);
-
- // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
- // based on a primary-axis comparison, e.g., x ordering if rot=0.
- int primaryCmp(TextLine *line);
-
- // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
- // based on a secondary-axis comparison of the baselines, e.g., y
- // ordering if rot=0.
- int secondaryCmp(TextLine *line);
-
- int cmpYX(TextLine *line);
-
- static int cmpXY(const void *p1, const void *p2);
-
- void coalesce(UnicodeMap *uMap);
-
-private:
-
- TextBlock *blk; // parent block
- int rot; // text rotation
- double xMin, xMax; // bounding box x coordinates
- double yMin, yMax; // bounding box y coordinates
- double base; // baseline x or y coordinate
- TextWord *words; // words in this line
- TextWord *lastWord; // last word in this line
- Unicode *text; // Unicode text of the line, including
- // spaces between words
- double *edge; // "near" edge x or y coord of each char
- // (plus one extra entry for the last char)
- int *col; // starting column number of each Unicode char
- int len; // number of Unicode chars
- int convertedLen; // total number of converted characters
- GBool hyphenated; // set if last char is a hyphen
- TextLine *next; // next line in block
-
- friend class TextLineFrag;
- friend class TextBlock;
- friend class TextFlow;
- friend class TextWordList;
- friend class TextPage;
-};
-
-//------------------------------------------------------------------------
-// TextBlock
-//------------------------------------------------------------------------
-
-class TextBlock {
-public:
-
- TextBlock(TextPage *pageA, int rotA);
- ~TextBlock();
-
- void addWord(TextWord *word);
-
- void coalesce(UnicodeMap *uMap);
-
- // Update this block's priMin and priMax values, looking at <blk>.
- void updatePriMinMax(TextBlock *blk);
-
- static int cmpXYPrimaryRot(const void *p1, const void *p2);
-
- static int cmpYXPrimaryRot(const void *p1, const void *p2);
-
- int primaryCmp(TextBlock *blk);
-
- double secondaryDelta(TextBlock *blk);
-
- // Returns true if <this> is below <blk>, relative to the page's
- // primary rotation.
- GBool isBelow(TextBlock *blk);
-
-private:
-
- TextPage *page; // the parent page
- int rot; // text rotation
- double xMin, xMax; // bounding box x coordinates
- double yMin, yMax; // bounding box y coordinates
- double priMin, priMax; // whitespace bounding box along primary axis
-
- TextPool *pool; // pool of words (used only until lines
- // are built)
- TextLine *lines; // linked list of lines
- TextLine *curLine; // most recently added line
- int nLines; // number of lines
- int charCount; // number of characters in the block
- int col; // starting column
- int nColumns; // number of columns in the block
-
- TextBlock *next;
- TextBlock *stackNext;
-
- friend class TextLine;
- friend class TextLineFrag;
- friend class TextFlow;
- friend class TextWordList;
- friend class TextPage;
-};
-
-//------------------------------------------------------------------------
-// TextFlow
-//------------------------------------------------------------------------
-
-class TextFlow {
-public:
-
- TextFlow(TextPage *pageA, TextBlock *blk);
- ~TextFlow();
-
- // Add a block to the end of this flow.
- void addBlock(TextBlock *blk);
-
- // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
- // it uses a font no larger than the last block added to the flow,
- // and (2) it fits within the flow's [priMin, priMax] along the
- // primary axis.
- GBool blockFits(TextBlock *blk, TextBlock *prevBlk);
-
-private:
-
- TextPage *page; // the parent page
- double xMin, xMax; // bounding box x coordinates
- double yMin, yMax; // bounding box y coordinates
- double priMin, priMax; // whitespace bounding box along primary axis
- TextBlock *blocks; // blocks in flow
- TextBlock *lastBlk; // last block in this flow
- TextFlow *next;
-
- friend class TextWordList;
- friend class TextPage;
-};
-
-#if TEXTOUT_WORD_LIST
-
-//------------------------------------------------------------------------
-// TextWordList
-//------------------------------------------------------------------------
-
-class TextWordList {
-public:
-
- // Build a flat word list, in content stream order (if
- // text->rawOrder is true), physical layout order (if <physLayout>
- // is true and text->rawOrder is false), or reading order (if both
- // flags are false).
- TextWordList(TextPage *text, GBool physLayout);
-
- ~TextWordList();
-
- // Return the number of words on the list.
- int getLength();
-
- // Return the <idx>th word from the list.
- TextWord *get(int idx);
-
-private:
-
- GList *words;
-};
-
-#endif // TEXTOUT_WORD_LIST
-
-//------------------------------------------------------------------------
-// TextPage
-//------------------------------------------------------------------------
-
-class TextPage {
-public:
-
- // Constructor.
- TextPage(GBool rawOrderA);
-
- // Destructor.
- ~TextPage();
-
- // Start a new page.
- void startPage(GfxState *state);
-
- // End the current page.
- void endPage();
-
- // Update the current font.
- void updateFont(GfxState *state);
-
- // Begin a new word.
- void beginWord(GfxState *state, double x0, double y0);
-
- // Add a character to the current word.
- void addChar(GfxState *state, double x, double y,
- double dx, double dy,
- CharCode c, Unicode *u, int uLen);
-
- // End the current word, sorting it into the list of words.
- void endWord();
-
- // Add a word, sorting it into the list of words.
- void addWord(TextWord *word);
-
- // Coalesce strings that look like parts of the same line.
- void coalesce(GBool physLayout);
-
- // Find a string. If <startAtTop> is true, starts looking at the
- // top of the page; else if <startAtLast> is true, starts looking
- // immediately after the last find result; else starts looking at
- // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
- // bottom of the page; else if <stopAtLast> is true, stops looking
- // just before the last find result; else stops looking at
- // <xMax>,<yMax>.
- GBool findText(Unicode *s, int len,
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- double *xMin, double *yMin,
- double *xMax, double *yMax);
-
- // Get the text which is inside the specified rectangle.
- GString *getText(double xMin, double yMin,
- double xMax, double yMax);
-
- // Find a string by character position and length. If found, sets
- // the text bounding rectangle and returns true; otherwise returns
- // false.
- GBool findCharRange(int pos, int length,
- double *xMin, double *yMin,
- double *xMax, double *yMax);
-
- // Dump contents of page to a file.
- void dump(void *outputStream, TextOutputFunc outputFunc,
- GBool physLayout);
-
-#if TEXTOUT_WORD_LIST
- // Build a flat word list, in content stream order (if
- // this->rawOrder is true), physical layout order (if <physLayout>
- // is true and this->rawOrder is false), or reading order (if both
- // flags are false).
- TextWordList *makeWordList(GBool physLayout);
-#endif
-
-private:
-
- void clear();
- void assignColumns(TextLineFrag *frags, int nFrags, int rot);
- int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GString *s);
-
- GBool rawOrder; // keep text in content stream order
-
- double pageWidth, pageHeight; // width and height of current page
- TextWord *curWord; // currently active string
- int charPos; // next character position (within content
- // stream)
- TextFontInfo *curFont; // current font
- double curFontSize; // current font size
- int nest; // current nesting level (for Type 3 fonts)
- int nTinyChars; // number of "tiny" chars seen so far
- GBool lastCharOverlap; // set if the last added char overlapped the
- // previous char
-
- TextPool *pools[4]; // a "pool" of TextWords for each rotation
- TextFlow *flows; // linked list of flows
- TextBlock **blocks; // array of blocks, in yx order
- int nBlocks; // number of blocks
- int primaryRot; // primary rotation
- GBool primaryLR; // primary direction (true means L-to-R,
- // false means R-to-L)
- TextWord *rawWords; // list of words, in raw order (only if
- // rawOrder is set)
- TextWord *rawLastWord; // last word on rawWords list
-
- GList *fonts; // all font info objects used on this
- // page [TextFontInfo]
-
- double lastFindXMin, // coordinates of the last "find" result
- lastFindYMin;
- GBool haveLastFind;
-
- friend class TextLine;
- friend class TextLineFrag;
- friend class TextBlock;
- friend class TextFlow;
- friend class TextWordList;
-};
-
-//------------------------------------------------------------------------
-// TextOutputDev
-//------------------------------------------------------------------------
-
-class TextOutputDev: public OutputDev {
-public:
-
- // Open a text output file. If <fileName> is NULL, no file is
- // written (this is useful, e.g., for searching text). If
- // <physLayoutA> is true, the original physical layout of the text
- // is maintained. If <rawOrder> is true, the text is kept in
- // content stream order.
- TextOutputDev(char *fileName, GBool physLayoutA,
- GBool rawOrderA, GBool append);
-
- // Create a TextOutputDev which will write to a generic stream. If
- // <physLayoutA> is true, the original physical layout of the text
- // is maintained. If <rawOrder> is true, the text is kept in
- // content stream order.
- TextOutputDev(TextOutputFunc func, void *stream,
- GBool physLayoutA, GBool rawOrderA);
-
- // Destructor.
- virtual ~TextOutputDev();
-
- // Check if file was successfully created.
- virtual GBool isOk() { return ok; }
-
- //---- get info about output device
-
- // Does this device use upside-down coordinates?
- // (Upside-down means (0,0) is the top left corner of the page.)
- virtual GBool upsideDown() { return gTrue; }
-
- // Does this device use drawChar() or drawString()?
- virtual GBool useDrawChar() { return gTrue; }
-
- // Does this device use beginType3Char/endType3Char? Otherwise,
- // text in Type 3 fonts will be drawn with drawChar/drawString.
- virtual GBool interpretType3Chars() { return gFalse; }
-
- // Does this device need non-text content?
- virtual GBool needNonText() { return gFalse; }
-
- //----- initialization and control
-
- // Start a page.
- virtual void startPage(int pageNum, GfxState *state);
-
- // End a page.
- virtual void endPage();
-
- //----- update text state
- virtual void updateFont(GfxState *state);
-
- //----- text drawing
- virtual void beginString(GfxState *state, GString *s);
- virtual void endString(GfxState *state);
- virtual void drawChar(GfxState *state, double x, double y,
- double dx, double dy,
- double originX, double originY,
- CharCode c, Unicode *u, int uLen);
-
- //----- special access
-
- // Find a string. If <startAtTop> is true, starts looking at the
- // top of the page; else if <startAtLast> is true, starts looking
- // immediately after the last find result; else starts looking at
- // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the
- // bottom of the page; else if <stopAtLast> is true, stops looking
- // just before the last find result; else stops looking at
- // <xMax>,<yMax>.
- GBool findText(Unicode *s, int len,
- GBool startAtTop, GBool stopAtBottom,
- GBool startAtLast, GBool stopAtLast,
- double *xMin, double *yMin,
- double *xMax, double *yMax);
-
- // Get the text which is inside the specified rectangle.
- GString *getText(double xMin, double yMin,
- double xMax, double yMax);
-
- // Find a string by character position and length. If found, sets
- // the text bounding rectangle and returns true; otherwise returns
- // false.
- GBool findCharRange(int pos, int length,
- double *xMin, double *yMin,
- double *xMax, double *yMax);
-
-#if TEXTOUT_WORD_LIST
- // Build a flat word list, in content stream order (if
- // this->rawOrder is true), physical layout order (if
- // this->physLayout is true and this->rawOrder is false), or reading
- // order (if both flags are false).
- TextWordList *makeWordList();
-#endif
-
-private:
-
- TextOutputFunc outputFunc; // output function
- void *outputStream; // output stream
- GBool needClose; // need to close the output file?
- // (only if outputStream is a FILE*)
- TextPage *text; // text for the current page
- GBool physLayout; // maintain original physical layout when
- // dumping text
- GBool rawOrder; // keep text in content stream order
- GBool ok; // set up ok?
-};
-
-#endif