diff options
Diffstat (limited to 'pdf/xpdf/TextOutputDev.h')
-rw-r--r-- | pdf/xpdf/TextOutputDev.h | 570 |
1 files changed, 0 insertions, 570 deletions
diff --git a/pdf/xpdf/TextOutputDev.h b/pdf/xpdf/TextOutputDev.h deleted file mode 100644 index 2c62237..0000000 --- a/pdf/xpdf/TextOutputDev.h +++ /dev/null @@ -1,570 +0,0 @@ -//======================================================================== -// -// TextOutputDev.h -// -// Copyright 1997-2003 Glyph & Cog, LLC -// -//======================================================================== - -#ifndef TEXTOUTPUTDEV_H -#define TEXTOUTPUTDEV_H - -#include <aconf.h> - -#ifdef USE_GCC_PRAGMAS -#pragma interface -#endif - -#include <stdio.h> -#include "gtypes.h" -#include "GfxFont.h" -#include "OutputDev.h" - -class GString; -class GList; -class GfxFont; -class GfxState; -class UnicodeMap; - -//------------------------------------------------------------------------ - -typedef void (*TextOutputFunc)(void *stream, char *text, int len); - -//------------------------------------------------------------------------ -// TextFontInfo -//------------------------------------------------------------------------ - -class TextFontInfo { -public: - - TextFontInfo(GfxState *state); - ~TextFontInfo(); - - GBool matches(GfxState *state); - -private: - - GfxFont *gfxFont; -#if TEXTOUT_WORD_LIST - GString *fontName; -#endif - - friend class TextWord; - friend class TextPage; -}; - -//------------------------------------------------------------------------ -// TextWord -//------------------------------------------------------------------------ - -class TextWord { -public: - - // Constructor. - TextWord(GfxState *state, int rotA, double x0, double y0, - int charPosA, TextFontInfo *fontA, double fontSize); - - // Destructor. - ~TextWord(); - - // Add a character to the word. - void addChar(GfxState *state, double x, double y, - double dx, double dy, Unicode u); - - // Merge <word> onto the end of <this>. - void merge(TextWord *word); - - // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>), - // based on a primary-axis comparison, e.g., x ordering if rot=0. - int primaryCmp(TextWord *word); - - // Return the distance along the primary axis between <this> and - // <word>. - double primaryDelta(TextWord *word); - - static int cmpYX(const void *p1, const void *p2); - -#if TEXTOUT_WORD_LIST - int getLength() { return len; } - Unicode getChar(int idx) { return text[idx]; } - GString *getText(); - GString *getFontName() { return font->fontName; } - void getColor(double *r, double *g, double *b) - { *r = colorR; *g = colorG; *b = colorB; } - void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) - { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; } - int getCharPos() { return charPos; } - int getCharLen() { return charLen; } -#endif - -private: - - int rot; // rotation, multiple of 90 degrees - // (0, 1, 2, or 3) - double xMin, xMax; // bounding box x coordinates - double yMin, yMax; // bounding box y coordinates - double base; // baseline x or y coordinate - Unicode *text; // the text - double *edge; // "near" edge x or y coord of each char - // (plus one extra entry for the last char) - int len; // length of text and edge arrays - int size; // size of text and edge arrays - int charPos; // character position (within content stream) - int charLen; // number of content stream characters in - // this word - TextFontInfo *font; // font information - double fontSize; // font size - GBool spaceAfter; // set if there is a space between this - // word and the next word on the line - TextWord *next; // next word in line - -#if TEXTOUT_WORD_LIST - double colorR, // word color - colorG, - colorB; -#endif - - friend class TextPool; - friend class TextLine; - friend class TextBlock; - friend class TextFlow; - friend class TextWordList; - friend class TextPage; -}; - -//------------------------------------------------------------------------ -// TextPool -//------------------------------------------------------------------------ - -class TextPool { -public: - - TextPool(); - ~TextPool(); - - TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; } - void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; } - - int getBaseIdx(double base); - - void addWord(TextWord *word); - -private: - - int minBaseIdx; // min baseline bucket index - int maxBaseIdx; // max baseline bucket index - TextWord **pool; // array of linked lists, one for each - // baseline value (multiple of 4 pts) - TextWord *cursor; // pointer to last-accessed word - int cursorBaseIdx; // baseline bucket index of last-accessed word - - friend class TextBlock; - friend class TextPage; -}; - -//------------------------------------------------------------------------ -// TextLine -//------------------------------------------------------------------------ - -class TextLine { -public: - - TextLine(TextBlock *blkA, int rotA, double baseA); - ~TextLine(); - - void addWord(TextWord *word); - - // Return the distance along the primary axis between <this> and - // <line>. - double primaryDelta(TextLine *line); - - // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>), - // based on a primary-axis comparison, e.g., x ordering if rot=0. - int primaryCmp(TextLine *line); - - // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>), - // based on a secondary-axis comparison of the baselines, e.g., y - // ordering if rot=0. - int secondaryCmp(TextLine *line); - - int cmpYX(TextLine *line); - - static int cmpXY(const void *p1, const void *p2); - - void coalesce(UnicodeMap *uMap); - -private: - - TextBlock *blk; // parent block - int rot; // text rotation - double xMin, xMax; // bounding box x coordinates - double yMin, yMax; // bounding box y coordinates - double base; // baseline x or y coordinate - TextWord *words; // words in this line - TextWord *lastWord; // last word in this line - Unicode *text; // Unicode text of the line, including - // spaces between words - double *edge; // "near" edge x or y coord of each char - // (plus one extra entry for the last char) - int *col; // starting column number of each Unicode char - int len; // number of Unicode chars - int convertedLen; // total number of converted characters - GBool hyphenated; // set if last char is a hyphen - TextLine *next; // next line in block - - friend class TextLineFrag; - friend class TextBlock; - friend class TextFlow; - friend class TextWordList; - friend class TextPage; -}; - -//------------------------------------------------------------------------ -// TextBlock -//------------------------------------------------------------------------ - -class TextBlock { -public: - - TextBlock(TextPage *pageA, int rotA); - ~TextBlock(); - - void addWord(TextWord *word); - - void coalesce(UnicodeMap *uMap); - - // Update this block's priMin and priMax values, looking at <blk>. - void updatePriMinMax(TextBlock *blk); - - static int cmpXYPrimaryRot(const void *p1, const void *p2); - - static int cmpYXPrimaryRot(const void *p1, const void *p2); - - int primaryCmp(TextBlock *blk); - - double secondaryDelta(TextBlock *blk); - - // Returns true if <this> is below <blk>, relative to the page's - // primary rotation. - GBool isBelow(TextBlock *blk); - -private: - - TextPage *page; // the parent page - int rot; // text rotation - double xMin, xMax; // bounding box x coordinates - double yMin, yMax; // bounding box y coordinates - double priMin, priMax; // whitespace bounding box along primary axis - - TextPool *pool; // pool of words (used only until lines - // are built) - TextLine *lines; // linked list of lines - TextLine *curLine; // most recently added line - int nLines; // number of lines - int charCount; // number of characters in the block - int col; // starting column - int nColumns; // number of columns in the block - - TextBlock *next; - TextBlock *stackNext; - - friend class TextLine; - friend class TextLineFrag; - friend class TextFlow; - friend class TextWordList; - friend class TextPage; -}; - -//------------------------------------------------------------------------ -// TextFlow -//------------------------------------------------------------------------ - -class TextFlow { -public: - - TextFlow(TextPage *pageA, TextBlock *blk); - ~TextFlow(); - - // Add a block to the end of this flow. - void addBlock(TextBlock *blk); - - // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1) - // it uses a font no larger than the last block added to the flow, - // and (2) it fits within the flow's [priMin, priMax] along the - // primary axis. - GBool blockFits(TextBlock *blk, TextBlock *prevBlk); - -private: - - TextPage *page; // the parent page - double xMin, xMax; // bounding box x coordinates - double yMin, yMax; // bounding box y coordinates - double priMin, priMax; // whitespace bounding box along primary axis - TextBlock *blocks; // blocks in flow - TextBlock *lastBlk; // last block in this flow - TextFlow *next; - - friend class TextWordList; - friend class TextPage; -}; - -#if TEXTOUT_WORD_LIST - -//------------------------------------------------------------------------ -// TextWordList -//------------------------------------------------------------------------ - -class TextWordList { -public: - - // Build a flat word list, in content stream order (if - // text->rawOrder is true), physical layout order (if <physLayout> - // is true and text->rawOrder is false), or reading order (if both - // flags are false). - TextWordList(TextPage *text, GBool physLayout); - - ~TextWordList(); - - // Return the number of words on the list. - int getLength(); - - // Return the <idx>th word from the list. - TextWord *get(int idx); - -private: - - GList *words; -}; - -#endif // TEXTOUT_WORD_LIST - -//------------------------------------------------------------------------ -// TextPage -//------------------------------------------------------------------------ - -class TextPage { -public: - - // Constructor. - TextPage(GBool rawOrderA); - - // Destructor. - ~TextPage(); - - // Start a new page. - void startPage(GfxState *state); - - // End the current page. - void endPage(); - - // Update the current font. - void updateFont(GfxState *state); - - // Begin a new word. - void beginWord(GfxState *state, double x0, double y0); - - // Add a character to the current word. - void addChar(GfxState *state, double x, double y, - double dx, double dy, - CharCode c, Unicode *u, int uLen); - - // End the current word, sorting it into the list of words. - void endWord(); - - // Add a word, sorting it into the list of words. - void addWord(TextWord *word); - - // Coalesce strings that look like parts of the same line. - void coalesce(GBool physLayout); - - // Find a string. If <startAtTop> is true, starts looking at the - // top of the page; else if <startAtLast> is true, starts looking - // immediately after the last find result; else starts looking at - // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the - // bottom of the page; else if <stopAtLast> is true, stops looking - // just before the last find result; else stops looking at - // <xMax>,<yMax>. - GBool findText(Unicode *s, int len, - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - double *xMin, double *yMin, - double *xMax, double *yMax); - - // Get the text which is inside the specified rectangle. - GString *getText(double xMin, double yMin, - double xMax, double yMax); - - // Find a string by character position and length. If found, sets - // the text bounding rectangle and returns true; otherwise returns - // false. - GBool findCharRange(int pos, int length, - double *xMin, double *yMin, - double *xMax, double *yMax); - - // Dump contents of page to a file. - void dump(void *outputStream, TextOutputFunc outputFunc, - GBool physLayout); - -#if TEXTOUT_WORD_LIST - // Build a flat word list, in content stream order (if - // this->rawOrder is true), physical layout order (if <physLayout> - // is true and this->rawOrder is false), or reading order (if both - // flags are false). - TextWordList *makeWordList(GBool physLayout); -#endif - -private: - - void clear(); - void assignColumns(TextLineFrag *frags, int nFrags, int rot); - int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GString *s); - - GBool rawOrder; // keep text in content stream order - - double pageWidth, pageHeight; // width and height of current page - TextWord *curWord; // currently active string - int charPos; // next character position (within content - // stream) - TextFontInfo *curFont; // current font - double curFontSize; // current font size - int nest; // current nesting level (for Type 3 fonts) - int nTinyChars; // number of "tiny" chars seen so far - GBool lastCharOverlap; // set if the last added char overlapped the - // previous char - - TextPool *pools[4]; // a "pool" of TextWords for each rotation - TextFlow *flows; // linked list of flows - TextBlock **blocks; // array of blocks, in yx order - int nBlocks; // number of blocks - int primaryRot; // primary rotation - GBool primaryLR; // primary direction (true means L-to-R, - // false means R-to-L) - TextWord *rawWords; // list of words, in raw order (only if - // rawOrder is set) - TextWord *rawLastWord; // last word on rawWords list - - GList *fonts; // all font info objects used on this - // page [TextFontInfo] - - double lastFindXMin, // coordinates of the last "find" result - lastFindYMin; - GBool haveLastFind; - - friend class TextLine; - friend class TextLineFrag; - friend class TextBlock; - friend class TextFlow; - friend class TextWordList; -}; - -//------------------------------------------------------------------------ -// TextOutputDev -//------------------------------------------------------------------------ - -class TextOutputDev: public OutputDev { -public: - - // Open a text output file. If <fileName> is NULL, no file is - // written (this is useful, e.g., for searching text). If - // <physLayoutA> is true, the original physical layout of the text - // is maintained. If <rawOrder> is true, the text is kept in - // content stream order. - TextOutputDev(char *fileName, GBool physLayoutA, - GBool rawOrderA, GBool append); - - // Create a TextOutputDev which will write to a generic stream. If - // <physLayoutA> is true, the original physical layout of the text - // is maintained. If <rawOrder> is true, the text is kept in - // content stream order. - TextOutputDev(TextOutputFunc func, void *stream, - GBool physLayoutA, GBool rawOrderA); - - // Destructor. - virtual ~TextOutputDev(); - - // Check if file was successfully created. - virtual GBool isOk() { return ok; } - - //---- get info about output device - - // Does this device use upside-down coordinates? - // (Upside-down means (0,0) is the top left corner of the page.) - virtual GBool upsideDown() { return gTrue; } - - // Does this device use drawChar() or drawString()? - virtual GBool useDrawChar() { return gTrue; } - - // Does this device use beginType3Char/endType3Char? Otherwise, - // text in Type 3 fonts will be drawn with drawChar/drawString. - virtual GBool interpretType3Chars() { return gFalse; } - - // Does this device need non-text content? - virtual GBool needNonText() { return gFalse; } - - //----- initialization and control - - // Start a page. - virtual void startPage(int pageNum, GfxState *state); - - // End a page. - virtual void endPage(); - - //----- update text state - virtual void updateFont(GfxState *state); - - //----- text drawing - virtual void beginString(GfxState *state, GString *s); - virtual void endString(GfxState *state); - virtual void drawChar(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode c, Unicode *u, int uLen); - - //----- special access - - // Find a string. If <startAtTop> is true, starts looking at the - // top of the page; else if <startAtLast> is true, starts looking - // immediately after the last find result; else starts looking at - // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the - // bottom of the page; else if <stopAtLast> is true, stops looking - // just before the last find result; else stops looking at - // <xMax>,<yMax>. - GBool findText(Unicode *s, int len, - GBool startAtTop, GBool stopAtBottom, - GBool startAtLast, GBool stopAtLast, - double *xMin, double *yMin, - double *xMax, double *yMax); - - // Get the text which is inside the specified rectangle. - GString *getText(double xMin, double yMin, - double xMax, double yMax); - - // Find a string by character position and length. If found, sets - // the text bounding rectangle and returns true; otherwise returns - // false. - GBool findCharRange(int pos, int length, - double *xMin, double *yMin, - double *xMax, double *yMax); - -#if TEXTOUT_WORD_LIST - // Build a flat word list, in content stream order (if - // this->rawOrder is true), physical layout order (if - // this->physLayout is true and this->rawOrder is false), or reading - // order (if both flags are false). - TextWordList *makeWordList(); -#endif - -private: - - TextOutputFunc outputFunc; // output function - void *outputStream; // output stream - GBool needClose; // need to close the output file? - // (only if outputStream is a FILE*) - TextPage *text; // text for the current page - GBool physLayout; // maintain original physical layout when - // dumping text - GBool rawOrder; // keep text in content stream order - GBool ok; // set up ok? -}; - -#endif |