diff options
author | Arturo Espinosa <unammx@src.gnome.org> | 1999-04-17 02:59:58 (GMT) |
---|---|---|
committer | Arturo Espinosa <unammx@src.gnome.org> | 1999-04-17 02:59:58 (GMT) |
commit | d9f9a6449f377b4c933b75d57541b19c6d088994 (patch) | |
tree | 04f7f0c54447ef792fbf83bc5039174f4681b3bb /pdf/xpdf/pdftotext.cc |
Initial revision
Diffstat (limited to 'pdf/xpdf/pdftotext.cc')
-rw-r--r-- | pdf/xpdf/pdftotext.cc | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/pdf/xpdf/pdftotext.cc b/pdf/xpdf/pdftotext.cc new file mode 100644 index 0000000..aa38a8b --- /dev/null +++ b/pdf/xpdf/pdftotext.cc @@ -0,0 +1,114 @@ +//======================================================================== +// +// pdftotext.cc +// +// Copyright 1997 Derek B. Noonburg +// +//======================================================================== + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include "parseargs.h" +#include "GString.h" +#include "gmem.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "TextOutputDev.h" +#include "Params.h" +#include "Error.h" +#include "config.h" + +static int firstPage = 1; +static int lastPage = 0; +static GBool useASCII7 = gFalse; +GBool printCommands = gFalse; +static GBool printHelp = gFalse; + +static ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to convert"}, + {"-l", argInt, &lastPage, 0, + "last page to convert"}, + {"-ascii7", argFlag, &useASCII7, 0, + "convert to 7-bit ASCII (default is 8-bit ISO Latin-1)"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GString *fileName; + GString *textFileName; + TextOutputDev *textOut; + GBool ok; + char *p; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (!ok || argc < 2 || argc > 3 || printHelp) { + fprintf(stderr, "pdftotext version %s\n", xpdfVersion); + fprintf(stderr, "%s\n", xpdfCopyright); + printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc); + exit(1); + } + fileName = new GString(argv[1]); + + // init error file + errorInit(); + + // read config file + initParams(xpdfConfigFile); + + // open PDF file + xref = NULL; + doc = new PDFDoc(fileName); + if (!doc->isOk()) + exit(1); + + // construct text file name + if (argc == 3) { + textFileName = new GString(argv[2]); + } else { + p = fileName->getCString() + fileName->getLength() - 4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) + textFileName = new GString(fileName->getCString(), + fileName->getLength() - 4); + else + textFileName = fileName->copy(); + textFileName->append(".txt"); + } + + // get page range + if (firstPage < 1) + firstPage = 1; + if (lastPage < 1 || lastPage > doc->getNumPages()) + lastPage = doc->getNumPages(); + + // write text file + textOut = new TextOutputDev(textFileName->getCString(), useASCII7); + if (textOut->isOk()) + doc->displayPages(textOut, firstPage, lastPage, 72, 0); + delete textOut; + + // clean up + delete textFileName; + delete doc; + freeParams(); + + // check for memory leaks + Object::memCheck(errFile); + gMemReport(errFile); + + return 0; +} |