Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/blahtexml
diff options
context:
space:
mode:
author Chris Ball <cjb@laptop.org> 2008-05-28 00:11:19 (GMT)
committer Chris Ball <cjb@laptop.org> 2008-05-28 00:11:19 (GMT)
commit 833eceb890add1adee101662493ca24b70336386 (patch)
tree 402fe46379b3c8a8262fd7063fa87071c5171ca8 /blahtexml
parent f1c0ea322eecb0e7d3527d5ec210411679848551 (diff)
Import blahtex-0.5 (GPLv2) sources.
Diffstat (limited to 'blahtexml')
-rw-r--r-- blahtexml/GNU-FDL 397
-rw-r--r-- blahtexml/GNU-GPL 340
-rw-r--r-- blahtexml/HTML+Blahtex.xml 22
-rw-r--r-- blahtexml/HTML+MathML.xml 20
-rw-r--r-- blahtexml/ISTtoTeX.xslt 50
-rw-r--r-- blahtexml/InputSymbolTranslation.tex 694
-rw-r--r-- blahtexml/README 33
-rw-r--r-- blahtexml/example1.xml 5
-rw-r--r-- blahtexml/example2.xml 5
-rw-r--r-- blahtexml/example3.xml 5
-rw-r--r-- blahtexml/example4.xml 4
-rw-r--r-- blahtexml/logo.png bin 0 -> 29600 bytes
-rw-r--r-- blahtexml/makefile 145
-rw-r--r-- blahtexml/manual.tex 1857
-rw-r--r-- blahtexml/source/BlahtexCore/ISTtoCpp.xslt 46
-rw-r--r-- blahtexml/source/BlahtexCore/InputSymbolTranslation.cpp 55
-rw-r--r-- blahtexml/source/BlahtexCore/InputSymbolTranslation.h 33
-rw-r--r-- blahtexml/source/BlahtexCore/InputSymbolTranslation.inc 344
-rw-r--r-- blahtexml/source/BlahtexCore/InputSymbolTranslation.xml 367
-rw-r--r-- blahtexml/source/BlahtexCore/Interface.cpp 63
-rw-r--r-- blahtexml/source/BlahtexCore/Interface.h 90
-rw-r--r-- blahtexml/source/BlahtexCore/LayoutTree.cpp 1677
-rw-r--r-- blahtexml/source/BlahtexCore/LayoutTree.h 670
-rw-r--r-- blahtexml/source/BlahtexCore/MacroProcessor.cpp 332
-rw-r--r-- blahtexml/source/BlahtexCore/MacroProcessor.h 125
-rw-r--r-- blahtexml/source/BlahtexCore/Manager.cpp 553
-rw-r--r-- blahtexml/source/BlahtexCore/Manager.h 127
-rw-r--r-- blahtexml/source/BlahtexCore/MathmlNode.cpp 330
-rw-r--r-- blahtexml/source/BlahtexCore/MathmlNode.h 173
-rw-r--r-- blahtexml/source/BlahtexCore/Misc.h 222
-rw-r--r-- blahtexml/source/BlahtexCore/ParseTree.h 896
-rw-r--r-- blahtexml/source/BlahtexCore/ParseTree1.cpp 1290
-rw-r--r-- blahtexml/source/BlahtexCore/ParseTree2.cpp 1089
-rw-r--r-- blahtexml/source/BlahtexCore/ParseTree3.cpp 1317
-rw-r--r-- blahtexml/source/BlahtexCore/Parser.cpp 1643
-rw-r--r-- blahtexml/source/BlahtexCore/Parser.h 142
-rw-r--r-- blahtexml/source/BlahtexCore/XmlEncode.cpp 694
-rw-r--r-- blahtexml/source/BlahtexCore/XmlEncode.h 42
-rw-r--r-- blahtexml/source/BlahtexXMLin/AttributesImpl.cpp 131
-rw-r--r-- blahtexml/source/BlahtexXMLin/AttributesImpl.h 63
-rw-r--r-- blahtexml/source/BlahtexXMLin/BlahtexFilter.cpp 171
-rw-r--r-- blahtexml/source/BlahtexXMLin/BlahtexFilter.h 51
-rw-r--r-- blahtexml/source/BlahtexXMLin/SAX2Output.cpp 165
-rw-r--r-- blahtexml/source/BlahtexXMLin/SAX2Output.h 57
-rw-r--r-- blahtexml/source/BlahtexXMLin/XercesString.cpp 90
-rw-r--r-- blahtexml/source/BlahtexXMLin/XercesString.h 40
-rw-r--r-- blahtexml/source/Messages.cpp 328
-rw-r--r-- blahtexml/source/UnicodeConverter.cpp 222
-rw-r--r-- blahtexml/source/UnicodeConverter.h 70
-rw-r--r-- blahtexml/source/main.cpp 694
-rw-r--r-- blahtexml/source/mainPng.cpp 213
-rw-r--r-- blahtexml/source/mainPng.h 62
-rw-r--r-- blahtexml/source/md5.c 381
-rw-r--r-- blahtexml/source/md5.h 91
-rw-r--r-- blahtexml/source/md5Wrapper.cpp 53
-rw-r--r-- blahtexml/source/md5Wrapper.h 36
56 files changed, 18815 insertions, 0 deletions
diff --git a/blahtexml/GNU-FDL b/blahtexml/GNU-FDL
new file mode 100644
index 0000000..4a0fe1c
--- /dev/null
+++ b/blahtexml/GNU-FDL
@@ -0,0 +1,397 @@
+ GNU Free Documentation License
+ Version 1.2, November 2002
+
+
+ Copyright (C) 2000,2001,2002 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+0. PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document "free" in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of "copyleft", which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+
+1. APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work, in any medium, that
+contains a notice placed by the copyright holder saying it can be
+distributed under the terms of this License. Such a notice grants a
+world-wide, royalty-free license, unlimited in duration, to use that
+work under the conditions stated herein. The "Document", below,
+refers to any such manual or work. Any member of the public is a
+licensee, and is addressed as "you". You accept the license if you
+copy, modify or distribute the work in a way requiring permission
+under copyright law.
+
+A "Modified Version" of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A "Secondary Section" is a named appendix or a front-matter section of
+the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall subject
+(or to related matters) and contains nothing that could fall directly
+within that overall subject. (Thus, if the Document is in part a
+textbook of mathematics, a Secondary Section may not explain any
+mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The "Invariant Sections" are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License. If a
+section does not fit the above definition of Secondary then it is not
+allowed to be designated as Invariant. The Document may contain zero
+Invariant Sections. If the Document does not identify any Invariant
+Sections then there are none.
+
+The "Cover Texts" are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License. A Front-Cover Text may
+be at most 5 words, and a Back-Cover Text may be at most 25 words.
+
+A "Transparent" copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, that is suitable for revising the document
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup, or absence of markup, has been arranged to thwart
+or discourage subsequent modification by readers is not Transparent.
+An image format is not Transparent if used for any substantial amount
+of text. A copy that is not "Transparent" is called "Opaque".
+
+Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML
+or XML using a publicly available DTD, and standard-conforming simple
+HTML, PostScript or PDF designed for human modification. Examples of
+transparent image formats include PNG, XCF and JPG. Opaque formats
+include proprietary formats that can be read and edited only by
+proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the
+machine-generated HTML, PostScript or PDF produced by some word
+processors for output purposes only.
+
+The "Title Page" means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, "Title Page" means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+A section "Entitled XYZ" means a named subunit of the Document whose
+title either is precisely XYZ or contains XYZ in parentheses following
+text that translates XYZ in another language. (Here XYZ stands for a
+specific section name mentioned below, such as "Acknowledgements",
+"Dedications", "Endorsements", or "History".) To "Preserve the Title"
+of such a section when you modify the Document means that it remains a
+section "Entitled XYZ" according to this definition.
+
+The Document may include Warranty Disclaimers next to the notice which
+states that this License applies to the Document. These Warranty
+Disclaimers are considered to be included by reference in this
+License, but only as regards disclaiming warranties: any other
+implication that these Warranty Disclaimers may have is void and has
+no effect on the meaning of this License.
+
+
+2. VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+
+3. COPYING IN QUANTITY
+
+If you publish printed copies (or copies in media that commonly have
+printed covers) of the Document, numbering more than 100, and the
+Document's license notice requires Cover Texts, you must enclose the
+copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a computer-network location from which the general network-using
+public has access to download using public-standard network protocols
+a complete Transparent copy of the Document, free of added material.
+If you use the latter option, you must take reasonably prudent steps,
+when you begin distribution of Opaque copies in quantity, to ensure
+that this Transparent copy will remain thus accessible at the stated
+location until at least one year after the last time you distribute an
+Opaque copy (directly or through your agents or retailers) of that
+edition to the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+
+4. MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+A. Use in the Title Page (and on the covers, if any) a title distinct
+ from that of the Document, and from those of previous versions
+ (which should, if there were any, be listed in the History section
+ of the Document). You may use the same title as a previous version
+ if the original publisher of that version gives permission.
+B. List on the Title Page, as authors, one or more persons or entities
+ responsible for authorship of the modifications in the Modified
+ Version, together with at least five of the principal authors of the
+ Document (all of its principal authors, if it has fewer than five),
+ unless they release you from this requirement.
+C. State on the Title page the name of the publisher of the
+ Modified Version, as the publisher.
+D. Preserve all the copyright notices of the Document.
+E. Add an appropriate copyright notice for your modifications
+ adjacent to the other copyright notices.
+F. Include, immediately after the copyright notices, a license notice
+ giving the public permission to use the Modified Version under the
+ terms of this License, in the form shown in the Addendum below.
+G. Preserve in that license notice the full lists of Invariant Sections
+ and required Cover Texts given in the Document's license notice.
+H. Include an unaltered copy of this License.
+I. Preserve the section Entitled "History", Preserve its Title, and add
+ to it an item stating at least the title, year, new authors, and
+ publisher of the Modified Version as given on the Title Page. If
+ there is no section Entitled "History" in the Document, create one
+ stating the title, year, authors, and publisher of the Document as
+ given on its Title Page, then add an item describing the Modified
+ Version as stated in the previous sentence.
+J. Preserve the network location, if any, given in the Document for
+ public access to a Transparent copy of the Document, and likewise
+ the network locations given in the Document for previous versions
+ it was based on. These may be placed in the "History" section.
+ You may omit a network location for a work that was published at
+ least four years before the Document itself, or if the original
+ publisher of the version it refers to gives permission.
+K. For any section Entitled "Acknowledgements" or "Dedications",
+ Preserve the Title of the section, and preserve in the section all
+ the substance and tone of each of the contributor acknowledgements
+ and/or dedications given therein.
+L. Preserve all the Invariant Sections of the Document,
+ unaltered in their text and in their titles. Section numbers
+ or the equivalent are not considered part of the section titles.
+M. Delete any section Entitled "Endorsements". Such a section
+ may not be included in the Modified Version.
+N. Do not retitle any existing section to be Entitled "Endorsements"
+ or to conflict in title with any Invariant Section.
+O. Preserve any Warranty Disclaimers.
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section Entitled "Endorsements", provided it contains
+nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+
+5. COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice, and that you preserve all their Warranty Disclaimers.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections Entitled "History"
+in the various original documents, forming one section Entitled
+"History"; likewise combine any sections Entitled "Acknowledgements",
+and any sections Entitled "Dedications". You must delete all sections
+Entitled "Endorsements".
+
+
+6. COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+
+7. AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, is called an "aggregate" if the copyright
+resulting from the compilation is not used to limit the legal rights
+of the compilation's users beyond what the individual works permit.
+When the Document is included in an aggregate, this License does not
+apply to the other works in the aggregate which are not themselves
+derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one half of
+the entire aggregate, the Document's Cover Texts may be placed on
+covers that bracket the Document within the aggregate, or the
+electronic equivalent of covers if the Document is in electronic form.
+Otherwise they must appear on printed covers that bracket the whole
+aggregate.
+
+
+8. TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License, and all the license notices in the
+Document, and any Warranty Disclaimers, provided that you also include
+the original English version of this License and the original versions
+of those notices and disclaimers. In case of a disagreement between
+the translation and the original version of this License or a notice
+or disclaimer, the original version will prevail.
+
+If a section in the Document is Entitled "Acknowledgements",
+"Dedications", or "History", the requirement (section 4) to Preserve
+its Title (section 1) will typically require changing the actual
+title.
+
+
+9. TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document except
+as expressly provided for under this License. Any other attempt to
+copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License. However,
+parties who have received copies, or rights, from you under this
+License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+
+10. FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+http://www.gnu.org/copyleft/.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License "or any later version" applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.
+
+
+ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+ Copyright (c) YEAR YOUR NAME.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.2
+ or any later version published by the Free Software Foundation;
+ with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+ A copy of the license is included in the section entitled "GNU
+ Free Documentation License".
+
+If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
+replace the "with...Texts." line with this:
+
+ with the Invariant Sections being LIST THEIR TITLES, with the
+ Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.
+
+If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
diff --git a/blahtexml/GNU-GPL b/blahtexml/GNU-GPL
new file mode 100644
index 0000000..3912109
--- /dev/null
+++ b/blahtexml/GNU-GPL
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/blahtexml/HTML+Blahtex.xml b/blahtexml/HTML+Blahtex.xml
new file mode 100644
index 0000000..5ee3d14
--- /dev/null
+++ b/blahtexml/HTML+Blahtex.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<html
+ xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:b="http://gva.noekeon.org/blahtexml"
+ xml:lang="en">
+<head>
+ <meta http-equiv="Content-Type" content="text/xml; charset=UTF-8"/>
+ <title>Example of Blahtex's processing</title>
+<style type="text/css">
+.ieq, .eq {
+ color: blue;
+}
+</style>
+</head>
+<body>
+<h1>Using <code>blahtexml --xmlin</code></h1>
+
+<p>In general, the distance from a point with coordinates <span class="ieq" b:inline="(x,y)"/> to the origin <span class="ieq" b:inline="(0,0)"/> is <span class="ieq" b:inline="\sqrt{x^2+y^2}"/>. A circle of radius <span class="ieq" b:inline="r"/> is the set of points that are at a distance <span class="ieq" b:inline="r"/> from its center. So, the equation of a circle of radius <span class="ieq" b:inline="r"/> centered at the origin is <span class="eq" b:block="x^2+y^2=r^2\text{.}"/></p>
+
+</body>
+
+</html>
diff --git a/blahtexml/HTML+MathML.xml b/blahtexml/HTML+MathML.xml
new file mode 100644
index 0000000..9d33038
--- /dev/null
+++ b/blahtexml/HTML+MathML.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:b="http://gva.noekeon.org/blahtexml" xml:lang="en">
+<head>
+ <meta http-equiv="Content-Type" content="text/xml; charset=UTF-8"></meta>
+ <title>Example of Blahtex's processing</title>
+<style type="text/css">
+.ieq, .eq {
+ color: blue;
+}
+</style>
+</head>
+<body>
+<h1>Using <code>blahtexml --xmlin</code></h1>
+
+<p>In general, the distance from a point with coordinates <span class="ieq"><math xmlns="http://www.w3.org/1998/Math/MathML"><mo lspace="0" rspace="0" stretchy="false">(</mo><mi>x</mi><mo lspace="0" rspace="0.167em">,</mo><mi>y</mi><mo lspace="0" rspace="0" stretchy="false">)</mo></math></span> to the origin <span class="ieq"><math xmlns="http://www.w3.org/1998/Math/MathML"><mo lspace="0" rspace="0" stretchy="false">(</mo><mn>0</mn><mo lspace="0" rspace="0.167em">,</mo><mn>0</mn><mo lspace="0" rspace="0" stretchy="false">)</mo></math></span> is <span class="ieq"><math xmlns="http://www.w3.org/1998/Math/MathML"><msqrt><msup><mi>x</mi><mn>2</mn></msup><mo lspace="0.222em" rspace="0.222em">+</mo><msup><mi>y</mi><mn>2</mn></msup></msqrt></math></span>. A circle of radius <span class="ieq"><math xmlns="http://www.w3.org/1998/Math/MathML"><mi>r</mi></math></span> is the set of points that are at a distance <span class="ieq"><math xmlns="http://www.w3.org/1998/Math/MathML"><mi>r</mi></math></span> from its center. So, the equation of a circle of radius <span class="ieq"><math xmlns="http://www.w3.org/1998/Math/MathML"><mi>r</mi></math></span> centered at the origin is <span class="eq"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><msup><mi>x</mi><mn>2</mn></msup><mo lspace="0.222em" rspace="0.222em">+</mo><msup><mi>y</mi><mn>2</mn></msup><mo lspace="0.278em" rspace="0.278em">=</mo><msup><mi>r</mi><mn>2</mn></msup><mtext>.</mtext></math></span></p>
+
+</body>
+
+</html> \ No newline at end of file
diff --git a/blahtexml/ISTtoTeX.xslt b/blahtexml/ISTtoTeX.xslt
new file mode 100644
index 0000000..3d297a0
--- /dev/null
+++ b/blahtexml/ISTtoTeX.xslt
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+// File "ISTtoTeX.xslt"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+-->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version='1.0'> <!-- Renders a "symbols" element and its "symbol" children as a three-column LaTeX longtable. -->
+
+<xsl:output method="text" indent="no" encoding="UTF-8"/> <!-- Result is plain text (LaTeX source), so each &amp; inside xsl:text comes out as a bare ampersand, the LaTeX column separator. -->
+
+<xsl:template match="symbols"> <!-- Table frame: opens the longtable, writes the repeated header row, emits the rows, then closes the environment. -->
+<xsl:text>\begin{longtable}{|l|l|l|}
+\hline
+Symbol &amp; Unicode &amp; Translated as \\
+\endhead
+\hline
+</xsl:text>
+ <xsl:apply-templates select="symbol"/> <!-- One table row per child "symbol" element, produced by the template below. -->
+<xsl:text>\end{longtable}
+</xsl:text>
+</xsl:template>
+
+<xsl:template match="symbol"> <!-- One table row, terminated by \\ and a following \hline. -->
+ <xsl:text>$</xsl:text>
+ <xsl:value-of select="@tex"/> <!-- Column 1: the "tex" attribute wrapped in $...$ so LaTeX typesets the symbol in math mode. -->
+ <xsl:text>$ &amp; \texttt{</xsl:text>
+ <xsl:value-of select="@unicode"/> <!-- Column 2: the "unicode" attribute, set in typewriter face. -->
+ <xsl:text>} &amp; \verb|</xsl:text>
+ <xsl:value-of select="@tex"/> <!-- Column 3: the same "tex" attribute shown verbatim; NOTE(review): \verb|...| would break on a value containing "|" - confirm none occurs in the input. -->
+ <xsl:text>| \\
+\hline
+</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/blahtexml/InputSymbolTranslation.tex b/blahtexml/InputSymbolTranslation.tex
new file mode 100644
index 0000000..4595f7f
--- /dev/null
+++ b/blahtexml/InputSymbolTranslation.tex
@@ -0,0 +1,694 @@
+\begin{longtable}{|l|l|l|}
+\hline
+Symbol & Unicode & Translated as \\
+\endhead
+\hline
+$\lnot$ & \texttt{000000AC} & \verb|\lnot| \\
+\hline
+$\pm$ & \texttt{000000B1} & \verb|\pm| \\
+\hline
+$\times$ & \texttt{000000D7} & \verb|\times| \\
+\hline
+$\div$ & \texttt{000000F7} & \verb|\div| \\
+\hline
+$\Gamma$ & \texttt{00000393} & \verb|\Gamma| \\
+\hline
+$\Delta$ & \texttt{00000394} & \verb|\Delta| \\
+\hline
+$\Theta$ & \texttt{00000398} & \verb|\Theta| \\
+\hline
+$\Lambda$ & \texttt{0000039B} & \verb|\Lambda| \\
+\hline
+$\Xi$ & \texttt{0000039E} & \verb|\Xi| \\
+\hline
+$\Pi$ & \texttt{000003A0} & \verb|\Pi| \\
+\hline
+$\Sigma$ & \texttt{000003A3} & \verb|\Sigma| \\
+\hline
+$\Upsilon$ & \texttt{000003A5} & \verb|\Upsilon| \\
+\hline
+$\Phi$ & \texttt{000003A6} & \verb|\Phi| \\
+\hline
+$\Psi$ & \texttt{000003A8} & \verb|\Psi| \\
+\hline
+$\Omega$ & \texttt{000003A9} & \verb|\Omega| \\
+\hline
+$\alpha$ & \texttt{000003B1} & \verb|\alpha| \\
+\hline
+$\beta$ & \texttt{000003B2} & \verb|\beta| \\
+\hline
+$\gamma$ & \texttt{000003B3} & \verb|\gamma| \\
+\hline
+$\delta$ & \texttt{000003B4} & \verb|\delta| \\
+\hline
+$\varepsilon$ & \texttt{000003B5} & \verb|\varepsilon| \\
+\hline
+$\zeta$ & \texttt{000003B6} & \verb|\zeta| \\
+\hline
+$\eta$ & \texttt{000003B7} & \verb|\eta| \\
+\hline
+$\theta$ & \texttt{000003B8} & \verb|\theta| \\
+\hline
+$\iota$ & \texttt{000003B9} & \verb|\iota| \\
+\hline
+$\kappa$ & \texttt{000003BA} & \verb|\kappa| \\
+\hline
+$\lambda$ & \texttt{000003BB} & \verb|\lambda| \\
+\hline
+$\mu$ & \texttt{000003BC} & \verb|\mu| \\
+\hline
+$\nu$ & \texttt{000003BD} & \verb|\nu| \\
+\hline
+$\xi$ & \texttt{000003BE} & \verb|\xi| \\
+\hline
+$\pi$ & \texttt{000003C0} & \verb|\pi| \\
+\hline
+$\rho$ & \texttt{000003C1} & \verb|\rho| \\
+\hline
+$\varsigma$ & \texttt{000003C2} & \verb|\varsigma| \\
+\hline
+$\sigma$ & \texttt{000003C3} & \verb|\sigma| \\
+\hline
+$\tau$ & \texttt{000003C4} & \verb|\tau| \\
+\hline
+$\upsilon$ & \texttt{000003C5} & \verb|\upsilon| \\
+\hline
+$\varphi$ & \texttt{000003C6} & \verb|\varphi| \\
+\hline
+$\chi$ & \texttt{000003C7} & \verb|\chi| \\
+\hline
+$\psi$ & \texttt{000003C8} & \verb|\psi| \\
+\hline
+$\omega$ & \texttt{000003C9} & \verb|\omega| \\
+\hline
+$\vartheta$ & \texttt{000003D1} & \verb|\vartheta| \\
+\hline
+$\phi$ & \texttt{000003D5} & \verb|\phi| \\
+\hline
+$\varpi$ & \texttt{000003D6} & \verb|\varpi| \\
+\hline
+$\digamma$ & \texttt{000003DD} & \verb|\digamma| \\
+\hline
+$\varkappa$ & \texttt{000003F0} & \verb|\varkappa| \\
+\hline
+$\varrho$ & \texttt{000003F1} & \verb|\varrho| \\
+\hline
+$\epsilon$ & \texttt{000003F5} & \verb|\epsilon| \\
+\hline
+$\backepsilon$ & \texttt{000003F6} & \verb|\backepsilon| \\
+\hline
+$\dagger$ & \texttt{00002020} & \verb|\dagger| \\
+\hline
+$\ddagger$ & \texttt{00002021} & \verb|\ddagger| \\
+\hline
+$\bullet$ & \texttt{00002022} & \verb|\bullet| \\
+\hline
+$\dots$ & \texttt{00002026} & \verb|\dots| \\
+\hline
+$\prime$ & \texttt{00002032} & \verb|\prime| \\
+\hline
+$\backprime$ & \texttt{00002035} & \verb|\backprime| \\
+\hline
+$\leftarrow$ & \texttt{00002190} & \verb|\leftarrow| \\
+\hline
+$\uparrow$ & \texttt{00002191} & \verb|\uparrow| \\
+\hline
+$\rightarrow$ & \texttt{00002192} & \verb|\rightarrow| \\
+\hline
+$\downarrow$ & \texttt{00002193} & \verb|\downarrow| \\
+\hline
+$\leftrightarrow$ & \texttt{00002194} & \verb|\leftrightarrow| \\
+\hline
+$\updownarrow$ & \texttt{00002195} & \verb|\updownarrow| \\
+\hline
+$\nwarrow$ & \texttt{00002196} & \verb|\nwarrow| \\
+\hline
+$\nearrow$ & \texttt{00002197} & \verb|\nearrow| \\
+\hline
+$\searrow$ & \texttt{00002198} & \verb|\searrow| \\
+\hline
+$\swarrow$ & \texttt{00002199} & \verb|\swarrow| \\
+\hline
+$\nleftarrow$ & \texttt{0000219A} & \verb|\nleftarrow| \\
+\hline
+$\nrightarrow$ & \texttt{0000219B} & \verb|\nrightarrow| \\
+\hline
+$\rightsquigarrow$ & \texttt{0000219D} & \verb|\rightsquigarrow| \\
+\hline
+$\twoheadleftarrow$ & \texttt{0000219E} & \verb|\twoheadleftarrow| \\
+\hline
+$\twoheadrightarrow$ & \texttt{000021A0} & \verb|\twoheadrightarrow| \\
+\hline
+$\leftarrowtail$ & \texttt{000021A2} & \verb|\leftarrowtail| \\
+\hline
+$\rightarrowtail$ & \texttt{000021A3} & \verb|\rightarrowtail| \\
+\hline
+$\mapsto$ & \texttt{000021A6} & \verb|\mapsto| \\
+\hline
+$\hookleftarrow$ & \texttt{000021A9} & \verb|\hookleftarrow| \\
+\hline
+$\hookrightarrow$ & \texttt{000021AA} & \verb|\hookrightarrow| \\
+\hline
+$\looparrowleft$ & \texttt{000021AB} & \verb|\looparrowleft| \\
+\hline
+$\looparrowright$ & \texttt{000021AC} & \verb|\looparrowright| \\
+\hline
+$\leftrightsquigarrow$ & \texttt{000021AD} & \verb|\leftrightsquigarrow| \\
+\hline
+$\nleftrightarrow$ & \texttt{000021AE} & \verb|\nleftrightarrow| \\
+\hline
+$\Lsh$ & \texttt{000021B0} & \verb|\Lsh| \\
+\hline
+$\Rsh$ & \texttt{000021B1} & \verb|\Rsh| \\
+\hline
+$\curvearrowleft$ & \texttt{000021B6} & \verb|\curvearrowleft| \\
+\hline
+$\curvearrowright$ & \texttt{000021B7} & \verb|\curvearrowright| \\
+\hline
+$\circlearrowleft$ & \texttt{000021BA} & \verb|\circlearrowleft| \\
+\hline
+$\circlearrowright$ & \texttt{000021BB} & \verb|\circlearrowright| \\
+\hline
+$\leftharpoonup$ & \texttt{000021BC} & \verb|\leftharpoonup| \\
+\hline
+$\leftharpoondown$ & \texttt{000021BD} & \verb|\leftharpoondown| \\
+\hline
+$\upharpoonright$ & \texttt{000021BE} & \verb|\upharpoonright| \\
+\hline
+$\upharpoonleft$ & \texttt{000021BF} & \verb|\upharpoonleft| \\
+\hline
+$\rightharpoonup$ & \texttt{000021C0} & \verb|\rightharpoonup| \\
+\hline
+$\rightharpoondown$ & \texttt{000021C1} & \verb|\rightharpoondown| \\
+\hline
+$\downharpoonright$ & \texttt{000021C2} & \verb|\downharpoonright| \\
+\hline
+$\downharpoonleft$ & \texttt{000021C3} & \verb|\downharpoonleft| \\
+\hline
+$\rightleftarrows$ & \texttt{000021C4} & \verb|\rightleftarrows| \\
+\hline
+$\leftrightarrows$ & \texttt{000021C6} & \verb|\leftrightarrows| \\
+\hline
+$\leftleftarrows$ & \texttt{000021C7} & \verb|\leftleftarrows| \\
+\hline
+$\upuparrows$ & \texttt{000021C8} & \verb|\upuparrows| \\
+\hline
+$\rightrightarrows$ & \texttt{000021C9} & \verb|\rightrightarrows| \\
+\hline
+$\downdownarrows$ & \texttt{000021CA} & \verb|\downdownarrows| \\
+\hline
+$\leftrightharpoons$ & \texttt{000021CB} & \verb|\leftrightharpoons| \\
+\hline
+$\rightleftharpoons$ & \texttt{000021CC} & \verb|\rightleftharpoons| \\
+\hline
+$\nLeftarrow$ & \texttt{000021CD} & \verb|\nLeftarrow| \\
+\hline
+$\nLeftrightarrow$ & \texttt{000021CE} & \verb|\nLeftrightarrow| \\
+\hline
+$\nRightarrow$ & \texttt{000021CF} & \verb|\nRightarrow| \\
+\hline
+$\Leftarrow$ & \texttt{000021D0} & \verb|\Leftarrow| \\
+\hline
+$\Uparrow$ & \texttt{000021D1} & \verb|\Uparrow| \\
+\hline
+$\Rightarrow$ & \texttt{000021D2} & \verb|\Rightarrow| \\
+\hline
+$\Downarrow$ & \texttt{000021D3} & \verb|\Downarrow| \\
+\hline
+$\Leftrightarrow$ & \texttt{000021D4} & \verb|\Leftrightarrow| \\
+\hline
+$\Updownarrow$ & \texttt{000021D5} & \verb|\Updownarrow| \\
+\hline
+$\Lleftarrow$ & \texttt{000021DA} & \verb|\Lleftarrow| \\
+\hline
+$\Rrightarrow$ & \texttt{000021DB} & \verb|\Rrightarrow| \\
+\hline
+$\leadsto$ & \texttt{000021DD} & \verb|\leadsto| \\
+\hline
+$\forall$ & \texttt{00002200} & \verb|\forall| \\
+\hline
+$\complement$ & \texttt{00002201} & \verb|\complement| \\
+\hline
+$\exists$ & \texttt{00002203} & \verb|\exists| \\
+\hline
+$\nexists$ & \texttt{00002204} & \verb|\nexists| \\
+\hline
+$\nabla$ & \texttt{00002207} & \verb|\nabla| \\
+\hline
+$\in$ & \texttt{00002208} & \verb|\in| \\
+\hline
+$\notin$ & \texttt{00002209} & \verb|\notin| \\
+\hline
+$\ni$ & \texttt{0000220B} & \verb|\ni| \\
+\hline
+$\prod$ & \texttt{0000220F} & \verb|\prod| \\
+\hline
+$\coprod$ & \texttt{00002210} & \verb|\coprod| \\
+\hline
+$\sum$ & \texttt{00002211} & \verb|\sum| \\
+\hline
+$\mp$ & \texttt{00002213} & \verb|\mp| \\
+\hline
+$\dotplus$ & \texttt{00002214} & \verb|\dotplus| \\
+\hline
+$\circ$ & \texttt{00002218} & \verb|\circ| \\
+\hline
+$\surd$ & \texttt{0000221A} & \verb|\surd| \\
+\hline
+$\propto$ & \texttt{0000221D} & \verb|\propto| \\
+\hline
+$\angle$ & \texttt{00002220} & \verb|\angle| \\
+\hline
+$\measuredangle$ & \texttt{00002221} & \verb|\measuredangle| \\
+\hline
+$\sphericalangle$ & \texttt{00002222} & \verb|\sphericalangle| \\
+\hline
+$\nmid$ & \texttt{00002224} & \verb|\nmid| \\
+\hline
+$\parallel$ & \texttt{00002225} & \verb|\parallel| \\
+\hline
+$\nparallel$ & \texttt{00002226} & \verb|\nparallel| \\
+\hline
+$\wedge$ & \texttt{00002227} & \verb|\wedge| \\
+\hline
+$\vee$ & \texttt{00002228} & \verb|\vee| \\
+\hline
+$\cap$ & \texttt{00002229} & \verb|\cap| \\
+\hline
+$\cup$ & \texttt{0000222A} & \verb|\cup| \\
+\hline
+$\int$ & \texttt{0000222B} & \verb|\int| \\
+\hline
+$\iint$ & \texttt{0000222C} & \verb|\iint| \\
+\hline
+$\iiint$ & \texttt{0000222D} & \verb|\iiint| \\
+\hline
+$\oint$ & \texttt{0000222E} & \verb|\oint| \\
+\hline
+$\therefore$ & \texttt{00002234} & \verb|\therefore| \\
+\hline
+$\because$ & \texttt{00002235} & \verb|\because| \\
+\hline
+$\sim$ & \texttt{0000223C} & \verb|\sim| \\
+\hline
+$\backsim$ & \texttt{0000223D} & \verb|\backsim| \\
+\hline
+$\wr$ & \texttt{00002240} & \verb|\wr| \\
+\hline
+$\nsim$ & \texttt{00002241} & \verb|\nsim| \\
+\hline
+$\eqsim$ & \texttt{00002242} & \verb|\eqsim| \\
+\hline
+$\simeq$ & \texttt{00002243} & \verb|\simeq| \\
+\hline
+$\cong$ & \texttt{00002245} & \verb|\cong| \\
+\hline
+$\ncong$ & \texttt{00002247} & \verb|\ncong| \\
+\hline
+$\approx$ & \texttt{00002248} & \verb|\approx| \\
+\hline
+$\approxeq$ & \texttt{0000224A} & \verb|\approxeq| \\
+\hline
+$\Bumpeq$ & \texttt{0000224E} & \verb|\Bumpeq| \\
+\hline
+$\bumpeq$ & \texttt{0000224F} & \verb|\bumpeq| \\
+\hline
+$\doteq$ & \texttt{00002250} & \verb|\doteq| \\
+\hline
+$\doteqdot$ & \texttt{00002251} & \verb|\doteqdot| \\
+\hline
+$\fallingdotseq$ & \texttt{00002252} & \verb|\fallingdotseq| \\
+\hline
+$\risingdotseq$ & \texttt{00002253} & \verb|\risingdotseq| \\
+\hline
+$\eqcirc$ & \texttt{00002256} & \verb|\eqcirc| \\
+\hline
+$\circeq$ & \texttt{00002257} & \verb|\circeq| \\
+\hline
+$\triangleq$ & \texttt{0000225C} & \verb|\triangleq| \\
+\hline
+$\neq$ & \texttt{00002260} & \verb|\neq| \\
+\hline
+$\equiv$ & \texttt{00002261} & \verb|\equiv| \\
+\hline
+$\leq$ & \texttt{00002264} & \verb|\leq| \\
+\hline
+$\geq$ & \texttt{00002265} & \verb|\geq| \\
+\hline
+$\leqq$ & \texttt{00002266} & \verb|\leqq| \\
+\hline
+$\geqq$ & \texttt{00002267} & \verb|\geqq| \\
+\hline
+$\lneqq$ & \texttt{00002268} & \verb|\lneqq| \\
+\hline
+$\gneqq$ & \texttt{00002269} & \verb|\gneqq| \\
+\hline
+$\ll$ & \texttt{0000226A} & \verb|\ll| \\
+\hline
+$\gg$ & \texttt{0000226B} & \verb|\gg| \\
+\hline
+$\between$ & \texttt{0000226C} & \verb|\between| \\
+\hline
+$\nless$ & \texttt{0000226E} & \verb|\nless| \\
+\hline
+$\ngtr$ & \texttt{0000226F} & \verb|\ngtr| \\
+\hline
+$\nleq$ & \texttt{00002270} & \verb|\nleq| \\
+\hline
+$\ngeq$ & \texttt{00002271} & \verb|\ngeq| \\
+\hline
+$\lesssim$ & \texttt{00002272} & \verb|\lesssim| \\
+\hline
+$\gtrsim$ & \texttt{00002273} & \verb|\gtrsim| \\
+\hline
+$\lessgtr$ & \texttt{00002276} & \verb|\lessgtr| \\
+\hline
+$\gtrless$ & \texttt{00002277} & \verb|\gtrless| \\
+\hline
+$\prec$ & \texttt{0000227A} & \verb|\prec| \\
+\hline
+$\succ$ & \texttt{0000227B} & \verb|\succ| \\
+\hline
+$\preccurlyeq$ & \texttt{0000227C} & \verb|\preccurlyeq| \\
+\hline
+$\succcurlyeq$ & \texttt{0000227D} & \verb|\succcurlyeq| \\
+\hline
+$\precsim$ & \texttt{0000227E} & \verb|\precsim| \\
+\hline
+$\succsim$ & \texttt{0000227F} & \verb|\succsim| \\
+\hline
+$\nprec$ & \texttt{00002280} & \verb|\nprec| \\
+\hline
+$\nsucc$ & \texttt{00002281} & \verb|\nsucc| \\
+\hline
+$\subset$ & \texttt{00002282} & \verb|\subset| \\
+\hline
+$\supset$ & \texttt{00002283} & \verb|\supset| \\
+\hline
+$\subseteq$ & \texttt{00002286} & \verb|\subseteq| \\
+\hline
+$\supseteq$ & \texttt{00002287} & \verb|\supseteq| \\
+\hline
+$\nsubseteq$ & \texttt{00002288} & \verb|\nsubseteq| \\
+\hline
+$\nsupseteq$ & \texttt{00002289} & \verb|\nsupseteq| \\
+\hline
+$\subsetneq$ & \texttt{0000228A} & \verb|\subsetneq| \\
+\hline
+$\supsetneq$ & \texttt{0000228B} & \verb|\supsetneq| \\
+\hline
+$\uplus$ & \texttt{0000228E} & \verb|\uplus| \\
+\hline
+$\sqsubset$ & \texttt{0000228F} & \verb|\sqsubset| \\
+\hline
+$\sqsupset$ & \texttt{00002290} & \verb|\sqsupset| \\
+\hline
+$\sqsubseteq$ & \texttt{00002291} & \verb|\sqsubseteq| \\
+\hline
+$\sqsupseteq$ & \texttt{00002292} & \verb|\sqsupseteq| \\
+\hline
+$\sqcap$ & \texttt{00002293} & \verb|\sqcap| \\
+\hline
+$\sqcup$ & \texttt{00002294} & \verb|\sqcup| \\
+\hline
+$\oplus$ & \texttt{00002295} & \verb|\oplus| \\
+\hline
+$\ominus$ & \texttt{00002296} & \verb|\ominus| \\
+\hline
+$\otimes$ & \texttt{00002297} & \verb|\otimes| \\
+\hline
+$\oslash$ & \texttt{00002298} & \verb|\oslash| \\
+\hline
+$\odot$ & \texttt{00002299} & \verb|\odot| \\
+\hline
+$\circledcirc$ & \texttt{0000229A} & \verb|\circledcirc| \\
+\hline
+$\circledast$ & \texttt{0000229B} & \verb|\circledast| \\
+\hline
+$\circleddash$ & \texttt{0000229D} & \verb|\circleddash| \\
+\hline
+$\boxplus$ & \texttt{0000229E} & \verb|\boxplus| \\
+\hline
+$\boxminus$ & \texttt{0000229F} & \verb|\boxminus| \\
+\hline
+$\boxtimes$ & \texttt{000022A0} & \verb|\boxtimes| \\
+\hline
+$\boxdot$ & \texttt{000022A1} & \verb|\boxdot| \\
+\hline
+$\vdash$ & \texttt{000022A2} & \verb|\vdash| \\
+\hline
+$\dashv$ & \texttt{000022A3} & \verb|\dashv| \\
+\hline
+$\top$ & \texttt{000022A4} & \verb|\top| \\
+\hline
+$\bot$ & \texttt{000022A5} & \verb|\bot| \\
+\hline
+$\models$ & \texttt{000022A7} & \verb|\models| \\
+\hline
+$\vDash$ & \texttt{000022A8} & \verb|\vDash| \\
+\hline
+$\Vdash$ & \texttt{000022A9} & \verb|\Vdash| \\
+\hline
+$\Vvdash$ & \texttt{000022AA} & \verb|\Vvdash| \\
+\hline
+$\nvdash$ & \texttt{000022AC} & \verb|\nvdash| \\
+\hline
+$\nvDash$ & \texttt{000022AD} & \verb|\nvDash| \\
+\hline
+$\nVdash$ & \texttt{000022AE} & \verb|\nVdash| \\
+\hline
+$\nVDash$ & \texttt{000022AF} & \verb|\nVDash| \\
+\hline
+$\lhd$ & \texttt{000022B2} & \verb|\lhd| \\
+\hline
+$\rhd$ & \texttt{000022B3} & \verb|\rhd| \\
+\hline
+$\unlhd$ & \texttt{000022B4} & \verb|\unlhd| \\
+\hline
+$\unrhd$ & \texttt{000022B5} & \verb|\unrhd| \\
+\hline
+$\multimap$ & \texttt{000022B8} & \verb|\multimap| \\
+\hline
+$\intercal$ & \texttt{000022BA} & \verb|\intercal| \\
+\hline
+$\veebar$ & \texttt{000022BB} & \verb|\veebar| \\
+\hline
+$\bigwedge$ & \texttt{000022C0} & \verb|\bigwedge| \\
+\hline
+$\bigvee$ & \texttt{000022C1} & \verb|\bigvee| \\
+\hline
+$\bigcap$ & \texttt{000022C2} & \verb|\bigcap| \\
+\hline
+$\bigcup$ & \texttt{000022C3} & \verb|\bigcup| \\
+\hline
+$\diamond$ & \texttt{000022C4} & \verb|\diamond| \\
+\hline
+$\cdot$ & \texttt{000022C5} & \verb|\cdot| \\
+\hline
+$\star$ & \texttt{000022C6} & \verb|\star| \\
+\hline
+$\divideontimes$ & \texttt{000022C7} & \verb|\divideontimes| \\
+\hline
+$\bowtie$ & \texttt{000022C8} & \verb|\bowtie| \\
+\hline
+$\ltimes$ & \texttt{000022C9} & \verb|\ltimes| \\
+\hline
+$\rtimes$ & \texttt{000022CA} & \verb|\rtimes| \\
+\hline
+$\leftthreetimes$ & \texttt{000022CB} & \verb|\leftthreetimes| \\
+\hline
+$\rightthreetimes$ & \texttt{000022CC} & \verb|\rightthreetimes| \\
+\hline
+$\backsimeq$ & \texttt{000022CD} & \verb|\backsimeq| \\
+\hline
+$\curlyvee$ & \texttt{000022CE} & \verb|\curlyvee| \\
+\hline
+$\curlywedge$ & \texttt{000022CF} & \verb|\curlywedge| \\
+\hline
+$\Subset$ & \texttt{000022D0} & \verb|\Subset| \\
+\hline
+$\Supset$ & \texttt{000022D1} & \verb|\Supset| \\
+\hline
+$\Cap$ & \texttt{000022D2} & \verb|\Cap| \\
+\hline
+$\Cup$ & \texttt{000022D3} & \verb|\Cup| \\
+\hline
+$\pitchfork$ & \texttt{000022D4} & \verb|\pitchfork| \\
+\hline
+$\lessdot$ & \texttt{000022D6} & \verb|\lessdot| \\
+\hline
+$\gtrdot$ & \texttt{000022D7} & \verb|\gtrdot| \\
+\hline
+$\lll$ & \texttt{000022D8} & \verb|\lll| \\
+\hline
+$\ggg$ & \texttt{000022D9} & \verb|\ggg| \\
+\hline
+$\lesseqgtr$ & \texttt{000022DA} & \verb|\lesseqgtr| \\
+\hline
+$\gtreqless$ & \texttt{000022DB} & \verb|\gtreqless| \\
+\hline
+$\curlyeqprec$ & \texttt{000022DE} & \verb|\curlyeqprec| \\
+\hline
+$\curlyeqsucc$ & \texttt{000022DF} & \verb|\curlyeqsucc| \\
+\hline
+$\lnsim$ & \texttt{000022E6} & \verb|\lnsim| \\
+\hline
+$\gnsim$ & \texttt{000022E7} & \verb|\gnsim| \\
+\hline
+$\precnsim$ & \texttt{000022E8} & \verb|\precnsim| \\
+\hline
+$\succnsim$ & \texttt{000022E9} & \verb|\succnsim| \\
+\hline
+$\ntriangleleft$ & \texttt{000022EA} & \verb|\ntriangleleft| \\
+\hline
+$\ntriangleright$ & \texttt{000022EB} & \verb|\ntriangleright| \\
+\hline
+$\ntrianglelefteq$ & \texttt{000022EC} & \verb|\ntrianglelefteq| \\
+\hline
+$\ntrianglerighteq$ & \texttt{000022ED} & \verb|\ntrianglerighteq| \\
+\hline
+$\vdots$ & \texttt{000022EE} & \verb|\vdots| \\
+\hline
+$\cdots$ & \texttt{000022EF} & \verb|\cdots| \\
+\hline
+$\ddots$ & \texttt{000022F1} & \verb|\ddots| \\
+\hline
+$\barwedge$ & \texttt{00002305} & \verb|\barwedge| \\
+\hline
+$\doublebarwedge$ & \texttt{00002306} & \verb|\doublebarwedge| \\
+\hline
+$\lceil$ & \texttt{00002308} & \verb|\lceil| \\
+\hline
+$\rceil$ & \texttt{00002309} & \verb|\rceil| \\
+\hline
+$\lfloor$ & \texttt{0000230A} & \verb|\lfloor| \\
+\hline
+$\rfloor$ & \texttt{0000230B} & \verb|\rfloor| \\
+\hline
+$\ulcorner$ & \texttt{0000231C} & \verb|\ulcorner| \\
+\hline
+$\urcorner$ & \texttt{0000231D} & \verb|\urcorner| \\
+\hline
+$\llcorner$ & \texttt{0000231E} & \verb|\llcorner| \\
+\hline
+$\lrcorner$ & \texttt{0000231F} & \verb|\lrcorner| \\
+\hline
+$\frown$ & \texttt{00002322} & \verb|\frown| \\
+\hline
+$\smile$ & \texttt{00002323} & \verb|\smile| \\
+\hline
+$\langle$ & \texttt{00002329} & \verb|\langle| \\
+\hline
+$\rangle$ & \texttt{0000232A} & \verb|\rangle| \\
+\hline
+$\square$ & \texttt{000025A1} & \verb|\square| \\
+\hline
+$\triangle$ & \texttt{000025B3} & \verb|\triangle| \\
+\hline
+$\blacktriangle$ & \texttt{000025B4} & \verb|\blacktriangle| \\
+\hline
+$\vartriangle$ & \texttt{000025B5} & \verb|\vartriangle| \\
+\hline
+$\blacktriangleright$ & \texttt{000025B6} & \verb|\blacktriangleright| \\
+\hline
+$\triangleright$ & \texttt{000025B9} & \verb|\triangleright| \\
+\hline
+$\bigtriangledown$ & \texttt{000025BD} & \verb|\bigtriangledown| \\
+\hline
+$\blacktriangledown$ & \texttt{000025BE} & \verb|\blacktriangledown| \\
+\hline
+$\triangledown$ & \texttt{000025BF} & \verb|\triangledown| \\
+\hline
+$\blacktriangleleft$ & \texttt{000025C0} & \verb|\blacktriangleleft| \\
+\hline
+$\triangleleft$ & \texttt{000025C3} & \verb|\triangleleft| \\
+\hline
+$\lozenge$ & \texttt{000025CA} & \verb|\lozenge| \\
+\hline
+$\bigcirc$ & \texttt{000025EF} & \verb|\bigcirc| \\
+\hline
+$\blacksquare$ & \texttt{000025FC} & \verb|\blacksquare| \\
+\hline
+$\bigstar$ & \texttt{00002605} & \verb|\bigstar| \\
+\hline
+$\spadesuit$ & \texttt{00002660} & \verb|\spadesuit| \\
+\hline
+$\clubsuit$ & \texttt{00002663} & \verb|\clubsuit| \\
+\hline
+$\heartsuit$ & \texttt{00002665} & \verb|\heartsuit| \\
+\hline
+$\diamondsuit$ & \texttt{00002666} & \verb|\diamondsuit| \\
+\hline
+$\flat$ & \texttt{0000266D} & \verb|\flat| \\
+\hline
+$\natural$ & \texttt{0000266E} & \verb|\natural| \\
+\hline
+$\sharp$ & \texttt{0000266F} & \verb|\sharp| \\
+\hline
+$\checkmark$ & \texttt{00002713} & \verb|\checkmark| \\
+\hline
+$\dashleftarrow$ & \texttt{0000290E} & \verb|\dashleftarrow| \\
+\hline
+$\dashrightarrow$ & \texttt{0000290F} & \verb|\dashrightarrow| \\
+\hline
+$\blacklozenge$ & \texttt{000029EB} & \verb|\blacklozenge| \\
+\hline
+$\bigodot$ & \texttt{00002A00} & \verb|\bigodot| \\
+\hline
+$\bigoplus$ & \texttt{00002A01} & \verb|\bigoplus| \\
+\hline
+$\bigotimes$ & \texttt{00002A02} & \verb|\bigotimes| \\
+\hline
+$\biguplus$ & \texttt{00002A04} & \verb|\biguplus| \\
+\hline
+$\bigsqcup$ & \texttt{00002A06} & \verb|\bigsqcup| \\
+\hline
+$\iiiint$ & \texttt{00002A0C} & \verb|\iiiint| \\
+\hline
+$\amalg$ & \texttt{00002A3F} & \verb|\amalg| \\
+\hline
+$\leqslant$ & \texttt{00002A7D} & \verb|\leqslant| \\
+\hline
+$\geqslant$ & \texttt{00002A7E} & \verb|\geqslant| \\
+\hline
+$\lessapprox$ & \texttt{00002A85} & \verb|\lessapprox| \\
+\hline
+$\gtrapprox$ & \texttt{00002A86} & \verb|\gtrapprox| \\
+\hline
+$\lnapprox$ & \texttt{00002A89} & \verb|\lnapprox| \\
+\hline
+$\gnapprox$ & \texttt{00002A8A} & \verb|\gnapprox| \\
+\hline
+$\lesseqqgtr$ & \texttt{00002A8B} & \verb|\lesseqqgtr| \\
+\hline
+$\gtreqqless$ & \texttt{00002A8C} & \verb|\gtreqqless| \\
+\hline
+$\eqslantless$ & \texttt{00002A95} & \verb|\eqslantless| \\
+\hline
+$\eqslantgtr$ & \texttt{00002A96} & \verb|\eqslantgtr| \\
+\hline
+$\preceq$ & \texttt{00002AAF} & \verb|\preceq| \\
+\hline
+$\succeq$ & \texttt{00002AB0} & \verb|\succeq| \\
+\hline
+$\precneqq$ & \texttt{00002AB5} & \verb|\precneqq| \\
+\hline
+$\succneqq$ & \texttt{00002AB6} & \verb|\succneqq| \\
+\hline
+$\precapprox$ & \texttt{00002AB7} & \verb|\precapprox| \\
+\hline
+$\succapprox$ & \texttt{00002AB8} & \verb|\succapprox| \\
+\hline
+$\precnapprox$ & \texttt{00002AB9} & \verb|\precnapprox| \\
+\hline
+$\succnapprox$ & \texttt{00002ABA} & \verb|\succnapprox| \\
+\hline
+$\subseteqq$ & \texttt{00002AC5} & \verb|\subseteqq| \\
+\hline
+$\supseteqq$ & \texttt{00002AC6} & \verb|\supseteqq| \\
+\hline
+$\subsetneqq$ & \texttt{00002ACB} & \verb|\subsetneqq| \\
+\hline
+$\supsetneqq$ & \texttt{00002ACC} & \verb|\supsetneqq| \\
+\hline
+\end{longtable}
diff --git a/blahtexml/README b/blahtexml/README
new file mode 100644
index 0000000..ee20840
--- /dev/null
+++ b/blahtexml/README
@@ -0,0 +1,33 @@
+Blahtex(ml) version 0.5
+======================================
+
+Blahtex is licensed under the GNU General Public License.
+See the file "GNU-GPL" and the source code for more details.
+
+Brief build instructions (more details in the manual): run one of
+
+ make linux
+ make mac
+
+---
+
+Blahtexml is a simple extension of blahtex to allow it to process an entire
+XML file, and to convert each blahtex equation in it into MathML equations.
+Blahtexml is a source-level extension of blahtex, which means that anything
+that works with Blahtex also works with blahtexml.
+
+Blahtexml requires Xerces-C 2.x to be installed. If you do not have Xerces-C
+installed, you will not be able to build blahtexml (but you will still be able
+to build and use blahtex of course).
+
+Blahtexml is also licensed under the GNU General Public License.
+See the file "GNU-GPL" and the source code for more details.
+
+For more information, see
+
+ http://gva.noekeon.org/blahtexml/
+
+Brief build instructions (more details in the manual): run one of
+
+ make blahtexml-linux
+ make blahtexml-mac
diff --git a/blahtexml/example1.xml b/blahtexml/example1.xml
new file mode 100644
index 0000000..0a50767
--- /dev/null
+++ b/blahtexml/example1.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<equations xmlns:b="http://gva.noekeon.org/blahtexml">
+ <equation b:inline="x+y"/>
+ <equation b:block="\exp(-\gamma x)"/>
+</equations>
diff --git a/blahtexml/example2.xml b/blahtexml/example2.xml
new file mode 100644
index 0000000..5fb3fcb
--- /dev/null
+++ b/blahtexml/example2.xml
@@ -0,0 +1,5 @@
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <msqrt b:m="x+y"/>
+ </math>
+</root>
diff --git a/blahtexml/example3.xml b/blahtexml/example3.xml
new file mode 100644
index 0000000..8c3a66b
--- /dev/null
+++ b/blahtexml/example3.xml
@@ -0,0 +1,5 @@
+<?xml version='1.0' encoding="UTF-8"?>
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq b:inline="x"/>
+ <eq xmlns:m="http://www.w3.org/1998/Math/MathML" b:inline="x"/>
+</root>
diff --git a/blahtexml/example4.xml b/blahtexml/example4.xml
new file mode 100644
index 0000000..1a76df2
--- /dev/null
+++ b/blahtexml/example4.xml
@@ -0,0 +1,4 @@
+<?xml version='1.0' encoding="UTF-8"?>
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq b:inline="\qwerty"/>
+</root>
diff --git a/blahtexml/logo.png b/blahtexml/logo.png
new file mode 100644
index 0000000..9443084
--- /dev/null
+++ b/blahtexml/logo.png
Binary files differ
diff --git a/blahtexml/makefile b/blahtexml/makefile
new file mode 100644
index 0000000..fb3c549
--- /dev/null
+++ b/blahtexml/makefile
@@ -0,0 +1,145 @@
+#
+# makefile
+#
+# blahtex (version 0.4.4)
+# a TeX to MathML converter designed with MediaWiki in mind
+# Copyright (C) 2006, David Harvey
+#
+# blahtexml (version 0.5)
+# Copyright (C) 2007-2008, Gilles Van Assche
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+
+SOURCES = \
+ source/main.cpp \
+ source/mainPng.cpp \
+ source/md5.c \
+ source/md5Wrapper.cpp \
+ source/Messages.cpp \
+ source/UnicodeConverter.cpp \
+ source/BlahtexCore/InputSymbolTranslation.cpp \
+ source/BlahtexCore/Interface.cpp \
+ source/BlahtexCore/LayoutTree.cpp \
+ source/BlahtexCore/MacroProcessor.cpp \
+ source/BlahtexCore/Manager.cpp \
+ source/BlahtexCore/Parser.cpp \
+ source/BlahtexCore/ParseTree1.cpp \
+ source/BlahtexCore/ParseTree2.cpp \
+ source/BlahtexCore/ParseTree3.cpp \
+ source/BlahtexCore/MathmlNode.cpp \
+ source/BlahtexCore/XmlEncode.cpp
+
+SOURCES_XMLIN = $(SOURCES) \
+ source/BlahtexXMLin/AttributesImpl.cpp \
+ source/BlahtexXMLin/BlahtexFilter.cpp \
+ source/BlahtexXMLin/SAX2Output.cpp \
+ source/BlahtexXMLin/XercesString.cpp
+
+HEADERS = \
+ source/mainPng.h \
+ source/md5.h \
+ source/md5Wrapper.h \
+ source/UnicodeConverter.h \
+ source/BlahtexCore/InputSymbolTranslation.h \
+ source/BlahtexCore/Interface.h \
+ source/BlahtexCore/LayoutTree.h \
+ source/BlahtexCore/MacroProcessor.h \
+ source/BlahtexCore/Manager.h \
+ source/BlahtexCore/Misc.h \
+ source/BlahtexCore/Parser.h \
+ source/BlahtexCore/ParseTree.h \
+ source/BlahtexCore/MathmlNode.h \
+ source/BlahtexCore/XmlEncode.h
+
+HEADERS_XMLIN = $(HEADERS) \
+ source/BlahtexXMLin/AttributesImpl.h \
+ source/BlahtexXMLin/BlahtexFilter.h \
+ source/BlahtexXMLin/SAX2Output.h \
+ source/BlahtexXMLin/XercesString.h
+
+BINDIR = bin-blahtex
+
+$(BINDIR):
+ mkdir -p $(BINDIR)
+
+BINDIR_XMLIN = bin-blahtexml
+
+$(BINDIR_XMLIN):
+ mkdir -p $(BINDIR_XMLIN)
+
+OBJECTS = $(addprefix $(BINDIR)/, $(notdir $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(SOURCES)))))
+
+OBJECTS_XMLIN = $(addprefix $(BINDIR_XMLIN)/, $(notdir $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(SOURCES_XMLIN)))))
+
+source/BlahtexCore/InputSymbolTranslation.inc: source/BlahtexCore/InputSymbolTranslation.xml
+ xsltproc -o $@ source/BlahtexCore/ISTtoCpp.xslt $<
+
+source/BlahtexCore/InputSymbolTranslation.cpp: source/BlahtexCore/InputSymbolTranslation.inc
+
+$(BINDIR)/InputSymbolTranslation.o: InputSymbolTranslation.cpp InputSymbolTranslation.inc
+
+$(BINDIR_XMLIN)/InputSymbolTranslation.o: InputSymbolTranslation.cpp InputSymbolTranslation.inc
+
+#default targets are still blahtex (not blahtexml)
+linux: blahtex-linux
+mac: blahtex-mac
+
+CFLAGS = -O2
+
+VPATH = source:source/BlahtexCore:source/BlahtexXMLin
+
+INCLUDES=-I. -Isource -Isource/BlahtexCore -Isource/BlahtexXMLin
+
+$(BINDIR)/%.o:%.cpp
+ $(CXX) $(INCLUDES) $(CFLAGS) -c $< -o $@
+
+$(BINDIR)/%.o:%.c
+ $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
+
+$(BINDIR_XMLIN)/%.o:%.cpp
+ $(CXX) $(INCLUDES) $(CFLAGS) -DBLAHTEXML_USING_XERCES -c $< -o $@
+
+$(BINDIR_XMLIN)/%.o:%.c
+ $(CC) $(INCLUDES) $(CFLAGS) -DBLAHTEXML_USING_XERCES -c $< -o $@
+
+blahtex-linux: $(BINDIR) $(OBJECTS) $(HEADERS)
+ $(CXX) $(CFLAGS) -o blahtex $(OBJECTS)
+
+blahtex-mac: $(BINDIR) $(OBJECTS) $(HEADERS)
+ $(CXX) $(CFLAGS) -o blahtex -liconv $(OBJECTS)
+
+blahtexml-linux: $(BINDIR_XMLIN) $(OBJECTS_XMLIN) $(HEADERS_XMLIN)
+ $(CXX) $(CFLAGS) -o blahtexml $(OBJECTS_XMLIN) -lxerces-c
+
+blahtexml-mac: $(BINDIR_XMLIN) $(OBJECTS_XMLIN) $(HEADERS_XMLIN)
+ $(CXX) $(CFLAGS) -o blahtexml -liconv $(OBJECTS_XMLIN) -lxerces-c
+
+clean:
+ rm -f blahtex $(OBJECTS) blahtexml $(OBJECTS_XMLIN)
+
+# Documentation
+
+doc: manual.pdf
+
+manual.pdf: manual.tex InputSymbolTranslation.tex
+ pdflatex manual
+ pdflatex manual
+
+InputSymbolTranslation.tex: source/BlahtexCore/InputSymbolTranslation.xml
+ xsltproc -o $@ ISTtoTeX.xslt $<
+
+########## end of file ##########
diff --git a/blahtexml/manual.tex b/blahtexml/manual.tex
new file mode 100644
index 0000000..101b503
--- /dev/null
+++ b/blahtexml/manual.tex
@@ -0,0 +1,1857 @@
+% blahtex manual
+
+% Copyright (c) 2006, David Harvey
+
+% Copyright (C) 2007-2008, Gilles Van Assche
+
+% Permission is granted to copy, distribute and/or modify this document
+% under the terms of the GNU Free Documentation License, Version 1.2
+% or any later version published by the Free Software Foundation;
+% with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
+% Texts. A copy of the license is included in the file GNU-FDL.
+
+\documentclass{article}
+\usepackage{html} % latex2html package
+\usepackage{ucs} % for \unichar
+\usepackage{graphicx}
+\usepackage{longtable}
+\usepackage{amsmath, amssymb}
+
+\newcommand{\blahtexversion}{0.5}
+\newcommand{\texcommand}[1]{\textbackslash{}#1}
+\newcommand{\mylink}[1]{\htmladdnormallink{\texttt{#1}}{#1}}
+
+% Macros used for building tables of commands:
+\newcommand{\spacer}{\,\,\, \hfil}
+\newcommand{\lastspacer}{\hfill\hfill\hfill}
+
+
+
+\newenvironment{mylist}{\begin{quote}}{\end{quote}}
+
+
+\begin{document}
+
+\thispagestyle{empty}
+
+\begin{center}
+\includegraphics[width=10cm]{logo.png}
+
+\vskip 1.6cm
+
+{\Large blahtex and blahtexml version \blahtexversion{} manual}
+
+\vskip 0.8cm
+
+{\Large David Harvey} and {\Large Gilles Van Assche}
+\end{center}
+
+\vskip 1.6cm
+
+{\footnotesize
+Copyright (c) 2006, David Harvey. Permission is granted to copy, distribute
+and/or modify this document under the terms of the
+\htmladdnormallink{GNU Free Documentation License}{http://www.gnu.org/copyleft/fdl.html},
+Version 1.2 or any later version published by the Free Software Foundation;
+with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+A copy of the license, and the \LaTeX{} source for this manual, is included
+in the blahtex source distribution.
+}
+
+{\footnotesize
+Copyright (c) 2007-2008, Gilles Van Assche. Permission is granted to copy, distribute and/or modify this document under the terms of the
+\htmladdnormallink{GNU Free Documentation License}{http://www.gnu.org/copyleft/fdl.html},
+Version 1.2 or any later version published by the Free Software Foundation;
+with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+A copy of the license, and the \LaTeX{} source for this manual, is included
+in the blahtex source distribution.
+}
+
+\section{Introduction}
+
+\begin{latexonly}
+This is the manual for blahtex version \blahtexversion. The most up-to-date information about blahtex, including
+an online HTML version of this document, is available at \texttt{www.blahtex.org}.
+
+This manual also contains information regarding the blahtex extension \emph{blahtexml}, which converts
+all equations from an XML file given as input.
+The most up-to-date information about blahtexml is available at \texttt{gva.noekeon.org/blahtexml}.
+\end{latexonly}
+
+\begin{htmlonly}
+This is the manual for blahtex version \blahtexversion. The most up-to-date information about blahtex, including
+a PDF version of this document, is available at \htmladdnormallink{www.blahtex.org}{http://www.blahtex.org/}.
+
+This manual also contains information regarding the blahtex extension \emph{blahtexml}, which converts
+all equations from an XML file given as input.
+The most up-to-date information about blahtexml is available at
+\htmladdnormallink{gva.noekeon.org/blahtexml}{http://gva.noekeon.org/blahtexml}.
+\end{htmlonly}
+
+\subsection{How this document is organised}
+
+\begin{itemize}
+\item {\bf What blahtex can handle} (Section \ref{sec:handle}) explains what kind of \TeX{} input blahtex can cope with, and how it differs from texvc.
+\item {\bf The blahtex command-line application} (Section \ref{sec:command-line}) describes how to compile, install, and run the blahtex command-line application, and how to interpret its output. This will be of interest to developers who would like a simple way to incorporate blahtex into their project.
+\item {\bf The blahtexml command-line application} (Section \ref{sec:blahtexml}) describes how to compile, install, and run the blahtexml command-line application.
+\item {\bf The blahtex API} (Section \ref{sec:API}) describes how to link blahtex directly into your code, which might give better performance in some environments.
+\item {\bf History/changelog} (Section \ref{sec:history}) summarises previous versions and changes.
+\end{itemize}
+
+\subsection{What is blahtex?}
+
+Blahtex is a free software tool/library that translates \TeX{} markup into MathML markup. It is also capable of generating PNG format images, using some external tools (\LaTeX{} and \texttt{dvipng}).
+
+Blahtex is \emph{not} designed to process entire \TeX{} documents. Rather, it focuses on the mathematical capabilities of the \TeX{} language, processing only a single equation at a time. It is designed to provide mathematical support to a larger document markup system. Currently, the main target platform is \htmladdnormallink{MediaWiki}{http://www.mediawiki.org/wiki/MediaWiki} --- the software that powers \htmladdnormallink{Wikipedia}{http://www.wikipedia.org/} and many other wikis --- but blahtex has been designed with flexibility of integration in mind.
+
+Blahtex concentrates on matching the \emph{appearance} of \TeX{} output, as far as this is possible given the fonts available to the MathML renderer. It only outputs Presentation MathML, not Content MathML. Blahtex is aware of at least some of \TeX{}'s rules concerning spacing and fonts. For example, it knows about `atom flavours' (like ord, rel, op, etc) and \TeX{}'s algorithms for determining the amount of space between them.
+
+Blahtex implements some subset of \TeX{}, \LaTeX{} and AMS-\LaTeX{}, including almost all of the symbols. A complete list of supported and quasi-supported commands can be found in Section \ref{sec:handle}.
+
+Blahtex is internally Unicode-based. Non-ASCII characters may be used in text mode (e.g.~within \texttt{\texcommand{text}\{...\}} blocks). These will be handled correctly for MathML output. For PNG output, blahtex can currently handle some extended Latin characters (see Section \ref{sec:non-ascii-characters}), and there is experimental support for Cyrillic and Japanese. More scripts may be added in the future.
+
+Blahtex is open source software. The source code is released under the \htmladdnormallink{GNU GPL}{http://www.gnu.org/copyleft/gpl.html} (General Public License). This means that although the source is copyrighted, you may modify it, use it in your own programs, or even sell it, as long as you adhere to the GPL.
+
+Blahtex is written in C++. It compiles on Linux and Mac OS X systems, but probably is not as portable as it could be (see Section \ref{sec:prerequisites}).
+
+Blahtex obviously owes a lot to \htmladdnormallink{texvc}{http://en.wikipedia.org/wiki/texvc}, the software presently used by MediaWiki to handle \TeX{} input, written by Tomasz Wegrzanowski.
+
+Blahtex is a work in progress. I hereby solicit {\bf your feedback}, to help me improve it as much as possible.
+
+(It has not escaped the author's attention that every paragraph of this section either begins or ends with the word `blahtex'.)
+
+\subsection{The origin of the name `blahtex'}
+
+{In the beginning there was \TeX{}. Later, we also met \LaTeX{}, and ConTeXt, \small teTeX, \footnotesize MiKTeX, blah \scriptsize blah \tiny blah...}
+
+\subsection{Other converters}
+
+There are a variety of other \TeX{}-to-MathML converters available. The MathML home page (\mylink{http://www.w3.org/Math/}) has quite a long list. Here are a few that have online demos available:
+
+\begin{itemize}
+\item {\bf itex2mml}: \\
+\mylink{http://pear.math.pitt.edu/mathzilla/itex2mml.html}
+\item {\bf TexToMathML}: \\
+\mylink{http://www.orcca.on.ca/MathML/texmml/textomml.html}
+\item {\bf TtM}: \\
+\mylink{http://hutchinson.belmont.ma.us/tth/mml/}
+\end{itemize}
+
+They have their pros and cons, as does blahtex. I happen to think blahtex is rather good, but of course I am biased :-) Feel free to disagree. Please let me know if you think blahtex is no good, and \emph{why} it's no good, so that maybe I can fix it. (Also, let me know if you think it's great!)
+
+\subsection{Acknowledgements}
+
+Thanks to the crew at Wikipedia, for pioneering such a fabulous resource, especially the regulars at WikiProject Mathematics.
+
+Thanks to Jitse Niesen for his ongoing work on integrating blahtex into MediaWiki (currently on show at \htmladdnormallink{\texttt{wiki.blahtex.org}}{http://wiki.blahtex.org/}), and for generally being very supportive of this project.
+
+\section{What blahtex can handle}\label{sec:handle}
+
+Blahtex supports some subset of \TeX{}, \LaTeX{} and AMS-\LaTeX{}. This section gives a complete list of supported commands, together with some comments where the support is known to be incomplete.
+
+\subsection{Macros}
+
+Blahtex supports \texttt{\texcommand{newcommand}}, including arguments (but not \emph{optional} arguments).
+
+Blahtex protects against a malicious user eliciting exponential time via recursive macros, by imposing a hard limit on the amount of macro processing that can occur.
+
+Note that, unlike in \TeX{}, \texttt{\texcommand{newcommand}} is \emph{not} local to blocks in blahtex. For example, \texttt{\{\texcommand{newcommand}\{\texcommand{abc}\}\{xyz\}\} \texcommand{abc}} is legal in blahtex, but not in \TeX{}, because \TeX{} only remembers the definition of \texttt{\texcommand{abc}} within the enclosing \texttt{\{...\}} block.
+
+Clearly \texttt{\texcommand{newcommand}} is not very useful for an individual equation. In a larger document markup system, a good approach might be to provide a facility for specifying a document-wide collection of macros, and the software would automatically prepend the relevant \texttt{\texcommand{newcommand}}s to each equation in which a macro needs to be available. It is not clear at this stage whether this model would be technically feasible in MediaWiki.
+
+\subsection{Environments}
+
+\texttt{\texcommand{begin}\{XYZ\} ... \texcommand{end}\{XYZ\}}, where \texttt{XYZ} is one of:
+
+\begin{mylist}
+\texttt{matrix} \spacer
+\texttt{pmatrix} \spacer
+\texttt{bmatrix} \spacer
+\texttt{Bmatrix} \spacer
+\texttt{vmatrix} \spacer
+\texttt{Vmatrix} \spacer
+\texttt{cases} \spacer
+\texttt{aligned} \spacer
+\texttt{smallmatrix} \lastspacer
+\end{mylist}
+
+\subsection{Miscellaneous}
+
+\begin{mylist}
+\texttt{\texcommand{sqrt}} (including with optional argument) \spacer
+\texttt{\texcommand{substack}} \spacer
+\texttt{\texcommand{overset}} \spacer
+\texttt{\texcommand{underset}} \spacer
+\texttt{\texcommand{not}} \lastspacer
+\end{mylist}
+
+When it encounters \texttt{\texcommand{not}}, blahtex will attempt to find a MathML character that directly corresponds to the negation of any operator appearing after \texttt{\texcommand{not}}. Failing that, it will try to draw an ordinary slash in the right place, using the MathML \texttt{<mpadded>} element to fudge things.
+
+\subsection{Colour}
+
+Blahtex supports \texttt{\texcommand{color}\{X\}}, where \texttt{X} is one of the following named colours:
+
+\begin{mylist}
+\texttt{GreenYellow} \spacer
+\texttt{Yellow} \spacer
+\texttt{yellow} \spacer
+\texttt{Goldenrod} \spacer
+\texttt{Dandelion} \spacer
+\texttt{Apricot} \spacer
+\texttt{Peach} \spacer
+\texttt{Melon} \spacer
+\texttt{YellowOrange} \spacer
+\texttt{Orange} \spacer
+\texttt{BurntOrange} \spacer
+\texttt{Bittersweet} \spacer
+\texttt{RedOrange} \spacer
+\texttt{Mahogany} \spacer
+\texttt{Maroon} \spacer
+\texttt{BrickRed} \spacer
+\texttt{Red} \spacer
+\texttt{red} \spacer
+\texttt{OrangeRed} \spacer
+\texttt{RubineRed} \spacer
+\texttt{WildStrawberry} \spacer
+\texttt{Salmon} \spacer
+\texttt{CarnationPink} \spacer
+\texttt{Magenta} \spacer
+\texttt{magenta} \spacer
+\texttt{VioletRed} \spacer
+\texttt{Rhodamine} \spacer
+\texttt{Mulberry} \spacer
+\texttt{RedViolet} \spacer
+\texttt{Fuchsia} \spacer
+\texttt{Lavender} \spacer
+\texttt{Thistle} \spacer
+\texttt{Orchid} \spacer
+\texttt{DarkOrchid} \spacer
+\texttt{Purple} \spacer
+\texttt{Plum} \spacer
+\texttt{Violet} \spacer
+\texttt{RoyalPurple} \spacer
+\texttt{BlueViolet} \spacer
+\texttt{Periwinkle} \spacer
+\texttt{CadetBlue} \spacer
+\texttt{CornflowerBlue} \spacer
+\texttt{MidnightBlue} \spacer
+\texttt{NavyBlue} \spacer
+\texttt{RoyalBlue} \spacer
+\texttt{Blue} \spacer
+\texttt{blue} \spacer
+\texttt{Cerulean} \spacer
+\texttt{Cyan} \spacer
+\texttt{cyan} \spacer
+\texttt{ProcessBlue} \spacer
+\texttt{SkyBlue} \spacer
+\texttt{Turquoise} \spacer
+\texttt{TealBlue} \spacer
+\texttt{Aquamarine} \spacer
+\texttt{BlueGreen} \spacer
+\texttt{Emerald} \spacer
+\texttt{JungleGreen} \spacer
+\texttt{SeaGreen} \spacer
+\texttt{Green} \spacer
+\texttt{green} \spacer
+\texttt{ForestGreen} \spacer
+\texttt{PineGreen} \spacer
+\texttt{LimeGreen} \spacer
+\texttt{YellowGreen} \spacer
+\texttt{SpringGreen} \spacer
+\texttt{OliveGreen} \spacer
+\texttt{RawSienna} \spacer
+\texttt{Sepia} \spacer
+\texttt{Brown} \spacer
+\texttt{Tan} \spacer
+\texttt{Gray} \spacer
+\texttt{Black} \spacer
+\texttt{black} \spacer
+\texttt{White} \spacer
+\texttt{white} \lastspacer
+\end{mylist}
+
+At this time there is no support for colour models, so you can't do things like \texttt{\texcommand{color}[rgb]\{0.2,0.3,0.4\}}.
+
+There are some subtle bugs in the parsing of \texttt{\texcommand{color}} commands. Things like \texttt{\texcommand{overset}\{a\}\{\texcommand{color}\{blue\}x\}} are not legal in \LaTeX, for reasons I haven't yet fully investigated; blahtex still accepts them.
+
+\subsection{Text commands}
+
+\begin{mylist}
+\texttt{\texcommand{text}} \spacer
+\texttt{\texcommand{textit}} \spacer
+\texttt{\texcommand{textbf}} \spacer
+\texttt{\texcommand{textrm}} \spacer
+\texttt{\texcommand{texttt}} \spacer
+\texttt{\texcommand{textsf}} \spacer
+\texttt{\texcommand{emph}} \spacer
+\texttt{\texcommand{hbox}} \spacer
+\texttt{\texcommand{mbox}} \lastspacer
+\end{mylist}
+
+The command \texttt{\texcommand{hbox}} doesn't really behave like it should, because MathML doesn't really have a notion of `horizontal box'. Blahtex treats \texttt{\texcommand{hbox}} essentially equivalently to \texttt{\texcommand{text}}, with slightly different formatting rules. Things like \texttt{\texcommand{hbox} to 12pt} are not supported.
+
+\subsection{Fractions, binomials}
+
+\begin{mylist}
+\texttt{\texcommand{frac}} \spacer
+\texttt{\texcommand{cfrac}} \spacer
+\texttt{\texcommand{over}} \spacer
+\texttt{\texcommand{binom}} \spacer
+\texttt{\texcommand{choose}} \spacer
+\texttt{\texcommand{atop}} \lastspacer
+\end{mylist}
+
+\subsection{Delimiters}
+
+\begin{mylist}
+\texttt{\texcommand{left}} \spacer
+\texttt{\texcommand{right}} \spacer
+\texttt{\texcommand{big}} \spacer
+\texttt{\texcommand{Big}} \spacer
+\texttt{\texcommand{bigg}} \spacer
+\texttt{\texcommand{Bigg}} \spacer
+\texttt{\texcommand{bigl}} \spacer
+\texttt{\texcommand{Bigl}} \spacer
+\texttt{\texcommand{biggl}} \spacer
+\texttt{\texcommand{Biggl}} \spacer
+\texttt{\texcommand{bigr}} \spacer
+\texttt{\texcommand{Bigr}} \spacer
+\texttt{\texcommand{biggr}} \spacer
+\texttt{\texcommand{Biggr}} \lastspacer
+\end{mylist}
+
+\subsection{Atom flavours}
+
+\begin{mylist}
+\texttt{\texcommand{mathop}} \spacer
+\texttt{\texcommand{mathrel}} \spacer
+\texttt{\texcommand{mathord}} \spacer
+\texttt{\texcommand{mathbin}} \spacer
+\texttt{\texcommand{mathopen}} \spacer
+\texttt{\texcommand{mathclose}} \spacer
+\texttt{\texcommand{mathpunct}} \spacer
+\texttt{\texcommand{mathinner}} \lastspacer
+\end{mylist}
+
+\subsection{Limits}
+
+\begin{mylist}
+\texttt{\texcommand{limits}} \spacer
+\texttt{\texcommand{nolimits}} \spacer
+\texttt{\texcommand{displaylimits}} \lastspacer
+\end{mylist}
+
+\subsection{Spacing}
+
+\begin{mylist}
+\texttt{\texcommand{,}} \spacer
+\texttt{\texcommand{!}} \spacer
+\texttt{\texcommand{ }} \spacer
+\texttt{\texcommand{;}} \spacer
+\texttt{\texcommand{>}} \spacer
+\texttt{\texcommand{quad}} \spacer
+\texttt{\texcommand{qquad}} \lastspacer
+\end{mylist}
+
+\subsection{Accents}
+
+\begin{mylist}
+\texttt{\texcommand{hat}} \spacer
+\texttt{\texcommand{widehat}} \spacer
+\texttt{\texcommand{dot}} \spacer
+\texttt{\texcommand{ddot}} \spacer
+\texttt{\texcommand{bar}} \spacer
+\texttt{\texcommand{overline}} \spacer
+\texttt{\texcommand{underline}} \spacer
+\texttt{\texcommand{overbrace}} \spacer
+\texttt{\texcommand{underbrace}} \spacer
+\texttt{\texcommand{overleftarrow}} \spacer
+\texttt{\texcommand{overrightarrow}} \spacer
+\texttt{\texcommand{overleftrightarrow}} \spacer
+\texttt{\texcommand{check}} \spacer
+\texttt{\texcommand{acute}} \spacer
+\texttt{\texcommand{grave}} \spacer
+\texttt{\texcommand{vec}} \spacer
+\texttt{\texcommand{breve}} \spacer
+\texttt{\texcommand{tilde}} \spacer
+\texttt{\texcommand{widetilde}} \lastspacer
+\end{mylist}
+
+\subsection{Fonts}
+
+\begin{mylist}
+\texttt{\texcommand{mathbf}} \spacer
+\texttt{\texcommand{mathbb}} \spacer
+\texttt{\texcommand{mathrm}} \spacer
+\texttt{\texcommand{mathit}} \spacer
+\texttt{\texcommand{mathcal}} \spacer
+\texttt{\texcommand{mathfrak}} \spacer
+\texttt{\texcommand{mathsf}} \spacer
+\texttt{\texcommand{mathtt}} \spacer
+\texttt{\texcommand{boldsymbol}} \spacer
+\texttt{\texcommand{rm}} \spacer
+\texttt{\texcommand{bf}} \spacer
+\texttt{\texcommand{it}} \spacer
+\texttt{\texcommand{cal}} \spacer
+\texttt{\texcommand{tt}} \spacer
+\texttt{\texcommand{sf}} \spacer
+\texttt{\texcommand{Bbb}} \spacer
+\texttt{\texcommand{bold}} \lastspacer
+\end{mylist}
+
+\subsection{Style}
+
+\begin{mylist}
+\texttt{\texcommand{displaystyle}} \spacer
+\texttt{\texcommand{textstyle}} \spacer
+\texttt{\texcommand{scriptstyle}} \spacer
+\texttt{\texcommand{scriptscriptstyle}} \lastspacer
+\end{mylist}
+
+\subsection{Named operators}
+
+\begin{mylist}
+\texttt{\texcommand{operatorname}} \spacer
+\texttt{\texcommand{operatornamewithlimits}} \spacer
+\texttt{\texcommand{lim}} \spacer
+\texttt{\texcommand{sup}} \spacer
+\texttt{\texcommand{inf}} \spacer
+\texttt{\texcommand{limsup}} \spacer
+\texttt{\texcommand{liminf}} \spacer
+\texttt{\texcommand{injlim}} \spacer
+\texttt{\texcommand{projlim}} \spacer
+\texttt{\texcommand{varlimsup}} \spacer
+\texttt{\texcommand{varliminf}} \spacer
+\texttt{\texcommand{varinjlim}} \spacer
+\texttt{\texcommand{varprojlim}} \spacer
+\texttt{\texcommand{min}} \spacer
+\texttt{\texcommand{max}} \spacer
+\texttt{\texcommand{gcd}} \spacer
+\texttt{\texcommand{det}} \spacer
+\texttt{\texcommand{Pr}} \spacer
+\texttt{\texcommand{ker}} \spacer
+\texttt{\texcommand{hom}} \spacer
+\texttt{\texcommand{dim}} \spacer
+\texttt{\texcommand{arg}} \spacer
+\texttt{\texcommand{sin}} \spacer
+\texttt{\texcommand{cos}} \spacer
+\texttt{\texcommand{sec}} \spacer
+\texttt{\texcommand{csc}} \spacer
+\texttt{\texcommand{tan}} \spacer
+\texttt{\texcommand{cot}} \spacer
+\texttt{\texcommand{arcsin}} \spacer
+\texttt{\texcommand{arccos}} \spacer
+\texttt{\texcommand{arctan}} \spacer
+\texttt{\texcommand{sinh}} \spacer
+\texttt{\texcommand{cosh}} \spacer
+\texttt{\texcommand{tanh}} \spacer
+\texttt{\texcommand{coth}} \spacer
+\texttt{\texcommand{log}} \spacer
+\texttt{\texcommand{lg}} \spacer
+\texttt{\texcommand{ln}} \spacer
+\texttt{\texcommand{exp}} \spacer
+\texttt{\texcommand{deg}} \spacer
+\texttt{\texcommand{mod}} \spacer
+\texttt{\texcommand{bmod}} \spacer
+\texttt{\texcommand{pmod}} \lastspacer
+\end{mylist}
+
+\subsection{Escaped characters}
+
+\begin{mylist}
+\texttt{\texcommand{\_}} \spacer
+\texttt{\texcommand{\&}} \spacer
+\texttt{\texcommand{\$}} \spacer
+\texttt{\texcommand{\#}} \spacer
+\texttt{\texcommand{\%}} \spacer
+\texttt{\texcommand{\{}} \spacer
+\texttt{\texcommand{\}}} \lastspacer
+\end{mylist}
+
+\subsection{Greek letters}
+
+\begin{mylist}
+\texttt{\texcommand{alpha}} \spacer
+\texttt{\texcommand{beta}} \spacer
+\texttt{\texcommand{gamma}} \spacer
+\texttt{\texcommand{delta}} \spacer
+\texttt{\texcommand{epsilon}} \spacer
+\texttt{\texcommand{varepsilon}} \spacer
+\texttt{\texcommand{zeta}} \spacer
+\texttt{\texcommand{eta}} \spacer
+\texttt{\texcommand{vartheta}} \spacer
+\texttt{\texcommand{theta}} \spacer
+\texttt{\texcommand{iota}} \spacer
+\texttt{\texcommand{kappa}} \spacer
+\texttt{\texcommand{varkappa}} \spacer
+\texttt{\texcommand{lambda}} \spacer
+\texttt{\texcommand{mu}} \spacer
+\texttt{\texcommand{nu}} \spacer
+\texttt{\texcommand{pi}} \spacer
+\texttt{\texcommand{varpi}} \spacer
+\texttt{\texcommand{rho}} \spacer
+\texttt{\texcommand{varrho}} \spacer
+\texttt{\texcommand{sigma}} \spacer
+\texttt{\texcommand{varsigma}} \spacer
+\texttt{\texcommand{tau}} \spacer
+\texttt{\texcommand{upsilon}} \spacer
+\texttt{\texcommand{phi}} \spacer
+\texttt{\texcommand{varphi}} \spacer
+\texttt{\texcommand{chi}} \spacer
+\texttt{\texcommand{psi}} \spacer
+\texttt{\texcommand{omega}} \spacer
+\texttt{\texcommand{xi}} \spacer
+\texttt{\texcommand{digamma}} \spacer
+\texttt{\texcommand{Gamma}} \spacer
+\texttt{\texcommand{Delta}} \spacer
+\texttt{\texcommand{Theta}} \spacer
+\texttt{\texcommand{Lambda}} \spacer
+\texttt{\texcommand{Pi}} \spacer
+\texttt{\texcommand{Sigma}} \spacer
+\texttt{\texcommand{Upsilon}} \spacer
+\texttt{\texcommand{Phi}} \spacer
+\texttt{\texcommand{Psi}} \spacer
+\texttt{\texcommand{Omega}} \spacer
+\texttt{\texcommand{Xi}} \lastspacer
+\end{mylist}
+
+\subsection{Various mathematical symbols in no particular order}
+
+\begin{mylist}
+\texttt{\texcommand{ast}} \spacer
+\texttt{\texcommand{implies}} \spacer
+\texttt{\texcommand{neg}} \spacer
+\texttt{\texcommand{ne}} \spacer
+\texttt{\texcommand{ge}} \spacer
+\texttt{\texcommand{le}} \spacer
+\texttt{\texcommand{land}} \spacer
+\texttt{\texcommand{lor}} \spacer
+\texttt{\texcommand{gets}} \spacer
+\texttt{\texcommand{to}} \spacer
+\texttt{\texcommand{vert}} \spacer
+\texttt{\texcommand{lvert}} \spacer
+\texttt{\texcommand{rvert}} \spacer
+\texttt{\texcommand{Vert}} \spacer
+\texttt{\texcommand{lVert}} \spacer
+\texttt{\texcommand{rVert}} \spacer
+\texttt{\texcommand{lfloor}} \spacer
+\texttt{\texcommand{rfloor}} \spacer
+\texttt{\texcommand{lceil}} \spacer
+\texttt{\texcommand{rceil}} \spacer
+\texttt{\texcommand{lbrace}} \spacer
+\texttt{\texcommand{rbrace}} \spacer
+\texttt{\texcommand{langle}} \spacer
+\texttt{\texcommand{rangle}} \spacer
+\texttt{\texcommand{lbrack}} \spacer
+\texttt{\texcommand{rbrack}} \spacer
+\texttt{\texcommand{aleph}} \spacer
+\texttt{\texcommand{beth}} \spacer
+\texttt{\texcommand{gimel}} \spacer
+\texttt{\texcommand{daleth}} \spacer
+\texttt{\texcommand{wp}} \spacer
+\texttt{\texcommand{ell}} \spacer
+\texttt{\texcommand{P}} \spacer
+\texttt{\texcommand{imath}} \spacer
+\texttt{\texcommand{forall}} \spacer
+\texttt{\texcommand{exists}} \spacer
+\texttt{\texcommand{Finv}} \spacer
+\texttt{\texcommand{Game}} \spacer
+\texttt{\texcommand{partial}} \spacer
+\texttt{\texcommand{Re}} \spacer
+\texttt{\texcommand{Im}} \spacer
+\texttt{\texcommand{leftarrow}} \spacer
+\texttt{\texcommand{rightarrow}} \spacer
+\texttt{\texcommand{longleftarrow}} \spacer
+\texttt{\texcommand{longrightarrow}} \spacer
+\texttt{\texcommand{Leftarrow}} \spacer
+\texttt{\texcommand{Rightarrow}} \spacer
+\texttt{\texcommand{Longleftarrow}} \spacer
+\texttt{\texcommand{Longrightarrow}} \spacer
+\texttt{\texcommand{mapsto}} \spacer
+\texttt{\texcommand{longmapsto}} \spacer
+\texttt{\texcommand{leftrightarrow}} \spacer
+\texttt{\texcommand{Leftrightarrow}} \spacer
+\texttt{\texcommand{longleftrightarrow}} \spacer
+\texttt{\texcommand{Longleftrightarrow}} \spacer
+\texttt{\texcommand{uparrow}} \spacer
+\texttt{\texcommand{Uparrow}} \spacer
+\texttt{\texcommand{downarrow}} \spacer
+\texttt{\texcommand{Downarrow}} \spacer
+\texttt{\texcommand{updownarrow}} \spacer
+\texttt{\texcommand{Updownarrow}} \spacer
+\texttt{\texcommand{searrow}} \spacer
+\texttt{\texcommand{nearrow}} \spacer
+\texttt{\texcommand{swarrow}} \spacer
+\texttt{\texcommand{nwarrow}} \spacer
+\texttt{\texcommand{hookrightarrow}} \spacer
+\texttt{\texcommand{hookleftarrow}} \spacer
+\texttt{\texcommand{upharpoonright}} \spacer
+\texttt{\texcommand{upharpoonleft}} \spacer
+\texttt{\texcommand{downharpoonright}} \spacer
+\texttt{\texcommand{downharpoonleft}} \spacer
+\texttt{\texcommand{rightharpoonup}} \spacer
+\texttt{\texcommand{rightharpoondown}} \spacer
+\texttt{\texcommand{leftharpoonup}} \spacer
+\texttt{\texcommand{leftharpoondown}} \spacer
+\texttt{\texcommand{nleftarrow}} \spacer
+\texttt{\texcommand{nrightarrow}} \spacer
+\texttt{\texcommand{supset}} \spacer
+\texttt{\texcommand{subset}} \spacer
+\texttt{\texcommand{supseteq}} \spacer
+\texttt{\texcommand{subseteq}} \spacer
+\texttt{\texcommand{sqsupset}} \spacer
+\texttt{\texcommand{sqsubset}} \spacer
+\texttt{\texcommand{sqsupseteq}} \spacer
+\texttt{\texcommand{sqsubseteq}} \spacer
+\texttt{\texcommand{supsetneq}} \spacer
+\texttt{\texcommand{subsetneq}} \spacer
+\texttt{\texcommand{in}} \spacer
+\texttt{\texcommand{ni}} \spacer
+\texttt{\texcommand{notin}} \spacer
+\texttt{\texcommand{iff}} \spacer
+\texttt{\texcommand{mid}} \spacer
+\texttt{\texcommand{sim}} \spacer
+\texttt{\texcommand{simeq}} \spacer
+\texttt{\texcommand{approx}} \spacer
+\texttt{\texcommand{propto}} \spacer
+\texttt{\texcommand{equiv}} \spacer
+\texttt{\texcommand{cong}} \spacer
+\texttt{\texcommand{neq}} \spacer
+\texttt{\texcommand{ll}} \spacer
+\texttt{\texcommand{gg}} \spacer
+\texttt{\texcommand{geq}} \spacer
+\texttt{\texcommand{leq}} \spacer
+\texttt{\texcommand{triangleleft}} \spacer
+\texttt{\texcommand{triangleright}} \spacer
+\texttt{\texcommand{trianglelefteq}} \spacer
+\texttt{\texcommand{trianglerighteq}} \spacer
+\texttt{\texcommand{models}} \spacer
+\texttt{\texcommand{vdash}} \spacer
+\texttt{\texcommand{Vdash}} \spacer
+\texttt{\texcommand{vDash}} \spacer
+\texttt{\texcommand{lesssim}} \spacer
+\texttt{\texcommand{nless}} \spacer
+\texttt{\texcommand{ngeq}} \spacer
+\texttt{\texcommand{nleq}} \spacer
+\texttt{\texcommand{times}} \spacer
+\texttt{\texcommand{div}} \spacer
+\texttt{\texcommand{wedge}} \spacer
+\texttt{\texcommand{vee}} \spacer
+\texttt{\texcommand{oplus}} \spacer
+\texttt{\texcommand{otimes}} \spacer
+\texttt{\texcommand{cap}} \spacer
+\texttt{\texcommand{cup}} \spacer
+\texttt{\texcommand{sqcap}} \spacer
+\texttt{\texcommand{sqcup}} \spacer
+\texttt{\texcommand{smile}} \spacer
+\texttt{\texcommand{frown}} \spacer
+\texttt{\texcommand{smallsmile}} \spacer
+\texttt{\texcommand{smallfrown}} \spacer
+\texttt{\texcommand{setminus}} \spacer
+\texttt{\texcommand{smallsetminus}} \spacer
+\texttt{\texcommand{And}} \spacer
+\texttt{\texcommand{star}} \spacer
+\texttt{\texcommand{triangle}} \spacer
+\texttt{\texcommand{wr}} \spacer
+\texttt{\texcommand{infty}} \spacer
+\texttt{\texcommand{circ}} \spacer
+\texttt{\texcommand{hbar}} \spacer
+\texttt{\texcommand{lnot}} \spacer
+\texttt{\texcommand{nabla}} \spacer
+\texttt{\texcommand{prime}} \spacer
+\texttt{\texcommand{backslash}} \spacer
+\texttt{\texcommand{pm}} \spacer
+\texttt{\texcommand{mp}} \spacer
+\texttt{\texcommand{emptyset}} \spacer
+\texttt{\texcommand{varnothing}} \spacer
+\texttt{\texcommand{S}} \spacer
+\texttt{\texcommand{angle}} \spacer
+\texttt{\texcommand{colon}} \spacer
+\texttt{\texcommand{Diamond}} \spacer
+\texttt{\texcommand{nmid}} \spacer
+\texttt{\texcommand{square}} \spacer
+\texttt{\texcommand{Box}} \spacer
+\texttt{\texcommand{checkmark}} \spacer
+\texttt{\texcommand{complement}} \spacer
+\texttt{\texcommand{eth}} \spacer
+\texttt{\texcommand{hslash}} \spacer
+\texttt{\texcommand{mho}} \spacer
+\texttt{\texcommand{flat}} \spacer
+\texttt{\texcommand{sharp}} \spacer
+\texttt{\texcommand{natural}} \spacer
+\texttt{\texcommand{bullet}} \spacer
+\texttt{\texcommand{dagger}} \spacer
+\texttt{\texcommand{ddagger}} \spacer
+\texttt{\texcommand{clubsuit}} \spacer
+\texttt{\texcommand{spadesuit}} \spacer
+\texttt{\texcommand{heartsuit}} \spacer
+\texttt{\texcommand{diamondsuit}} \spacer
+\texttt{\texcommand{top}} \spacer
+\texttt{\texcommand{bot}} \spacer
+\texttt{\texcommand{perp}} \spacer
+\texttt{\texcommand{ldots}} \spacer
+\texttt{\texcommand{cdot}} \spacer
+\texttt{\texcommand{cdots}} \spacer
+\texttt{\texcommand{vdots}} \spacer
+\texttt{\texcommand{ddots}} \spacer
+\texttt{\texcommand{dots}} \spacer
+\texttt{\texcommand{dotsb}} \spacer
+\texttt{\texcommand{circledR}} \spacer
+\texttt{\texcommand{yen}} \spacer
+\texttt{\texcommand{maltese}} \spacer
+\texttt{\texcommand{circledS}} \spacer
+\texttt{\texcommand{Bbbk}} \spacer
+\texttt{\texcommand{jmath}} \spacer
+\texttt{\texcommand{ulcorner}} \spacer
+\texttt{\texcommand{urcorner}} \spacer
+\texttt{\texcommand{llcorner}} \spacer
+\texttt{\texcommand{lrcorner}} \spacer
+\texttt{\texcommand{dashrightarrow}} \spacer
+\texttt{\texcommand{dashleftarrow}} \spacer
+\texttt{\texcommand{backprime}} \spacer
+\texttt{\texcommand{vartriangle}} \spacer
+\texttt{\texcommand{blacktriangle}} \spacer
+\texttt{\texcommand{triangledown}} \spacer
+\texttt{\texcommand{blacktriangledown}} \spacer
+\texttt{\texcommand{blacksquare}} \spacer
+\texttt{\texcommand{lozenge}} \spacer
+\texttt{\texcommand{blacklozenge}} \spacer
+\texttt{\texcommand{bigstar}} \spacer
+\texttt{\texcommand{sphericalangle}} \spacer
+\texttt{\texcommand{measuredangle}} \spacer
+\texttt{\texcommand{dotplus}} \spacer
+\texttt{\texcommand{ltimes}} \spacer
+\texttt{\texcommand{rtimes}} \spacer
+\texttt{\texcommand{Cap}} \spacer
+\texttt{\texcommand{leftthreetimes}} \spacer
+\texttt{\texcommand{rightthreetimes}} \spacer
+\texttt{\texcommand{Cup}} \spacer
+\texttt{\texcommand{barwedge}} \spacer
+\texttt{\texcommand{curlywedge}} \spacer
+\texttt{\texcommand{veebar}} \spacer
+\texttt{\texcommand{curlyvee}} \spacer
+\texttt{\texcommand{doublebarwedge}} \spacer
+\texttt{\texcommand{boxminus}} \spacer
+\texttt{\texcommand{circleddash}} \spacer
+\texttt{\texcommand{boxtimes}} \spacer
+\texttt{\texcommand{circledast}} \spacer
+\texttt{\texcommand{boxdot}} \spacer
+\texttt{\texcommand{circledcirc}} \spacer
+\texttt{\texcommand{boxplus}} \spacer
+\texttt{\texcommand{centerdot}} \spacer
+\texttt{\texcommand{divideontimes}} \spacer
+\texttt{\texcommand{intercal}} \spacer
+\texttt{\texcommand{leqq}} \spacer
+\texttt{\texcommand{geqq}} \spacer
+\texttt{\texcommand{leqslant}} \spacer
+\texttt{\texcommand{geqslant}} \spacer
+\texttt{\texcommand{eqslantless}} \spacer
+\texttt{\texcommand{eqslantgtr}} \spacer
+\texttt{\texcommand{gtrsim}} \spacer
+\texttt{\texcommand{lessapprox}} \spacer
+\texttt{\texcommand{gtrapprox}} \spacer
+\texttt{\texcommand{approxeq}} \spacer
+\texttt{\texcommand{eqsim}} \spacer
+\texttt{\texcommand{lessdot}} \spacer
+\texttt{\texcommand{gtrdot}} \spacer
+\texttt{\texcommand{lll}} \spacer
+\texttt{\texcommand{ggg}} \spacer
+\texttt{\texcommand{lessgtr}} \spacer
+\texttt{\texcommand{gtrless}} \spacer
+\texttt{\texcommand{lesseqgtr}} \spacer
+\texttt{\texcommand{gtreqless}} \spacer
+\texttt{\texcommand{lesseqqgtr}} \spacer
+\texttt{\texcommand{gtreqqless}} \spacer
+\texttt{\texcommand{doteqdot}} \spacer
+\texttt{\texcommand{eqcirc}} \spacer
+\texttt{\texcommand{risingdotseq}} \spacer
+\texttt{\texcommand{circeq}} \spacer
+\texttt{\texcommand{fallingdotseq}} \spacer
+\texttt{\texcommand{triangleq}} \spacer
+\texttt{\texcommand{backsim}} \spacer
+\texttt{\texcommand{thicksim}} \spacer
+\texttt{\texcommand{backsimeq}} \spacer
+\texttt{\texcommand{thickapprox}} \spacer
+\texttt{\texcommand{subseteqq}} \spacer
+\texttt{\texcommand{supseteqq}} \spacer
+\texttt{\texcommand{Subset}} \spacer
+\texttt{\texcommand{Supset}} \spacer
+\texttt{\texcommand{preccurlyeq}} \spacer
+\texttt{\texcommand{succcurlyeq}} \spacer
+\texttt{\texcommand{curlyeqprec}} \spacer
+\texttt{\texcommand{curlyeqsucc}} \spacer
+\texttt{\texcommand{precsim}} \spacer
+\texttt{\texcommand{succsim}} \spacer
+\texttt{\texcommand{precapprox}} \spacer
+\texttt{\texcommand{succapprox}} \spacer
+\texttt{\texcommand{Vvdash}} \spacer
+\texttt{\texcommand{shortmid}} \spacer
+\texttt{\texcommand{shortparallel}} \spacer
+\texttt{\texcommand{bumpeq}} \spacer
+\texttt{\texcommand{between}} \spacer
+\texttt{\texcommand{Bumpeq}} \spacer
+\texttt{\texcommand{varpropto}} \spacer
+\texttt{\texcommand{backepsilon}} \spacer
+\texttt{\texcommand{blacktriangleleft}} \spacer
+\texttt{\texcommand{blacktriangleright}} \spacer
+\texttt{\texcommand{therefore}} \spacer
+\texttt{\texcommand{because}} \spacer
+\texttt{\texcommand{ngtr}} \spacer
+\texttt{\texcommand{nleqslant}} \spacer
+\texttt{\texcommand{ngeqslant}} \spacer
+\texttt{\texcommand{nleqq}} \spacer
+\texttt{\texcommand{ngeqq}} \spacer
+\texttt{\texcommand{lneqq}} \spacer
+\texttt{\texcommand{gneqq}} \spacer
+\texttt{\texcommand{lvertneqq}} \spacer
+\texttt{\texcommand{gvertneqq}} \spacer
+\texttt{\texcommand{lnsim}} \spacer
+\texttt{\texcommand{gnsim}} \spacer
+\texttt{\texcommand{lnapprox}} \spacer
+\texttt{\texcommand{gnapprox}} \spacer
+\texttt{\texcommand{nprec}} \spacer
+\texttt{\texcommand{nsucc}} \spacer
+\texttt{\texcommand{npreceq}} \spacer
+\texttt{\texcommand{nsucceq}} \spacer
+\texttt{\texcommand{precneqq}} \spacer
+\texttt{\texcommand{succneqq}} \spacer
+\texttt{\texcommand{precnsim}} \spacer
+\texttt{\texcommand{succnsim}} \spacer
+\texttt{\texcommand{precnapprox}} \spacer
+\texttt{\texcommand{succnapprox}} \spacer
+\texttt{\texcommand{nsim}} \spacer
+\texttt{\texcommand{ncong}} \spacer
+\texttt{\texcommand{nshortmid}} \spacer
+\texttt{\texcommand{nshortparallel}} \spacer
+\texttt{\texcommand{nmid}} \spacer
+\texttt{\texcommand{nparallel}} \spacer
+\texttt{\texcommand{nvdash}} \spacer
+\texttt{\texcommand{nvDash}} \spacer
+\texttt{\texcommand{nVdash}} \spacer
+\texttt{\texcommand{nVDash}} \spacer
+\texttt{\texcommand{ntriangleleft}} \spacer
+\texttt{\texcommand{ntriangleright}} \spacer
+\texttt{\texcommand{ntrianglelefteq}} \spacer
+\texttt{\texcommand{ntrianglerighteq}} \spacer
+\texttt{\texcommand{nsubseteq}} \spacer
+\texttt{\texcommand{nsupseteq}} \spacer
+\texttt{\texcommand{nsubseteqq}} \spacer
+\texttt{\texcommand{nsupseteqq}} \spacer
+\texttt{\texcommand{subsetneq}} \spacer
+\texttt{\texcommand{supsetneq}} \spacer
+\texttt{\texcommand{varsubsetneq}} \spacer
+\texttt{\texcommand{varsupsetneq}} \spacer
+\texttt{\texcommand{subsetneqq}} \spacer
+\texttt{\texcommand{supsetneqq}} \spacer
+\texttt{\texcommand{varsubsetneqq}} \spacer
+\texttt{\texcommand{varsupsetneqq}} \spacer
+\texttt{\texcommand{leftleftarrows}} \spacer
+\texttt{\texcommand{rightrightarrows}} \spacer
+\texttt{\texcommand{leftrightarrows}} \spacer
+\texttt{\texcommand{rightleftarrows}} \spacer
+\texttt{\texcommand{Lleftarrow}} \spacer
+\texttt{\texcommand{Rrightarrow}} \spacer
+\texttt{\texcommand{twoheadleftarrow}} \spacer
+\texttt{\texcommand{twoheadrightarrow}} \spacer
+\texttt{\texcommand{leftarrowtail}} \spacer
+\texttt{\texcommand{rightarrowtail}} \spacer
+\texttt{\texcommand{looparrowleft}} \spacer
+\texttt{\texcommand{looparrowright}} \spacer
+\texttt{\texcommand{leftrightharpoons}} \spacer
+\texttt{\texcommand{rightleftharpoons}} \spacer
+\texttt{\texcommand{curvearrowleft}} \spacer
+\texttt{\texcommand{curvearrowright}} \spacer
+\texttt{\texcommand{circlearrowleft}} \spacer
+\texttt{\texcommand{circlearrowright}} \spacer
+\texttt{\texcommand{Lsh}} \spacer
+\texttt{\texcommand{Rsh}} \spacer
+\texttt{\texcommand{upuparrows}} \spacer
+\texttt{\texcommand{downdownarrows}} \spacer
+\texttt{\texcommand{multimap}} \spacer
+\texttt{\texcommand{rightsquigarrow}} \spacer
+\texttt{\texcommand{leftrightsquigarrow}} \spacer
+\texttt{\texcommand{nLeftarrow}} \spacer
+\texttt{\texcommand{nRightarrow}} \spacer
+\texttt{\texcommand{nleftrightarrow}} \spacer
+\texttt{\texcommand{nLeftrightarrow}} \spacer
+\texttt{\texcommand{pitchfork}} \spacer
+\texttt{\texcommand{nexists}} \spacer
+\texttt{\texcommand{lhd}} \spacer
+\texttt{\texcommand{rhd}} \spacer
+\texttt{\texcommand{unlhd}} \spacer
+\texttt{\texcommand{unrhd}} \spacer
+\texttt{\texcommand{leadsto}} \spacer
+\texttt{\texcommand{uplus}} \spacer
+\texttt{\texcommand{diamond}} \spacer
+\texttt{\texcommand{bigtriangleup}} \spacer
+\texttt{\texcommand{bigtriangledown}} \spacer
+\texttt{\texcommand{ominus}} \spacer
+\texttt{\texcommand{oslash}} \spacer
+\texttt{\texcommand{odot}} \spacer
+\texttt{\texcommand{bigcirc}} \spacer
+\texttt{\texcommand{amalg}} \spacer
+\texttt{\texcommand{prec}} \spacer
+\texttt{\texcommand{succ}} \spacer
+\texttt{\texcommand{preceq}} \spacer
+\texttt{\texcommand{succeq}} \spacer
+\texttt{\texcommand{dashv}} \spacer
+\texttt{\texcommand{asymp}} \spacer
+\texttt{\texcommand{doteq}} \spacer
+\texttt{\texcommand{parallel}} \spacer
+\texttt{\texcommand{bowtie}} \spacer
+\texttt{\texcommand{surd}} \spacer
+\texttt{\texcommand{doublecap}} \spacer
+\texttt{\texcommand{restriction}} \spacer
+\texttt{\texcommand{llless}} \spacer
+\texttt{\texcommand{gggtr}} \spacer
+\texttt{\texcommand{Doteq}} \spacer
+\texttt{\texcommand{doublecup}} \spacer
+\texttt{\texcommand{dasharrow}} \spacer
+\texttt{\texcommand{vartriangleleft}} \spacer
+\texttt{\texcommand{vartriangleright}} \spacer
+\texttt{\texcommand{Join}} \lastspacer
+\end{mylist}
+
+\subsection{Large operators}
+
+\begin{mylist}
+\texttt{\texcommand{sum}} \spacer
+\texttt{\texcommand{prod}} \spacer
+\texttt{\texcommand{int}} \spacer
+\texttt{\texcommand{iint}} \spacer
+\texttt{\texcommand{iiint}} \spacer
+\texttt{\texcommand{iiiint}} \spacer
+\texttt{\texcommand{oint}} \spacer
+\texttt{\texcommand{bigcap}} \spacer
+\texttt{\texcommand{bigodot}} \spacer
+\texttt{\texcommand{bigcup}} \spacer
+\texttt{\texcommand{bigotimes}} \spacer
+\texttt{\texcommand{coprod}} \spacer
+\texttt{\texcommand{bigsqcup}} \spacer
+\texttt{\texcommand{bigoplus}} \spacer
+\texttt{\texcommand{bigvee}} \spacer
+\texttt{\texcommand{biguplus}} \spacer
+\texttt{\texcommand{bigwedge}} \lastspacer
+\end{mylist}
+
+\subsection{Symbols only available in text mode}
+
+\begin{mylist}
+\texttt{\texcommand{O}} \spacer
+\texttt{\texcommand{"}} \spacer
+\texttt{\texcommand{'}} \spacer
+\texttt{\texcommand{textbackslash}} \spacer
+\texttt{\texcommand{textvisiblespace}} \spacer
+\texttt{\texcommand{textasciicircum}} \spacer
+\texttt{\texcommand{textasciitilde}} \lastspacer
+\end{mylist}
+
+
+\subsection{Special commands}\label{sec:special-commands}
+
+If the magic command \texttt{\texcommand{strictspacing}} occurs anywhere in the input, blahtex will switch to `strict spacing mode' for the entire equation. This overrides the command-line \texttt{--spacing} setting.
+
+\subsection{Unicode symbol translation in math mode}\label{sec:input-symbol-translation}
+
+In math mode, blahtex accepts a number of non-ASCII symbols just like their command counterparts. These symbols are translated into \TeX{} commands, as detailed in the table below. For instance, the character $\alpha$ (Unicode 0x3B1) is equivalent to the ASCII sequence \verb|\alpha|. The benefit is input formulas that are more compact and more readable, provided that the file encoding and/or console character set allows for it. Note that this applies to both blahtex and blahtexml; see Section~\ref{sec:blahtexml-input-symbol-translation}.
+
+\input{InputSymbolTranslation.tex}
+
+\subsection{Non-ASCII characters in text mode}\label{sec:non-ascii-characters}
+
+Blahtex will serenely transcribe any non-ASCII characters for MathML output, as long as they appear in text mode (for example, surrounded by \texttt{\texcommand{text}\{...\}}). For PNG output, things are more difficult, because \LaTeX{} needs special packages and fonts to be available. At a minimum, the blahtex command line option \texttt{--use-ucs-package} must be used. The following sections describe which characters are permitted for PNG output.
+
+\subsubsection{Extended Latin}
+
+The following characters are handled directly by the \LaTeX{} \texttt{ucs} package.
+
+\newcommand{\nonasciicharlist}{
+\begin{quote}
+% hmmm latex2html was giving me funny warnings/errors
+% for the first few of these, so I added the leading
+% zero and that seemed to shut it up.
+\unichar{0161}
+\unichar{0163}
+\unichar{0167}
+\unichar{0169}
+\unichar{0172}
+\unichar{0174}
+\unichar{0176}
+\unichar{0181}
+\unichar{0182}
+\unichar{0191}
+\unichar{0192}
+\unichar{0193}
+\unichar{0194}
+\unichar{0195}
+\unichar{0196}
+\unichar{0197}
+\unichar{0198}
+\unichar{0199}
+\unichar{0200}
+\unichar{0201}
+\unichar{0202}
+\unichar{0203}
+\unichar{0204}
+\unichar{0205}
+\unichar{0206}
+\unichar{0207}
+\unichar{0209}
+\unichar{0210}
+\unichar{0211}
+\unichar{0212}
+\unichar{0213}
+\unichar{0214}
+\unichar{0215}
+\unichar{0216}
+\unichar{0217}
+\unichar{0218}
+\unichar{0219}
+\unichar{0220}
+\unichar{0221}
+\unichar{0223}
+\unichar{0224}
+\unichar{0225}
+\unichar{0226}
+\unichar{0227}
+\unichar{0228}
+\unichar{0229}
+\unichar{0230}
+\unichar{0231}
+\unichar{0232}
+\unichar{0233}
+\unichar{0234}
+\unichar{0235}
+\unichar{0236}
+\unichar{0237}
+\unichar{0238}
+\unichar{0241}
+\unichar{0242}
+\unichar{0243}
+\unichar{0244}
+\unichar{0245}
+\unichar{0246}
+\unichar{0247}
+\unichar{0248}
+\unichar{0249}
+\unichar{0250}
+\unichar{0251}
+\unichar{0252}
+\unichar{0253}
+\unichar{0255}
+\unichar{0256}
+\unichar{0257}
+\unichar{0258}
+\unichar{0259}
+\unichar{0262}
+\unichar{0263}
+\unichar{0264}
+\unichar{0265}
+\unichar{0266}
+\unichar{0267}
+\unichar{0268}
+\unichar{0269}
+\unichar{0270}
+\unichar{0271}
+\unichar{0274}
+\unichar{0275}
+\unichar{0276}
+\unichar{0277}
+\unichar{0278}
+\unichar{0279}
+\unichar{0282}
+\unichar{0283}
+\unichar{0284}
+\unichar{0285}
+\unichar{0286}
+\unichar{0287}
+\unichar{0288}
+\unichar{0289}
+\unichar{0290}
+\unichar{0292}
+\unichar{0293}
+\unichar{0296}
+\unichar{0297}
+\unichar{0298}
+\unichar{0299}
+\unichar{0300}
+\unichar{0301}
+\unichar{0304}
+\unichar{0305}
+\unichar{0308}
+\unichar{0309}
+\unichar{0310}
+\unichar{0311}
+\unichar{0313}
+\unichar{0314}
+\unichar{0315}
+\unichar{0316}
+\unichar{0317}
+\unichar{0318}
+\unichar{0321}
+\unichar{0322}
+\unichar{0323}
+\unichar{0324}
+\unichar{0325}
+\unichar{0326}
+\unichar{0327}
+\unichar{0328}
+\unichar{0332}
+\unichar{0333}
+\unichar{0334}
+\unichar{0335}
+\unichar{0336}
+\unichar{0337}
+\unichar{0338}
+\unichar{0339}
+\unichar{0340}
+\unichar{0341}
+\unichar{0342}
+\unichar{0343}
+\unichar{0344}
+\unichar{0345}
+\unichar{0346}
+\unichar{0347}
+\unichar{0348}
+\unichar{0349}
+\unichar{0350}
+\unichar{0351}
+\unichar{0352}
+\unichar{0353}
+\unichar{0354}
+\unichar{0355}
+\unichar{0356}
+\unichar{0357}
+\unichar{0360}
+\unichar{0361}
+\unichar{0362}
+\unichar{0363}
+\unichar{0364}
+\unichar{0365}
+\unichar{0366}
+\unichar{0367}
+\unichar{0368}
+\unichar{0369}
+\unichar{0372}
+\unichar{0373}
+\unichar{0374}
+\unichar{0375}
+\unichar{0376}
+\unichar{0377}
+\unichar{0378}
+\unichar{0379}
+\unichar{0380}
+\unichar{0381}
+\unichar{0382}
+\unichar{0461}
+\unichar{0462}
+\unichar{0463}
+\unichar{0464}
+\unichar{0465}
+\unichar{0466}
+\unichar{0467}
+\unichar{0468}
+\unichar{0482}
+\unichar{0483}
+\unichar{0486}
+\unichar{0487}
+\unichar{0488}
+\unichar{0489}
+\unichar{0496}
+\unichar{0500}
+\unichar{0501}
+\unichar{0504}
+\unichar{0505}
+\unichar{0508}
+\unichar{0509}
+\unichar{0510}
+\unichar{0511}
+\unichar{0536}
+\unichar{0537}
+\unichar{0538}
+\unichar{0539}
+\unichar{0542}
+\unichar{0543}
+\unichar{0550}
+\unichar{0551}
+\unichar{0552}
+\unichar{0553}
+\unichar{0558}
+\unichar{0559}
+\unichar{0562}
+\unichar{0563}
+\end{quote}
+}
+
+\begin{latexonly}
+\nonasciicharlist
+\end{latexonly}
+
+\begin{htmlonly}
+\newcommand{\unichar}[1]{\rawhtml&\##1;\endrawhtml}
+\nonasciicharlist
+\end{htmlonly}
+
+Currently blahtex does not recognise \TeX{}'s accent commands (like \texttt{\textbackslash"o}), so it is necessary to enter characters requiring accents directly in UTF-8.
+
+\subsubsection{Cyrillic}
+
+Blahtex experimentally supports Cyrillic characters, by using \LaTeX's \texttt{fontenc} package with the \texttt{X2} font encoding. Input must be entered in UTF-8, and surrounded by the (nonstandard) \texttt{\texcommand{cyr}\{...\}} command. Commands like \texttt{\texcommand{CYRSHA}} are not supported. Only the basic Cyrillic alphabet is supported, which as far as I can tell is sufficient for Russian.
+
+\textit{Disclaimer:} I don't know anything about Cyrillic, or any languages that use it. If I've messed something up, your advice would be appreciated.
+
+\subsubsection{Japanese}
+
+Blahtex experimentally supports Japanese (Kanji, Hiragana, Katakana) by using the \LaTeX{} \texttt{CJK} package. Input must be entered in UTF-8, and surrounded by the (nonstandard) \texttt{\texcommand{jap}\{...\}} command. The command-line option \texttt{--use-cjk-package} must be used. Additionally, the \TeX{} system must have a Japanese font installed, and blahtex needs to be informed via the command-line option \texttt{--japanese-font}.
+
+\textit{Disclaimer:} I don't know anything about the Japanese language or writing system. If I've messed something up, your advice would be appreciated.
+
+
+\subsection{Partial list of differences between blahtex and texvc}
+
+\subsubsection{Additional commands}
+
+Blahtex supports many \TeX/\LaTeX/AMS-\LaTeX{} commands not supported by texvc, especially many of the symbols in AMS-\LaTeX.
+
+\subsubsection{HTML support}
+
+The main feature of texvc that is missing in blahtex is support for HTML output. This may or may not be added in future.
+
+\subsubsection{Error reporting}
+
+Blahtex has much more robust syntax error reporting than texvc. Rather than a handful of generic error messages, blahtex can generate a wide variety of more detailed error messages to help the user diagnose the problem.
+
+\subsubsection{Parsing differences}
+
+Blahtex generally achieves much higher compatibility with \TeX{}'s parsing than texvc does. Texvc is generally more permissive. For example, the following are legal in texvc, but in \TeX{} and blahtex they require additional grouping braces:
+\begin{itemize}
+\item \texttt{\texcommand{frac} \texcommand{sqrt} a \texcommand{hat} b}
+\item \texttt{x\textasciicircum\texcommand{cong}}
+\item \texttt{x\textasciicircum\texcommand{left}( xyz \texcommand{right})}
+\item \texttt{x\textasciicircum\texcommand{begin}\{matrix\} a \texcommand{end}\{matrix\}}
+\end{itemize}
+
+The characters \texttt{\$} and \texttt{\%} are legal in texvc, but are illegal in blahtex. (Of course \texttt{\texcommand{\$}} and \texttt{\texcommand{\%}} are available.)
+
+These parsing differences may cause problems in replacing texvc with blahtex in an existing MediaWiki installation, since some legacy equations may not be compatible with blahtex. Preliminary research suggests that about 0.5\% of equations on Wikipedia itself (including the ten largest language Wikipedias) would be affected.
+
+\subsubsection{Nonstandard commands}\label{sec:texvc-compatible-commands}
+
+Blahtex has a command-line option (\texttt{--texvc-compatible-commands}) that enables all of the nonstandard commands in texvc's dialect of \TeX{}; that is, commands which are not present in \TeX{}, \LaTeX{}, or AMS-\LaTeX{}. It appears that most of these commands were added to texvc to make life easier for people familiar with HTML entities; for example, \texttt{\texcommand{isin}} is a texvc synonym for the standard \texttt{\texcommand{in}}. This option should be useful for backward compatibility with existing equations in databases like Wikipedia. Here is the complete list:
+
+\begin{mylist}
+\texttt{\texcommand{R}} \spacer
+\texttt{\texcommand{Reals}} \spacer
+\texttt{\texcommand{reals}} \spacer
+\texttt{\texcommand{Z}} \spacer
+\texttt{\texcommand{N}} \spacer
+\texttt{\texcommand{natnums}} \spacer
+\texttt{\texcommand{Complex}} \spacer
+\texttt{\texcommand{cnums}} \spacer
+\texttt{\texcommand{alefsym}} \spacer
+\texttt{\texcommand{alef}} \spacer
+\texttt{\texcommand{larr}} \spacer
+\texttt{\texcommand{rarr}} \spacer
+\texttt{\texcommand{Larr}} \spacer
+\texttt{\texcommand{lArr}} \spacer
+\texttt{\texcommand{Rarr}} \spacer
+\texttt{\texcommand{rArr}} \spacer
+\texttt{\texcommand{uarr}} \spacer
+\texttt{\texcommand{uArr}} \spacer
+\texttt{\texcommand{Uarr}} \spacer
+\texttt{\texcommand{darr}} \spacer
+\texttt{\texcommand{dArr}} \spacer
+\texttt{\texcommand{Darr}} \spacer
+\texttt{\texcommand{lrarr}} \spacer
+\texttt{\texcommand{harr}} \spacer
+\texttt{\texcommand{Lrarr}} \spacer
+\texttt{\texcommand{Harr}} \spacer
+\texttt{\texcommand{lrArr}} \spacer
+\texttt{\texcommand{hAar}} \spacer
+\texttt{\texcommand{sub}} \spacer
+\texttt{\texcommand{supe}} \spacer
+\texttt{\texcommand{sube}} \spacer
+\texttt{\texcommand{infin}} \spacer
+\texttt{\texcommand{lang}} \spacer
+\texttt{\texcommand{rang}} \spacer
+\texttt{\texcommand{real}} \spacer
+\texttt{\texcommand{image}} \spacer
+\texttt{\texcommand{bull}} \spacer
+\texttt{\texcommand{weierp}} \spacer
+\texttt{\texcommand{isin}} \spacer
+\texttt{\texcommand{plusmn}} \spacer
+\texttt{\texcommand{Dagger}} \spacer
+\texttt{\texcommand{exist}} \spacer
+\texttt{\texcommand{sect}} \spacer
+\texttt{\texcommand{clubs}} \spacer
+\texttt{\texcommand{spades}} \spacer
+\texttt{\texcommand{hearts}} \spacer
+\texttt{\texcommand{diamonds}} \spacer
+\texttt{\texcommand{sdot}} \spacer
+\texttt{\texcommand{ang}} \spacer
+\texttt{\texcommand{thetasym}} \spacer
+\texttt{\texcommand{Alpha}} \spacer
+\texttt{\texcommand{Beta}} \spacer
+\texttt{\texcommand{Epsilon}} \spacer
+\texttt{\texcommand{Zeta}} \spacer
+\texttt{\texcommand{Eta}} \spacer
+\texttt{\texcommand{Iota}} \spacer
+\texttt{\texcommand{Kappa}} \spacer
+\texttt{\texcommand{Mu}} \spacer
+\texttt{\texcommand{Nu}} \spacer
+\texttt{\texcommand{Rho}} \spacer
+\texttt{\texcommand{Tau}} \spacer
+\texttt{\texcommand{Chi}} \spacer
+\texttt{\texcommand{arcsec}} \spacer
+\texttt{\texcommand{arccsc}} \spacer
+\texttt{\texcommand{arccot}} \spacer
+\texttt{\texcommand{sgn}} \lastspacer
+\end{mylist}
+
+Also included are the four commands \texttt{\texcommand{empty}},
+\texttt{\texcommand{and}}, \texttt{\texcommand{or}}, \texttt{\texcommand{part}}. These commands \emph{are} part of \TeX{}/\LaTeX{}/AMS-\LaTeX{}, but they do \emph{not} do what texvc thinks they should do! Blahtex emulates texvc's behaviour for these commands (assuming that the \texttt{--texvc-compatible-commands} option is active).
+
+
+\section{The blahtex command-line application}\label{sec:command-line}
+
+The blahtex source code is available from \texttt{www.blahtex.org}. No binaries will be made available. All official releases should have been signed with a PGP key whose ID is 0x6269E206 and whose fingerprint is \texttt{9A51 0B6A B144 6A4D E1E5 0DE6 D604 6405 6269 E206}. This key is valid until 2nd August 2007. You can either get it from the blahtex website, or try searching for `blahtex' on a public keyserver.
+
+Besides reading this document, the interested developer is strongly advised to ``use the source''.
+
+\subsection{System prerequisites}\label{sec:prerequisites}
+
+Blahtex has been successfully compiled and run on the following configurations:
+\begin{itemize}
+\item Linux with gcc 4.0.2 20050808 (prerelease)
+\item Mac OS 10.4.5 (PowerPC) with gcc 4.0.1
+\end{itemize}
+
+Some of the source files seem to need a bit of memory to compile. I had trouble with \texttt{-O3} level optimisation on an older machine with 256MB RAM. It should be fine with 512MB or above.
+
+Other UNIX-based systems might work too. You will probably encounter problems with compilers other than gcc, or with older versions of gcc. (Probably gcc 3.3 is still okay.) I have personally met at least one older Solaris compiler that couldn't stomach the code. Your compiler must support \texttt{wstring} and 32-bit \texttt{wchar\_t}s. If you want to compile it on MS Windows... good luck, let me know how it goes.
+
+You will need an installation of the GNU \texttt{iconv} library. On some systems this is preinstalled, so you don't need to do anything. On my Mac I needed to install it (for example via fink).
+
+\subsubsection{Prerequisites for generating PNG output}
+
+To generate PNGs, you will need \LaTeX{} and the \texttt{dvipng} utility, which is included in many \LaTeX{} distributions. Blahtex assumes that the following \LaTeX{} packages are available: \texttt{color}, \texttt{fontenc}, \texttt{inputenc}, \texttt{amsmath}, \texttt{amsfonts}, \texttt{amssymb}. All of these packages are included in teTeX, one of the most popular \TeX{} distributions for UNIX systems.
+
+Additionally, to handle non-ASCII characters, the \texttt{ucs} package must be installed, and blahtex must be informed by using the \texttt{--use-ucs-package} command line option. To enable computation of height and depth of the output PNG image, the \texttt{preview} package must be installed, and blahtex must be informed by using the \texttt{--use-preview-package} option.
+
+\subsubsection{Modified version of \texttt{dvipng}}
+
+The version of \texttt{dvipng} running on the blahtex website is a slightly modified version of \texttt{dvipng} 1.7. The modification pertains to the automatic hinting method used with the underlying FreeType 2 library, and was made with the help of the author of \texttt{dvipng}, Jan-\AA{}ke Larsson (thanks Jan-\AA{}ke!).
+
+It's quite simple: in the source file \texttt{ft.c}, just replace \texttt{FT\_LOAD\_NO\_HINTING} by \texttt{FT\_LOAD\_TARGET\_LIGHT}, and recompile. The author has indicated that this modification will appear in \texttt{dvipng} version 1.8.
+
+\subsubsection{Prerequisites for Japanese in PNG output}\label{sec:howto-japanese}
+
+To handle Japanese, the \LaTeX{} \texttt{CJK} package must be installed, and a Japanese font must be installed.
+
+\emph{Warning: Installing TrueType CJK fonts for use by \LaTeX{}/dvipng is a dark art. In this section I will describe a sequence of steps that worked for me. I will explain along the way what I believe the purpose of each step to be, and caveats that you should be aware of. \textbf{However, this should not be construed to imply that I have any idea at all of what I am talking about}}.
+
+You will need a Japanese TrueType font. For testing, I have been using the Sazanami gothic font: \mylink{http://sourceforge.jp/projects/efont/files/}. Look inside for the TrueType font file \texttt{sazanami-gothic.ttf}.
+
+\emph{Warning: I have not read the license document for this font. It is mostly in Japanese. It is quite possible that it is \textbf{not legal} to use this font for certain purposes. Since it is advertised as being targeted at OpenOffice, I expect that all is okay, but \textbf{I am not a lawyer}.}
+
+The strategy outlined below is to convert the TrueType font to a bunch of smaller Type 1 fonts, and to provide enough other information to make \LaTeX{} and \texttt{dvipng} happy.
+
+You will need FontForge, from \mylink{http://fontforge.sourceforge.net/}. (Note that to install FontForge on Mac OS X, you will need the StuffIt Expander utility to decompress the installation package. StuffIt Expander was included in Mac OS 10.3.x, but is not shipped with Mac OS 10.4.x. I had a copy available from an older OS, but if you have only OS 10.4.x, you will need to download StuffIt Expander from \mylink{http://www.stuffit.com/mac/expander/}. Also on the Mac you need to make sure that you have an X11 server available. On Mac OS 10.4.x it should be pre-installed in \texttt{/Applications/Utilities/X11.App}. On earlier versions you may need to download X11 from Apple's website.)
+
+Create a temporary working directory somewhere, which I will refer to in these instructions as \texttt{/temp}.
+
+You need to select a name for your font. Probably best to keep it very short. I will use the name `saza' throughout the following example; you will need to replace every `saza' with whatever you have chosen.
+
+Boot up X11, and run FontForge. You should get an `Open Font' dialog; open the \texttt{ttf} file from above. Then select `Generate Fonts...' from the File menu. Navigate to your \texttt{/temp} directory; this is where the output from the `generate fonts' process will be saved. On the drop-down list on the left, select `PS Type 1 (Multiple)'. (The point here is to split the font up into many smaller sub-fonts. This is necessary because \TeX{} can only really work with fonts that contain at most 256 symbols, and CJK fonts have many more than that.) The default file name will be something like \texttt{sazanami-gothic\%s.pfb}; change this to \texttt{saza-uni\%s.pfb}. Now press `Options', and make sure `Output TFM \& ENC' is checked. Then hit `Save'. A new `Find Sub Font Definitions' dialog will pop up. You will need to find the file \texttt{Unicode.sfd} on the web somewhere (Google is your friend); save this file somewhere and tell the dialog where it is. Press OK.
+
+FontForge should go away and think for a while. When it's finished, your \texttt{/temp} directory should be filled with lots of \texttt{.tfm}, \texttt{.pfb}, \texttt{.afm}, and \texttt{.enc} files. You can throw away the last two; we only need the \texttt{.tfm} and \texttt{.pfb} files. In your \texttt{texmf} tree, make a new directory called \texttt{/texmf/fonts/tfm/saza/}, and put all the \texttt{.tfm} files there. Similarly, put all the \texttt{.pfb} files into a directory \texttt{/texmf/fonts/type1/saza/}.
+
+(The \texttt{.tfm} files are `\TeX{} font metric' files. Roughly speaking, they tell \TeX{} how much space each character takes up. The corresponding \texttt{.pfb} files are Adobe Type 1 font files; they describe the actual glyphs for each character.)
+
+Create a plain text file called \texttt{C70saza.fd}, and fill it with the following text:
+\begin{verbatim}
+\DeclareFontFamily{C70}{saza}{\hyphenchar \font\m@ne}
+\DeclareFontShape{C70}{saza}{m}{n}{<-> CJK * saza-uni}{}
+\DeclareFontShape{C70}{saza}{bx}{n}{<-> CJKb * saza-uni}{CJKbold}
+\end{verbatim}
+Save this file under \texttt{/texmf/tex/latex/saza/}. (I think the idea of this file is to tell \LaTeX{} something about the new font you have installed.)
+
+That's all the files you need. Now you need to run \texttt{mktexlsr} (or \texttt{sudo mktexlsr}) to update \TeX's filename cache.
+
+When you run blahtex, you will need to use the command line options \texttt{--use-cjk-package --use-ucs-package --japanese-font saza}.
+
+\subsection{Compiling blahtex}\label{sec:compiling-blahtex}
+
+Unpack the source into your favourite directory.
+\begin{itemize}
+\item If you're running Linux, just type \texttt{make linux}.
+\item If you're running Mac OS X (as I do), try \texttt{make mac}.
+\end{itemize}
+You should then find an executable \texttt{blahtex} in the current directory. If you want to quickly test it, try \texttt{echo '\texcommand{frac} xy' | ./blahtex --mathml}.
+
+\subsection{Command-line syntax}\label{sec:command-line-syntax}
+
+The basic syntax is: \texttt{blahtex [ options ]}; the command-line options are listed below. The \TeX{} input should be supplied on standard input in UTF-8 encoding, which means plain ASCII if you don't care about Unicode. If no input is given, blahtex will print a help screen. If neither of the \texttt{--mathml} or \texttt{--png} options is selected, then blahtex will still process the input for syntax errors, but will produce no output.
+
+\subsubsection{General options}
+
+\begin{itemize}
+\item \texttt{--help}. Prints out a list of command-line options.
+\item \texttt{--texvc-compatible-commands}. Enables use of commands that are specific to texvc, but that are not standard \TeX{}/\LaTeX{}/AMS-\LaTeX{} commands (see Section \ref{sec:texvc-compatible-commands}).
+\item \texttt{--print-error-messages}. This will print out a list of all error IDs and corresponding messages that blahtex can possibly emit inside an \texttt{<error>} block (see Section \ref{sec:interpreting-output}).
+\end{itemize}
+
+\subsubsection{MathML-related options}
+
+\begin{itemize}
+\item \texttt{--mathml}. Enables MathML output.
+\item \texttt{--mathml-encoding \textit{type}}. Controls the way blahtex outputs MathML characters.
+\begin{itemize}
+\item \texttt{--mathml-encoding raw}. Use Unicode code points (i.e.~UTF-8) directly in the output.
+\item \texttt{--mathml-encoding numeric} (default). Use XML numeric entities, like \texttt{\&\#x2191;}. This is likely to be the most portable option.
+\item \texttt{--mathml-encoding short}. Use `short' MathML entity names, like \texttt{\&uarr;}.
+\item \texttt{--mathml-encoding long}. Use `long' MathML entity names, like \texttt{\&UpArrow;}.
+\end{itemize}
+Not every MathML character has `short' and/or `long' names; blahtex will fall back on numeric entities in this case.
+\item \texttt{--disallow-plane-1}. Prevents blahtex from outputting any plane-1 Unicode characters, either as UTF-8 or as numeric entities. Instead, it will use named entities like \texttt{\&Afr;} (Fraktur `A'). The rationale is that some browsers have somewhat incomplete support for plane-1 characters, but do okay with these named entities.
+\item \texttt{--mathml-version-1-fonts}. Forbids use of the \texttt{mathvariant} attribute, which is only available in MathML 2.0. Instead, blahtex will use MathML version 1.x font attributes: \texttt{fontfamily}, \texttt{fontstyle} and \texttt{fontweight}, which are all deprecated in MathML 2.0. If these attributes are insufficient, for example characters with \texttt{mathvariant} equal to \texttt{double-struck}, blahtex will substitute explicit MathML entities.
+\item \texttt{--other-encoding \textit{type}}. Controls the way blahtex outputs non-ASCII, non-MathML characters. Such a character could only occur if it was supplied directly in the input.
+\begin{itemize}
+\item \texttt{--other-encoding raw}. Use Unicode code points (i.e.~UTF-8) directly in the output.
+\item \texttt{--other-encoding numeric} (default). Use XML numeric entities.
+\end{itemize}
+Note: the default values for \texttt{--mathml-encoding} and \texttt{--other-encoding} imply that all output is plain ASCII.
+\item \texttt{--indented}. Prints each MathML tag on a separate line, with appropriate indenting.
+\item \texttt{--spacing \textit{type}}. Controls how much MathML spacing markup to use (i.e.~\texttt{<mspace>} tags, and \texttt{lspace}/\texttt{rspace} attributes). Blahtex always uses \TeX{}'s rules (or an approximation thereof) to compute how much space to place between symbols in the equation, but this option describes how often it will actually emit MathML spacing markup to implement its spacing decisions.
+\begin{itemize}
+\item \texttt{--spacing strict} (default). Output spacing markup everywhere possible; leave as little choice as possible to the MathML renderer. This will result in the most bloated output, but hopefully will look as much like \TeX{} output as possible.
+\item \texttt{--spacing moderate}. Output spacing commands whenever blahtex thinks a typical MathML renderer is likely to do something visually unsatisfactory without additional help. The aim is to get good agreement with \TeX{} without overly bloated MathML markup. (It's very difficult to get this right, so I expect it to be under continual review.)
+\item \texttt{--spacing relaxed}. Only output spacing commands when the user specifically asks for them, using \TeX{} commands like \texttt{\texcommand{,}} or \texttt{\texcommand{quad}}.
+\end{itemize}
+The magic command \texttt{\texcommand{strictspacing}} will override this setting (see Section \ref{sec:special-commands}).
+
+Blahtex pays a lot of attention to spacing, because the MathML defaults (via the operator dictionary) are often inadequate. To see the difference, try the simple input \texttt{a := b} on blahtex (with spacing set to moderate or strict) and compare with the output of other translators.
+\end{itemize}
+
+\subsubsection{PNG-related options}
+
+\begin{itemize}
+\item \texttt{--png}. Enables PNG output.
+\item \texttt{--use-ucs-package}. This tells blahtex it may use the \LaTeX{} \texttt{ucs} package to handle non-ASCII characters. Obviously, it is necessary to install the \texttt{ucs} package before using this option. See Section \ref{sec:non-ascii-characters} for more information.
+\item \texttt{--use-cjk-package}. This tells blahtex it may use the \LaTeX{} \texttt{CJK} package to handle Chinese/Japanese/Korean characters. Obviously, it is necessary to install the \texttt{CJK} package before using this option. See also Section \ref{sec:howto-japanese}.
+\item \texttt{--use-preview-package}. This tells blahtex it may use the \LaTeX{} \texttt{preview} package. Obviously, it is necessary to install the \texttt{preview} package before using this option. With this option enabled, blahtex is able to compute the height and depth of the output PNG image (via dvipng).
+\item \texttt{--japanese-font \textit{fontname}}. Specifies which font to use for characters surrounded by \texttt{\texcommand{jap}\{...\}}. See also Section \ref{sec:howto-japanese}.
+\item \texttt{--shell-latex \textit{command}}. Specifies the command to use for running \LaTeX{}. Default is just \texttt{latex}.
+\item \texttt{--shell-dvipng \textit{command}}. Specifies the command to use for running dvipng. Default is just \texttt{dvipng}.
+\item \texttt{--temp-directory \textit{directory}}. Specifies the directory that should be used for the intermediate files used during PNG creation. Default is the current directory.
+\item \texttt{--png-directory \textit{directory}}. Specifies the directory in which the PNG output file should be placed. Default is the current directory.
+\end{itemize}
+
+\subsubsection{Debugging options}
+
+\begin{itemize}
+\item \texttt{--throw-logic-error}. Simulates the effect of a debug assertion occurring, so that you can test any associated error-logging code.
+\item \texttt{--debug \textit{type}}. Enables some debugging output to assist in working out what is going on inside blahtex's head:
+\begin{itemize}
+\item \texttt{--debug parse}. Print the parse tree.
+\item \texttt{--debug layout}. Print the layout tree. This is an intermediate stage between parsing and MathML.
+\item \texttt{--debug purified}. Print `purified \TeX{}'. This is the complete \TeX{} file that blahtex sends to \LaTeX{} for PNG generation.
+\end{itemize}
+Multiple \texttt{--debug} options may be present. The format of debugging output is subject to change, and is not designed to be machine-readable; it will interrupt blahtex's usual XML output format in ghastly ways.
+\item \texttt{--keep-temp-files}. Instructs blahtex not to delete any of the temporary files that get created during PNG generation.
+\end{itemize}
+
+\subsection{Interpreting blahtex's output}\label{sec:interpreting-output}
+
+Blahtex's output looks like XML. (Unless a \emph{really fatal} error occurs :-)) By default, the output is completely ASCII, although there are command-line options which enable UTF-8 output for certain characters. The entire output is surrounded by the tags \texttt{<blahtex>...</blahtex>}. Inside these tags, there are several possibilities:
+
+\begin{itemize}
+\item If a debug assertion occurred (i.e.~if blahtex detected a bug within itself), you will see a \texttt{<logicError>...</logicError>} block. Between the \texttt{<logicError>} tags will be a string describing the error. If you ever see one of these, please report it to me.
+
+\item If there was a syntax error in the \TeX{} input, there will be a single \texttt{<error>...</error>} block which describes the error (the \texttt{<error>} block format is described in detail below). The possible error IDs that can occur here are:
+
+\begin{itemize}
+\item \texttt{InvalidUtf8Input}
+\item \texttt{IllegalCharacter}
+\item \texttt{TooManyTokens}
+\item \texttt{NonAsciiInMathMode}
+\item \texttt{ReservedCommand}
+\item \texttt{IllegalFinalBackslash}
+\item \texttt{UnrecognisedCommand}
+\item \texttt{IllegalCommandInMathMode}
+\item \texttt{IllegalCommandInMathModeWithHint}
+\item \texttt{IllegalCommandInTextMode}
+\item \texttt{IllegalCommandInTextModeWithHint}
+\item \texttt{MissingOpenBraceBefore}
+\item \texttt{MissingOpenBraceAfter}
+\item \texttt{MissingOpenBraceAtEnd}
+\item \texttt{NotEnoughArguments}
+\item \texttt{MissingCommandAfterNewcommand}
+\item \texttt{IllegalRedefinition}
+\item \texttt{MissingOrIllegalParameterCount}
+\item \texttt{MissingOrIllegalParameterIndex}
+\item \texttt{UnmatchedOpenBracket}
+\item \texttt{UnmatchedOpenBrace}
+\item \texttt{UnmatchedCloseBrace}
+\item \texttt{UnmatchedLeft}
+\item \texttt{UnmatchedRight}
+\item \texttt{UnmatchedBegin}
+\item \texttt{UnmatchedEnd}
+\item \texttt{UnexpectedNextCell}
+\item \texttt{UnexpectedNextRow}
+\item \texttt{MismatchedBeginAndEnd}
+\item \texttt{CasesRowTooBig}
+\item \texttt{SubstackRowTooBig}
+\item \texttt{MissingDelimiter}
+\item \texttt{IllegalDelimiter}
+\item \texttt{MisplacedLimits}
+\item \texttt{DoubleSuperscript}
+\item \texttt{DoubleSubscript}
+\item \texttt{AmbiguousInfix}
+\item \texttt{InvalidColour}
+\end{itemize}
+
+\item Assuming there were no syntax errors or debug assertions:
+
+\begin{itemize}
+
+\item If you gave the \texttt{--mathml} option at the command line, you will get a \texttt{<mathml>...</mathml>} block. If the MathML was generated successfully, the \texttt{<mathml>} block will contain a \texttt{<markup>...</markup>} block, containing the actual MathML. If there was a problem generating the MathML, the \texttt{<mathml>} block will instead contain an \texttt{<error>} block describing the problem. The only possible error IDs that can occur here are:
+\begin{itemize}
+\item \texttt{TooManyMathmlNodes}
+\item \texttt{UnavailableSymbolFontCombination}
+\end{itemize}
+
+\item If you gave the \texttt{--png} option at the command line, you will get a \texttt{<png>...</png>} block.
+
+If the PNG image was generated successfully, then it will be stored in a file called \texttt{X.png}, where \texttt{X} is an md5 hash (32 character lowercase hex string); the \texttt{<png>} block will then contain \texttt{<md5>X</md5>}. (In fact \texttt{X} is the md5 hash of the \TeX{} file that got sent to \LaTeX{} to generate the image.) If the option \texttt{--use-preview-package} was used, the \texttt{<png>} block will also contain blocks \texttt{<height>H</height>} and \texttt{<depth>D</depth>} which indicate the height and depth of the image, in pixels. (These are computed by \texttt{dvipng}.) If you want to display the PNG in a web page so that it is aligned with surrounding text, you can use the depth value as follows: \texttt{<img src="..." style="vertical-align:~-\textit{D}px">}.
+
+If there was an error generating the PNG file, the \texttt{<png>} block will instead contain an \texttt{<error>} block describing the problem. The possible error IDs here are:
+\begin{itemize}
+\item \texttt{CannotCreateTexFile}
+\item \texttt{CannotWriteTexFile}
+\item \texttt{CannotRunLatex}
+\item \texttt{CannotRunDvipng}
+\item \texttt{CannotWritePngDirectory}
+\item \texttt{CannotChangeDirectory}
+\item \texttt{LatexPackageUnavailable}
+\item \texttt{WrongFontEncoding}
+\item \texttt{WrongFontEncodingWithHint}
+\item \texttt{IllegalNestedFontEncodings}
+\item \texttt{LatexFontNotSpecified}
+\item \texttt{PngIncompatibleCharacter}
+\end{itemize}
+
+\end{itemize}
+\end{itemize}
+
+The \texttt{<error>} block (mentioned several times above) has the following format. First, it contains an \texttt{<id>...</id>} block, containing an error ID (i.e.~one of the CamelCase strings listed above). Next, a sequence of zero or more \texttt{<arg>...</arg>} blocks, representing the `arguments' of the error. Finally there is a \texttt{<message>...</message>} block, containing a translation of the error into English. For example, one possible error block is:
+
+\begin{quote}
+\texttt{<error>}\\
+\texttt{<id>MismatchedBeginAndEnd</id>}\\
+\texttt{<arg>\texcommand{begin}\{matrix\}</arg>}\\
+\texttt{<arg>\texcommand{end}\{array\}</arg>}\\
+\texttt{<message>The commands "\texcommand{begin}\{matrix\}" and "\texcommand{end}\{array\}" do not match</message>}\\
+\texttt{</error>}
+\end{quote}
+
+The simplest way to report the error to the user is to extract the \texttt{<message>} block. If you want to implement some localisation of error messages, you should use the \texttt{<id>} and \texttt{<arg>} fields. A complete list of error messages can be found in the source file \texttt{Messages.cpp}, or try the command-line option \texttt{--print-error-messages}. The error IDs may change in future versions of blahtex.
+
+\section{The blahtexml command-line application}\label{sec:blahtexml}
+
+The blahtexml source code is available from \texttt{http://gva.noekeon.org/blahtexml}.
+
+\subsection{System prerequisites}\label{sec:blahtexml-prerequisites}
+
+In addition to the prerequisites of blahtex (see Section~\ref{sec:prerequisites}), blahtexml requires one to have Xerces-C 2.x installed. Xerces-C is an XML parser library and is available at \texttt{http://xerces.apache.org/xerces-c/}. Blahtexml dynamically links to Xerces-C.
+
+\subsection{Compiling blahtexml}\label{sec:compiling-blahtexml}
+
+Unpack the source into your favourite directory.
+\begin{itemize}
+\item If you're running Linux, just type \texttt{make blahtexml-linux}.
+\item If you're running Mac OS X, try \texttt{make blahtexml-mac}.
+\end{itemize}
+You should then find an executable \texttt{blahtexml} in the current directory.
+
+\subsection{Using blahtexml}\label{sec:blahtexml-command-line-syntax}
+
+Blahtexml contains blahtex, which means that all the command-line options of blahtex are available with blahtexml. They are described in Section~\ref{sec:command-line-syntax}.
+
+What is specific to blahtexml is the \texttt{--xmlin} option. This tells blahtexml to read an XML file and to convert all the equations it finds, producing an output XML file which contains the equivalent MathML code. All the elements, attributes and processing instructions are copied from the input to the output XML file, unchanged. When blahtexml encounters an equation written in blahtex syntax, it converts it into MathML.
+
+When used, the \texttt{--xmlin} option must be first. Note that, in this case, not all the blahtex command line options work. The options that are ignored when \texttt{--xmlin} is used are: \texttt{--png}, \texttt{--mathml-encoding}, \texttt{--other-encoding} and \texttt{--disallow-plane-1}.
+
+In the following, we describe how blahtexml locates blahtex formulas and how the process works exactly. For this, we assume that the reader has some familiarity with the XML syntax and with the XML namespaces.
+
+In an XML file, blahtexml looks for attributes with name \texttt{m}, \texttt{inline} or \texttt{block} in the namespace \texttt{http://gva.noekeon.org/blahtexml}. It will then remove this attribute and expand the produced MathML inside the element that contains the attribute. Let us just illustrate this with an example.
+
+Consider the following input file:
+\begin{verbatim}
+<?xml version="1.0"?>
+<equations xmlns:b="http://gva.noekeon.org/blahtexml">
+ <equation b:inline="x+y"/>
+ <equation b:block="\exp(-\gamma x)"/>
+</equations>
+\end{verbatim}
+
+By calling \texttt{blahtexml --xmlin < example1.xml}, blahtexml will produce the following output, where for clarity some MathML elements are not written:
+\begin{verbatim}
+<?xml version="1.0" encoding="UTF-8"?>
+<equations xmlns:b="http://gva.noekeon.org/blahtexml">
+ <equation>
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <mi>x</mi>
+ <mo lspace="0.222em" rspace="0.222em">+</mo>
+ <mi>y</mi>
+ </math>
+ </equation>
+ <equation>
+ <math xmlns="http://www.w3.org/1998/Math/MathML"
+ display="block">
+ <mi>exp</mi>[...]
+ </math>
+ </equation>
+</equations>
+\end{verbatim}
+
+As one can see in this example, the \texttt{inline} attribute produces MathML in inline mode (the default of MathML), while the \texttt{block} attribute produces MathML in block mode by adding the attribute \texttt{display="block"} in the \texttt{math} element.
+
+The \texttt{m} attribute does not create a \texttt{math} element, but instead inserts the MathML content as is. This can be useful if, e.g., one wants to type an equation partly in MathML and partly in blahtex. This is illustrated in the next example, where a blahtex equation is given inside an \texttt{msqrt} MathML element. The input file
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <msqrt b:m="x+y"/>
+ </math>
+</root>
+\end{verbatim}
+yields as output:
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <msqrt>
+ <mi>x</mi>
+ <mo lspace="0.222em" rspace="0.222em">+</mo>
+ <mi>y</mi>
+ </msqrt>
+ </math>
+</root>
+\end{verbatim}
+
+Note that if more than one attribute in the blahtex namespace is present, only one is processed, with \texttt{m} having the highest priority, then \texttt{inline} and finally \texttt{block}.
+
+\subsubsection{MathML namespace in output file}
+
+The MathML elements produced in the output are in the MathML namespace, namely \texttt{http://www.w3.org/1998/Math/MathML}. There are two ways to express the namespace: either by adding the \texttt{xmlns} attribute to the outer MathML element, or by adding a prefix associated with the MathML namespace to all the MathML elements. By default, or using the \texttt{--mathml-nsprefix-auto} option, blahtexml automatically chooses between the two alternatives. Either a prefix already exists and blahtexml reuses it, or such a prefix does not exist and an \texttt{xmlns} attribute is added.
+
+From the point of view of XML namespaces, both approaches are equivalent. Nevertheless, some XML applications predate the introduction of XML namespaces and it may sometimes be necessary to force either solution.
+
+\begin{itemize}
+\item \texttt{--mathml-nsprefix-auto}. This is the default option: blahtexml automatically chooses to add a prefix or not.
+\item \texttt{--mathml-nsprefix-none}. The produced MathML elements are not prefixed. The \texttt{xmlns} attribute is added to the outer MathML element.
+\item \texttt{--mathml-nsprefix}. This option requires a parameter: the prefix (string). The produced MathML elements are prefixed with the given prefix and a colon.
+\end{itemize}
+
+Consider the following input file:
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq b:inline="x"/>
+ <eq xmlns:m="http://www.w3.org/1998/Math/MathML" b:inline="x"/>
+</root>
+\end{verbatim}
+
+Invoking blahtexml using the default option \texttt{--mathml-nsprefix-auto}, one gets the following result:
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq>
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <mi>x</mi>
+ </math>
+ </eq>
+ <eq xmlns:m="http://www.w3.org/1998/Math/MathML">
+ <m:math><m:mi>x</m:mi></m:math>
+ </eq>
+</root>
+\end{verbatim}
+
+Using \texttt{--mathml-nsprefix-none}, one gets the following result:
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq>
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <mi>x</mi>
+ </math>
+ </eq>
+ <eq xmlns:m="http://www.w3.org/1998/Math/MathML">
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
+ <mi>x</mi>
+ </math>
+ </eq>
+</root>
+\end{verbatim}
+
+And using \texttt{--mathml-nsprefix m}, one gets the following result:
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq>
+ <m:math xmlns:m="http://www.w3.org/1998/Math/MathML">
+ <m:mi>x</m:mi>
+ </m:math>
+ </eq>
+ <eq xmlns:m="http://www.w3.org/1998/Math/MathML">
+ <m:math><m:mi>x</m:mi></m:math>
+ </eq>
+</root>
+\end{verbatim}
+
+
+\subsubsection{Output document type}
+
+By default, the generated XML file does not contain a document type declaration. If the output file is intended for a given XML application, a \texttt{DOCTYPE} declaration may be needed. The \texttt{--doctype-}* command-line options provide a way to specify this.
+
+\begin{itemize}
+\item \texttt{--doctype-system}. This option takes a reference to a DTD (string) as argument and causes blahtexml to output a \texttt{SYSTEM} document type declaration with the given reference.
+\item \texttt{--doctype-public}. This option takes two arguments: a public ID (string) and a reference to a DTD (string). Blahtex produces a \texttt{PUBLIC} document type declaration with the given public ID and reference.
+\item \texttt{--doctype-xhtml+mathml}. This option is equivalent to \texttt{--mathml-nsprefix-none} \texttt{--doctype-public} \texttt{"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"}
+\newline
+\texttt{"http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd"} and is useful to produce valid XHTML+MathML output.
+\end{itemize}
+
+\subsubsection{Error reporting}
+
+If a blahtex equation given in the input XML file generates an error during its conversion to MathML, blahtexml adds an \texttt{error} element (in the blahtex namespace) instead of the MathML elements. The blahtex formula is not discarded, so that the user can more easily see what caused the problem. Furthermore, the number of errors encountered is reported on the screen.
+
+For instance, the following input file
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq b:inline="\qwerty"/>
+</root>
+\end{verbatim}
+generates the following output file
+\begin{verbatim}
+<root xmlns:b="http://gva.noekeon.org/blahtexml">
+ <eq b:inline="\qwerty">
+ <error xmlns="http://gva.noekeon.org/blahtexml">
+ Unrecognised command "\qwerty"
+ </error>
+ </eq>
+</root>
+\end{verbatim}
+
+\subsubsection{Unicode symbol translation in math mode}\label{sec:blahtexml-input-symbol-translation}
+
+As detailed in Section~\ref{sec:input-symbol-translation}, blahtexml accepts some Unicode symbols and translates them into \TeX{} commands. For instance, the following three lines are equivalent and will give the same output:
+\begin{itemize}
+\item \verb|<eq b:inline="\Phi \leq \Omega \approx \Gamma"/>|
+\item \verb|<eq b:inline="|$\Phi \leq \Omega \approx \Gamma$\verb|"/>|
+\item \verb|<eq b:inline="&#x3A6;&#x2264;&#x3A9;&#x2248;&#x393;"/>|
+\end{itemize}
+
+The first line uses the traditional \TeX{} commands. The second line uses the Unicode symbols directly, assuming that the encoding of the XML file allows for it. Note that UTF-8, the default encoding in XML, includes all Unicode characters. The third line shows that it is also possible to use XML numeric character references to input Unicode characters.
+
+\section{The blahtex API}\label{sec:API}
+
+This section gives a summary of how to link blahtex directly into a C++ application. You will need to write a wrapper if you want to use a different language. (If you do this, please consider sending me the wrapper so I can make it available for others to use.)
+
+\subsection{Core vs non-core}
+
+The blahtex source code is divided into two parts:
+\begin{itemize}
+\item The `blahtex core', whose source files are all in the \texttt{BlahtexCore} subdirectory. The core does all the hard work involved in translating \TeX{} to MathML, and the not-as-hard work of preparing a complete \TeX{} file to be sent to \LaTeX{} to generate the PNG image. It does not include any functionality which may be more OS-dependent; pretty much all it does is allocate memory and push strings around.
+\item The blahtex command-line application, whose source files are in the main \texttt{source} directory. This `non-core' source is basically a wrapper that turns the blahtex core into a command-line application, and additionally handles shelling out to \LaTeX{} to generate the PNG output.
+\end{itemize}
+
+
+\subsection{How to use the core}
+
+To use the blahtex core in your C++ application, you should follow these steps:
+
+\begin{enumerate}
+\item Copy the \texttt{BlahtexCore} directory to wherever your project is.
+\item Any source file that wants to access the blahtex core needs to \texttt{\#include "BlahtexCore/Interface.h"}.
+\item Everything in the blahtex core is in the \texttt{blahtex} namespace. So, you might also consider \texttt{using namespace blahtex}.
+\item Declare an object of type \texttt{blahtex::Interface}. (It's perfectly okay to have several \texttt{Interface} objects lying around; they won't get in each other's way.)
+\item You can set various conversion options by setting the public member variables of the \texttt{Interface} object. See the header file \texttt{Interface.h} for a list of members. The structs \texttt{MathmlOptions}, \texttt{EncodingOptions} and \texttt{PurifiedTexOptions} are described in detail in the header file \texttt{Misc.h}; they basically correspond to various command-line options (see Section \ref{sec:command-line-syntax}).
+\item Call the member function \texttt{Interface::ProcessInput(x)}, where \texttt{x} is a \texttt{wstring} containing the input \TeX{}.
+\item You can call the member function \texttt{Interface::GetMathml()} to get the MathML translation as a \texttt{wstring}.
+\item You can call the member function \texttt{Interface::GetPurifiedTex()} to get the `purified \TeX{}' as a \texttt{wstring}; this is a complete \TeX{} file that could be sent to \LaTeX{} to generate graphical output.
+\item Any of the above functions can throw exception objects if something goes wrong, so you probably need to worry about \texttt{catch}ing them. They will throw a \texttt{std::logic\_error} object if a debug assertion occurs. They will throw a \texttt{blahtex::Exception} object to indicate a syntax error in the input, or if there is a problem in generating the MathML or purified \TeX{}. The \texttt{blahtex::Exception} object is documented in \texttt{Misc.h}. If you need the error translated to English, you probably want to check out the \texttt{GetErrorMessage} function in \texttt{Messages.cpp} (not part of the blahtex core).
+\end{enumerate}
+
+
+\subsection{Dealing with \texttt{wstring}}
+
+The blahtex core is internally Unicode throughout, and works exclusively with wide strings --- \texttt{wstring}, not \texttt{string}. If your code only deals with ASCII strings, or UTF-8, you will need a way of converting between narrow and wide strings. The blahtex command-line application has a class \texttt{UnicodeConverter} which provides precisely this functionality; it is essentially a C++ wrapper for the \texttt{iconv} library in terms of \texttt{string} (for storing UTF-8 strings) and \texttt{wstring} (for storing UTF-32 (UCS-4) strings; endianness depends on the platform). To use this class:
+\begin{enumerate}
+\item Put \texttt{UnicodeConverter.cpp} and \texttt{UnicodeConverter.h} in your project directory, and make sure you \texttt{\#include "UnicodeConverter.h"}.
+\item Link against the \texttt{iconv} library. You may need to compile and install \texttt{iconv}, and possibly use the linker switch \texttt{-liconv}.
+\item On some systems (including Mac OS X, but not Linux), you need to define the constant \texttt{BLAHTEX\_ICONV\_CONST} for \texttt{UnicodeConverter.cpp}, otherwise you'll probably get compiler warnings. See the source for an explanation.
+\item Declare a \texttt{UnicodeConverter} object and call \texttt{Open()}. This sets up the underlying \texttt{iconv\_t} handles.
+\item Use the \texttt{ConvertIn} and \texttt{ConvertOut} member functions to convert between UTF-8 and UTF-32 (UCS-4).
+\item The \texttt{UnicodeConverter} class can also throw exceptions if something goes wrong (for example, invalid UTF-8 input). See the source for details.
+\end{enumerate}
+
+\section{History/changelog}\label{sec:history}
+
+\begin{itemize}
+\item Version 0.1 (Jul/2005). You don't want to know about this one.
+\item Version 0.2 (2/Aug/2005). Initial public release.
+\item Version 0.2.1 (8/Aug/2005). Now compiles under Linux.
+\item Version 0.3.x (Aug 2005 to Jan 2006). Series of internal development releases, everything getting completely rewritten. It would be an act of irresponsibility to list every change.
+\item Version 0.4 (29/Jan/2006). Accompanies announcement of test wiki.
+\item Version 0.4.1 (8/Feb/2006). Added \texttt{--compute-vertical-shift} option.
+\item Version 0.4.2 (12/Feb/2006).
+\begin{itemize}
+\item Greatly improved coverage of symbols in \LaTeX{} and AMS-\LaTeX.
+\item Greatly improved coverage of \texttt{\texcommand{not}}.
+\item Now \texttt{UnavailableSymbolFontCombination} and \texttt{InvalidNegation} errors are only flagged during MathML output; i.e.~these errors no longer prevent PNG output.
+\item Added \texttt{--keep-temp-files} option.
+\item Fixed a PNG clipping bug in certain cases where dvips gets the PS bounding box incorrect. For example, when translating \texttt{\texcommand{displaystyle} \texcommand{int}}, half of the integral sign would go missing. (This bug affects texvc too.)
+\item Changed behaviour of \texttt{<vshift>} block; now such a block appears even if the shift should be zero.
+\item Fixed a few incorrect MathML characters.
+\end{itemize}
+\item Version 0.4.3 (25/Feb/2006).
+\begin{itemize}
+\item Now supports \texttt{\texcommand{color}}; added corresponding error code \texttt{InvalidColour}.
+\item Numerous internal structural changes, especially an overhaul of the MathML output code.
+\item Improved node merging heuristics, for things like \texttt{123\textasciicircum5}.
+\item Corrected parsing of \texttt{\texcommand{not}}. Now blahtex will make a reasonable attempt on any \texttt{\texcommand{not}} that comes its way; the \texttt{InvalidNegation} error message has consequently been removed.
+\item Fixed a bug that caused incorrect font attributes for input like \texttt{\texcommand{rm} \texcommand{boldsymbol} x}.
+\item Added the \texttt{\texcommand{ast}} command (how did I ever miss that?)
+\end{itemize}
+\item Version 0.4.4 (25/Mar/2006).
+\begin{itemize}
+\item Changed default spacing mode from \texttt{moderate} to \texttt{strict}.
+\item Changed from using dvips/ImageMagick to dvipng. Consequently the \texttt{--shell-dvips}, \texttt{--shell-convert} and \texttt{--convert-options} options have been removed, and replaced by \texttt{--shell-dvipng}. The error messages \texttt{CannotRunConvert} and \texttt{CannotRunDvips} have been removed and replaced by \texttt{CannotRunDvipng} and \texttt{CannotWritePngDirectory}.
+\item Added flag \texttt{--use-preview-package}.
+\item Removed the \texttt{--compute-vertical-shift} option; now the vertical shift is always computed (by dvipng) as long as the \LaTeX{} \texttt{preview} package is loaded, but its name has been changed to `depth'. Accordingly, the \texttt{<vshift>} output block has been replaced by \texttt{<height>} and \texttt{<depth>} blocks. The numbers themselves are now computed by dvipng, which is much neater and more reliable.
+\item Added support for Cyrillic and Japanese in PNG output:
+\begin{itemize}
+\item Added \texttt{--use-cjk-package} and \texttt{--japanese-font} options.
+\item Added commands \texttt{\texcommand{cyr}} and \texttt{\texcommand{jap}}.
+\item Added error messages:
+\begin{itemize}
+\item \texttt{WrongFontEncoding}
+\item \texttt{WrongFontEncodingWithHint}
+\item \texttt{IllegalNestedFontEncodings}
+\item \texttt{LatexPackageUnavailable}
+\item \texttt{LatexFontNotSpecified}
+\end{itemize}
+\end{itemize}
+\item Corrected MathML characters for \texttt{\texcommand{longrightarrow}} and friends; however they are currently disabled because of poor font support.
+\item Fixed spacing for \texttt{\texcommand{substack}} and the \texttt{aligned} environment. Note however that Firefox still doesn't support the requisite \texttt{rowspacing} and \texttt{columnspacing} attributes, so it won't look right yet in Firefox.
+\item Changed format of \texttt{--print-error-messages} slightly.
+\item Finished adding MathML character names for all commands added in version 0.4.2.
+\end{itemize}
+\item Version blahtexml 0.4.4 (2/Nov/2007) by GVA
+\begin{itemize}
+\item Added the blahtexml extension.
+\end{itemize}
+\item Version blahtexml 0.5 (16/May/2008) by GVA
+\begin{itemize}
+\item Added input symbol translation.
+\item Improved makefile based on user feedback (Mac compilation, lower optimization level, documentation generation).
+\end{itemize}
+\end{itemize}
+
+\end{document}
diff --git a/blahtexml/source/BlahtexCore/ISTtoCpp.xslt b/blahtexml/source/BlahtexCore/ISTtoCpp.xslt
new file mode 100644
index 0000000..686dbb7
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/ISTtoCpp.xslt
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+// File "ISTtoCpp.xslt"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+-->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version='1.0'>
+
+<xsl:output method="text" indent="no" encoding="UTF-8"/>
+
+<!-- Entry template for a "symbols" element: processes each child "symbol"
+     element in turn; nothing else is written for the container itself. -->
+<xsl:template match="symbols">
+ <xsl:apply-templates select="symbol"/>
+</xsl:template>
+
+<!-- Writes one line of C++ initializer text per "symbol" element, of the form
+       make_pair(L'\U<unicode attribute>', L"<tex attribute>")
+     Details visible below:
+       * the "unicode" attribute is pasted directly after \U, so it is assumed
+         to already be a valid hexadecimal universal-character-name body
+         (TODO confirm against the XML source);
+       * if the "tex" attribute starts with a backslash, one extra backslash is
+         written first, doubling the leading backslash in the C++ literal;
+         backslashes elsewhere in the attribute are written unchanged;
+       * a comma is appended to every entry except the last one selected;
+       * each entry is terminated by a newline. -->
+<xsl:template match="symbol">
+ <xsl:text> make_pair(L'\U</xsl:text>
+ <xsl:value-of select="@unicode"/>
+ <xsl:text>', L"</xsl:text>
+ <xsl:if test="starts-with(@tex, '\')">
+ <xsl:text>\</xsl:text>
+ </xsl:if>
+ <xsl:value-of select="@tex"/>
+ <xsl:text>")</xsl:text>
+ <xsl:if test="position()&lt;last()">
+ <xsl:text>,</xsl:text>
+ </xsl:if>
+ <xsl:text>
+</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/blahtexml/source/BlahtexCore/InputSymbolTranslation.cpp b/blahtexml/source/BlahtexCore/InputSymbolTranslation.cpp
new file mode 100644
index 0000000..261808a
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/InputSymbolTranslation.cpp
@@ -0,0 +1,55 @@
+// File "InputSymbolTranslation.cpp"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include "Misc.h"
+#include "InputSymbolTranslation.h"
+
+using namespace std;
+
+namespace blahtex {
+
+// List of (input character, replacement string) pairs. The entries are
+// pulled in textually from "InputSymbolTranslation.inc", which consists of
+// comma-separated make_pair(...) expressions.
+// NOTE(review): the .inc file is presumably generated by ISTtoCpp.xslt from
+// InputSymbolTranslation.xml -- confirm against the makefile.
+pair<wchar_t, wstring> equivalentInputSymbolsArray[] =
+{
+#include "InputSymbolTranslation.inc"
+};
+// Lookup table keyed by input character, constructed from the full range of
+// the array above; translateToken() searches it.
+wishful_hash_map<wchar_t, wstring> equivalentInputSymbolsTable(
+ equivalentInputSymbolsArray,
+ END_ARRAY(equivalentInputSymbolsArray)
+);
+
+// Maps a single-character input token to its replacement string.
+//
+// token: the token to examine.
+// Returns the string stored in equivalentInputSymbolsTable for token[0] when
+// the token is exactly one character long and that character has an entry;
+// in every other case the token is returned unchanged.
+wstring translateToken(const wstring& token)
+{
+    // Tokens that are empty or longer than one character are never looked up.
+    if (token.length() != 1)
+        return token;
+
+    typedef wishful_hash_map<wchar_t, wstring>::const_iterator SymbolIterator;
+    const SymbolIterator match = equivalentInputSymbolsTable.find(token[0]);
+
+    // No entry for this character: hand the token back as it came in.
+    if (match == equivalentInputSymbolsTable.end())
+        return token;
+
+    return match->second;
+}
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/InputSymbolTranslation.h b/blahtexml/source/BlahtexCore/InputSymbolTranslation.h
new file mode 100644
index 0000000..c1b67f3
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/InputSymbolTranslation.h
@@ -0,0 +1,33 @@
+// File "InputSymbolTranslation.h"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_INPUTSYMBOLTRANSLATION_H
+#define BLAHTEX_INPUTSYMBOLTRANSLATION_H
+
+
+namespace blahtex
+{
+
+// Translates one input token: a token consisting of a single character that
+// has an entry in the input symbol table is replaced by the associated
+// string; any other token is returned unchanged.
+// NOTE(review): this header uses std::wstring but does not include <string>
+// itself, so it relies on the including file having pulled it in already.
+std::wstring translateToken(const std::wstring& token);
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/InputSymbolTranslation.inc b/blahtexml/source/BlahtexCore/InputSymbolTranslation.inc
new file mode 100644
index 0000000..c16a004
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/InputSymbolTranslation.inc
@@ -0,0 +1,344 @@
+ make_pair(L'\U000000AC', L"\\lnot"),
+ make_pair(L'\U000000B1', L"\\pm"),
+ make_pair(L'\U000000D7', L"\\times"),
+ make_pair(L'\U000000F7', L"\\div"),
+ make_pair(L'\U00000393', L"\\Gamma"),
+ make_pair(L'\U00000394', L"\\Delta"),
+ make_pair(L'\U00000398', L"\\Theta"),
+ make_pair(L'\U0000039B', L"\\Lambda"),
+ make_pair(L'\U0000039E', L"\\Xi"),
+ make_pair(L'\U000003A0', L"\\Pi"),
+ make_pair(L'\U000003A3', L"\\Sigma"),
+ make_pair(L'\U000003A5', L"\\Upsilon"),
+ make_pair(L'\U000003A6', L"\\Phi"),
+ make_pair(L'\U000003A8', L"\\Psi"),
+ make_pair(L'\U000003A9', L"\\Omega"),
+ make_pair(L'\U000003B1', L"\\alpha"),
+ make_pair(L'\U000003B2', L"\\beta"),
+ make_pair(L'\U000003B3', L"\\gamma"),
+ make_pair(L'\U000003B4', L"\\delta"),
+ make_pair(L'\U000003B5', L"\\varepsilon"),
+ make_pair(L'\U000003B6', L"\\zeta"),
+ make_pair(L'\U000003B7', L"\\eta"),
+ make_pair(L'\U000003B8', L"\\theta"),
+ make_pair(L'\U000003B9', L"\\iota"),
+ make_pair(L'\U000003BA', L"\\kappa"),
+ make_pair(L'\U000003BB', L"\\lambda"),
+ make_pair(L'\U000003BC', L"\\mu"),
+ make_pair(L'\U000003BD', L"\\nu"),
+ make_pair(L'\U000003BE', L"\\xi"),
+ make_pair(L'\U000003C0', L"\\pi"),
+ make_pair(L'\U000003C1', L"\\rho"),
+ make_pair(L'\U000003C2', L"\\varsigma"),
+ make_pair(L'\U000003C3', L"\\sigma"),
+ make_pair(L'\U000003C4', L"\\tau"),
+ make_pair(L'\U000003C5', L"\\upsilon"),
+ make_pair(L'\U000003C6', L"\\varphi"),
+ make_pair(L'\U000003C7', L"\\chi"),
+ make_pair(L'\U000003C8', L"\\psi"),
+ make_pair(L'\U000003C9', L"\\omega"),
+ make_pair(L'\U000003D1', L"\\vartheta"),
+ make_pair(L'\U000003D5', L"\\phi"),
+ make_pair(L'\U000003D6', L"\\varpi"),
+ make_pair(L'\U000003DD', L"\\digamma"),
+ make_pair(L'\U000003F0', L"\\varkappa"),
+ make_pair(L'\U000003F1', L"\\varrho"),
+ make_pair(L'\U000003F5', L"\\epsilon"),
+ make_pair(L'\U000003F6', L"\\backepsilon"),
+ make_pair(L'\U00002020', L"\\dagger"),
+ make_pair(L'\U00002021', L"\\ddagger"),
+ make_pair(L'\U00002022', L"\\bullet"),
+ make_pair(L'\U00002026', L"\\dots"),
+ make_pair(L'\U00002032', L"\\prime"),
+ make_pair(L'\U00002035', L"\\backprime"),
+ make_pair(L'\U00002190', L"\\leftarrow"),
+ make_pair(L'\U00002191', L"\\uparrow"),
+ make_pair(L'\U00002192', L"\\rightarrow"),
+ make_pair(L'\U00002193', L"\\downarrow"),
+ make_pair(L'\U00002194', L"\\leftrightarrow"),
+ make_pair(L'\U00002195', L"\\updownarrow"),
+ make_pair(L'\U00002196', L"\\nwarrow"),
+ make_pair(L'\U00002197', L"\\nearrow"),
+ make_pair(L'\U00002198', L"\\searrow"),
+ make_pair(L'\U00002199', L"\\swarrow"),
+ make_pair(L'\U0000219A', L"\\nleftarrow"),
+ make_pair(L'\U0000219B', L"\\nrightarrow"),
+ make_pair(L'\U0000219D', L"\\rightsquigarrow"),
+ make_pair(L'\U0000219E', L"\\twoheadleftarrow"),
+ make_pair(L'\U000021A0', L"\\twoheadrightarrow"),
+ make_pair(L'\U000021A2', L"\\leftarrowtail"),
+ make_pair(L'\U000021A3', L"\\rightarrowtail"),
+ make_pair(L'\U000021A6', L"\\mapsto"),
+ make_pair(L'\U000021A9', L"\\hookleftarrow"),
+ make_pair(L'\U000021AA', L"\\hookrightarrow"),
+ make_pair(L'\U000021AB', L"\\looparrowleft"),
+ make_pair(L'\U000021AC', L"\\looparrowright"),
+ make_pair(L'\U000021AD', L"\\leftrightsquigarrow"),
+ make_pair(L'\U000021AE', L"\\nleftrightarrow"),
+ make_pair(L'\U000021B0', L"\\Lsh"),
+ make_pair(L'\U000021B1', L"\\Rsh"),
+ make_pair(L'\U000021B6', L"\\curvearrowleft"),
+ make_pair(L'\U000021B7', L"\\curvearrowright"),
+ make_pair(L'\U000021BA', L"\\circlearrowleft"),
+ make_pair(L'\U000021BB', L"\\circlearrowright"),
+ make_pair(L'\U000021BC', L"\\leftharpoonup"),
+ make_pair(L'\U000021BD', L"\\leftharpoondown"),
+ make_pair(L'\U000021BE', L"\\upharpoonright"),
+ make_pair(L'\U000021BF', L"\\upharpoonleft"),
+ make_pair(L'\U000021C0', L"\\rightharpoonup"),
+ make_pair(L'\U000021C1', L"\\rightharpoondown"),
+ make_pair(L'\U000021C2', L"\\downharpoonright"),
+ make_pair(L'\U000021C3', L"\\downharpoonleft"),
+ make_pair(L'\U000021C4', L"\\rightleftarrows"),
+ make_pair(L'\U000021C6', L"\\leftrightarrows"),
+ make_pair(L'\U000021C7', L"\\leftleftarrows"),
+ make_pair(L'\U000021C8', L"\\upuparrows"),
+ make_pair(L'\U000021C9', L"\\rightrightarrows"),
+ make_pair(L'\U000021CA', L"\\downdownarrows"),
+ make_pair(L'\U000021CB', L"\\leftrightharpoons"),
+ make_pair(L'\U000021CC', L"\\rightleftharpoons"),
+ make_pair(L'\U000021CD', L"\\nLeftarrow"),
+ make_pair(L'\U000021CE', L"\\nLeftrightarrow"),
+ make_pair(L'\U000021CF', L"\\nRightarrow"),
+ make_pair(L'\U000021D0', L"\\Leftarrow"),
+ make_pair(L'\U000021D1', L"\\Uparrow"),
+ make_pair(L'\U000021D2', L"\\Rightarrow"),
+ make_pair(L'\U000021D3', L"\\Downarrow"),
+ make_pair(L'\U000021D4', L"\\Leftrightarrow"),
+ make_pair(L'\U000021D5', L"\\Updownarrow"),
+ make_pair(L'\U000021DA', L"\\Lleftarrow"),
+ make_pair(L'\U000021DB', L"\\Rrightarrow"),
+ make_pair(L'\U000021DD', L"\\leadsto"),
+ make_pair(L'\U00002200', L"\\forall"),
+ make_pair(L'\U00002201', L"\\complement"),
+ make_pair(L'\U00002203', L"\\exists"),
+ make_pair(L'\U00002204', L"\\nexists"),
+ make_pair(L'\U00002207', L"\\nabla"),
+ make_pair(L'\U00002208', L"\\in"),
+ make_pair(L'\U00002209', L"\\notin"),
+ make_pair(L'\U0000220B', L"\\ni"),
+ make_pair(L'\U0000220F', L"\\prod"),
+ make_pair(L'\U00002210', L"\\coprod"),
+ make_pair(L'\U00002211', L"\\sum"),
+ make_pair(L'\U00002213', L"\\mp"),
+ make_pair(L'\U00002214', L"\\dotplus"),
+ make_pair(L'\U00002218', L"\\circ"),
+ make_pair(L'\U0000221A', L"\\surd"),
+ make_pair(L'\U0000221D', L"\\propto"),
+ make_pair(L'\U00002220', L"\\angle"),
+ make_pair(L'\U00002221', L"\\measuredangle"),
+ make_pair(L'\U00002222', L"\\sphericalangle"),
+ make_pair(L'\U00002224', L"\\nmid"),
+ make_pair(L'\U00002225', L"\\parallel"),
+ make_pair(L'\U00002226', L"\\nparallel"),
+ make_pair(L'\U00002227', L"\\wedge"),
+ make_pair(L'\U00002228', L"\\vee"),
+ make_pair(L'\U00002229', L"\\cap"),
+ make_pair(L'\U0000222A', L"\\cup"),
+ make_pair(L'\U0000222B', L"\\int"),
+ make_pair(L'\U0000222C', L"\\iint"),
+ make_pair(L'\U0000222D', L"\\iiint"),
+ make_pair(L'\U0000222E', L"\\oint"),
+ make_pair(L'\U00002234', L"\\therefore"),
+ make_pair(L'\U00002235', L"\\because"),
+ make_pair(L'\U0000223C', L"\\sim"),
+ make_pair(L'\U0000223D', L"\\backsim"),
+ make_pair(L'\U00002240', L"\\wr"),
+ make_pair(L'\U00002241', L"\\nsim"),
+ make_pair(L'\U00002242', L"\\eqsim"),
+ make_pair(L'\U00002243', L"\\simeq"),
+ make_pair(L'\U00002245', L"\\cong"),
+ make_pair(L'\U00002247', L"\\ncong"),
+ make_pair(L'\U00002248', L"\\approx"),
+ make_pair(L'\U0000224A', L"\\approxeq"),
+ make_pair(L'\U0000224E', L"\\Bumpeq"),
+ make_pair(L'\U0000224F', L"\\bumpeq"),
+ make_pair(L'\U00002250', L"\\doteq"),
+ make_pair(L'\U00002251', L"\\doteqdot"),
+ make_pair(L'\U00002252', L"\\fallingdotseq"),
+ make_pair(L'\U00002253', L"\\risingdotseq"),
+ make_pair(L'\U00002256', L"\\eqcirc"),
+ make_pair(L'\U00002257', L"\\circeq"),
+ make_pair(L'\U0000225C', L"\\triangleq"),
+ make_pair(L'\U00002260', L"\\neq"),
+ make_pair(L'\U00002261', L"\\equiv"),
+ make_pair(L'\U00002264', L"\\leq"),
+ make_pair(L'\U00002265', L"\\geq"),
+ make_pair(L'\U00002266', L"\\leqq"),
+ make_pair(L'\U00002267', L"\\geqq"),
+ make_pair(L'\U00002268', L"\\lneqq"),
+ make_pair(L'\U00002269', L"\\gneqq"),
+ make_pair(L'\U0000226A', L"\\ll"),
+ make_pair(L'\U0000226B', L"\\gg"),
+ make_pair(L'\U0000226C', L"\\between"),
+ make_pair(L'\U0000226E', L"\\nless"),
+ make_pair(L'\U0000226F', L"\\ngtr"),
+ make_pair(L'\U00002270', L"\\nleq"),
+ make_pair(L'\U00002271', L"\\ngeq"),
+ make_pair(L'\U00002272', L"\\lesssim"),
+ make_pair(L'\U00002273', L"\\gtrsim"),
+ make_pair(L'\U00002276', L"\\lessgtr"),
+ make_pair(L'\U00002277', L"\\gtrless"),
+ make_pair(L'\U0000227A', L"\\prec"),
+ make_pair(L'\U0000227B', L"\\succ"),
+ make_pair(L'\U0000227C', L"\\preccurlyeq"),
+ make_pair(L'\U0000227D', L"\\succcurlyeq"),
+ make_pair(L'\U0000227E', L"\\precsim"),
+ make_pair(L'\U0000227F', L"\\succsim"),
+ make_pair(L'\U00002280', L"\\nprec"),
+ make_pair(L'\U00002281', L"\\nsucc"),
+ make_pair(L'\U00002282', L"\\subset"),
+ make_pair(L'\U00002283', L"\\supset"),
+ make_pair(L'\U00002286', L"\\subseteq"),
+ make_pair(L'\U00002287', L"\\supseteq"),
+ make_pair(L'\U00002288', L"\\nsubseteq"),
+ make_pair(L'\U00002289', L"\\nsupseteq"),
+ make_pair(L'\U0000228A', L"\\subsetneq"),
+ make_pair(L'\U0000228B', L"\\supsetneq"),
+ make_pair(L'\U0000228E', L"\\uplus"),
+ make_pair(L'\U0000228F', L"\\sqsubset"),
+ make_pair(L'\U00002290', L"\\sqsupset"),
+ make_pair(L'\U00002291', L"\\sqsubseteq"),
+ make_pair(L'\U00002292', L"\\sqsupseteq"),
+ make_pair(L'\U00002293', L"\\sqcap"),
+ make_pair(L'\U00002294', L"\\sqcup"),
+ make_pair(L'\U00002295', L"\\oplus"),
+ make_pair(L'\U00002296', L"\\ominus"),
+ make_pair(L'\U00002297', L"\\otimes"),
+ make_pair(L'\U00002298', L"\\oslash"),
+ make_pair(L'\U00002299', L"\\odot"),
+ make_pair(L'\U0000229A', L"\\circledcirc"),
+ make_pair(L'\U0000229B', L"\\circledast"),
+ make_pair(L'\U0000229D', L"\\circleddash"),
+ make_pair(L'\U0000229E', L"\\boxplus"),
+ make_pair(L'\U0000229F', L"\\boxminus"),
+ make_pair(L'\U000022A0', L"\\boxtimes"),
+ make_pair(L'\U000022A1', L"\\boxdot"),
+ make_pair(L'\U000022A2', L"\\vdash"),
+ make_pair(L'\U000022A3', L"\\dashv"),
+ make_pair(L'\U000022A4', L"\\top"),
+ make_pair(L'\U000022A5', L"\\bot"),
+ make_pair(L'\U000022A7', L"\\models"),
+ make_pair(L'\U000022A8', L"\\vDash"),
+ make_pair(L'\U000022A9', L"\\Vdash"),
+ make_pair(L'\U000022AA', L"\\Vvdash"),
+ make_pair(L'\U000022AC', L"\\nvdash"),
+ make_pair(L'\U000022AD', L"\\nvDash"),
+ make_pair(L'\U000022AE', L"\\nVdash"),
+ make_pair(L'\U000022AF', L"\\nVDash"),
+ make_pair(L'\U000022B2', L"\\lhd"),
+ make_pair(L'\U000022B3', L"\\rhd"),
+ make_pair(L'\U000022B4', L"\\unlhd"),
+ make_pair(L'\U000022B5', L"\\unrhd"),
+ make_pair(L'\U000022B8', L"\\multimap"),
+ make_pair(L'\U000022BA', L"\\intercal"),
+ make_pair(L'\U000022BB', L"\\veebar"),
+ make_pair(L'\U000022C0', L"\\bigwedge"),
+ make_pair(L'\U000022C1', L"\\bigvee"),
+ make_pair(L'\U000022C2', L"\\bigcap"),
+ make_pair(L'\U000022C3', L"\\bigcup"),
+ make_pair(L'\U000022C4', L"\\diamond"),
+ make_pair(L'\U000022C5', L"\\cdot"),
+ make_pair(L'\U000022C6', L"\\star"),
+ make_pair(L'\U000022C7', L"\\divideontimes"),
+ make_pair(L'\U000022C8', L"\\bowtie"),
+ make_pair(L'\U000022C9', L"\\ltimes"),
+ make_pair(L'\U000022CA', L"\\rtimes"),
+ make_pair(L'\U000022CB', L"\\leftthreetimes"),
+ make_pair(L'\U000022CC', L"\\rightthreetimes"),
+ make_pair(L'\U000022CD', L"\\backsimeq"),
+ make_pair(L'\U000022CE', L"\\curlyvee"),
+ make_pair(L'\U000022CF', L"\\curlywedge"),
+ make_pair(L'\U000022D0', L"\\Subset"),
+ make_pair(L'\U000022D1', L"\\Supset"),
+ make_pair(L'\U000022D2', L"\\Cap"),
+ make_pair(L'\U000022D3', L"\\Cup"),
+ make_pair(L'\U000022D4', L"\\pitchfork"),
+ make_pair(L'\U000022D6', L"\\lessdot"),
+ make_pair(L'\U000022D7', L"\\gtrdot"),
+ make_pair(L'\U000022D8', L"\\lll"),
+ make_pair(L'\U000022D9', L"\\ggg"),
+ make_pair(L'\U000022DA', L"\\lesseqgtr"),
+ make_pair(L'\U000022DB', L"\\gtreqless"),
+ make_pair(L'\U000022DE', L"\\curlyeqprec"),
+ make_pair(L'\U000022DF', L"\\curlyeqsucc"),
+ make_pair(L'\U000022E6', L"\\lnsim"),
+ make_pair(L'\U000022E7', L"\\gnsim"),
+ make_pair(L'\U000022E8', L"\\precnsim"),
+ make_pair(L'\U000022E9', L"\\succnsim"),
+ make_pair(L'\U000022EA', L"\\ntriangleleft"),
+ make_pair(L'\U000022EB', L"\\ntriangleright"),
+ make_pair(L'\U000022EC', L"\\ntrianglelefteq"),
+ make_pair(L'\U000022ED', L"\\ntrianglerighteq"),
+ make_pair(L'\U000022EE', L"\\vdots"),
+ make_pair(L'\U000022EF', L"\\cdots"),
+ make_pair(L'\U000022F1', L"\\ddots"),
+ make_pair(L'\U00002305', L"\\barwedge"),
+ make_pair(L'\U00002306', L"\\doublebarwedge"),
+ make_pair(L'\U00002308', L"\\lceil"),
+ make_pair(L'\U00002309', L"\\rceil"),
+ make_pair(L'\U0000230A', L"\\lfloor"),
+ make_pair(L'\U0000230B', L"\\rfloor"),
+ make_pair(L'\U0000231C', L"\\ulcorner"),
+ make_pair(L'\U0000231D', L"\\urcorner"),
+ make_pair(L'\U0000231E', L"\\llcorner"),
+ make_pair(L'\U0000231F', L"\\lrcorner"),
+ make_pair(L'\U00002322', L"\\frown"),
+ make_pair(L'\U00002323', L"\\smile"),
+ make_pair(L'\U00002329', L"\\langle"),
+ make_pair(L'\U0000232A', L"\\rangle"),
+ make_pair(L'\U000025A1', L"\\square"),
+ make_pair(L'\U000025B3', L"\\triangle"),
+ make_pair(L'\U000025B4', L"\\blacktriangle"),
+ make_pair(L'\U000025B5', L"\\vartriangle"),
+ make_pair(L'\U000025B6', L"\\blacktriangleright"),
+ make_pair(L'\U000025B9', L"\\triangleright"),
+ make_pair(L'\U000025BD', L"\\bigtriangledown"),
+ make_pair(L'\U000025BE', L"\\blacktriangledown"),
+ make_pair(L'\U000025BF', L"\\triangledown"),
+ make_pair(L'\U000025C0', L"\\blacktriangleleft"),
+ make_pair(L'\U000025C3', L"\\triangleleft"),
+ make_pair(L'\U000025CA', L"\\lozenge"),
+ make_pair(L'\U000025EF', L"\\bigcirc"),
+ make_pair(L'\U000025FC', L"\\blacksquare"),
+ make_pair(L'\U00002605', L"\\bigstar"),
+ make_pair(L'\U00002660', L"\\spadesuit"),
+ make_pair(L'\U00002663', L"\\clubsuit"),
+ make_pair(L'\U00002665', L"\\heartsuit"),
+ make_pair(L'\U00002666', L"\\diamondsuit"),
+ make_pair(L'\U0000266D', L"\\flat"),
+ make_pair(L'\U0000266E', L"\\natural"),
+ make_pair(L'\U0000266F', L"\\sharp"),
+ make_pair(L'\U00002713', L"\\checkmark"),
+ make_pair(L'\U0000290E', L"\\dashleftarrow"),
+ make_pair(L'\U0000290F', L"\\dashrightarrow"),
+ make_pair(L'\U000029EB', L"\\blacklozenge"),
+ make_pair(L'\U00002A00', L"\\bigodot"),
+ make_pair(L'\U00002A01', L"\\bigoplus"),
+ make_pair(L'\U00002A02', L"\\bigotimes"),
+ make_pair(L'\U00002A04', L"\\biguplus"),
+ make_pair(L'\U00002A06', L"\\bigsqcup"),
+ make_pair(L'\U00002A0C', L"\\iiiint"),
+ make_pair(L'\U00002A3F', L"\\amalg"),
+ make_pair(L'\U00002A7D', L"\\leqslant"),
+ make_pair(L'\U00002A7E', L"\\geqslant"),
+ make_pair(L'\U00002A85', L"\\lessapprox"),
+ make_pair(L'\U00002A86', L"\\gtrapprox"),
+ make_pair(L'\U00002A89', L"\\lnapprox"),
+ make_pair(L'\U00002A8A', L"\\gnapprox"),
+ make_pair(L'\U00002A8B', L"\\lesseqqgtr"),
+ make_pair(L'\U00002A8C', L"\\gtreqqless"),
+ make_pair(L'\U00002A95', L"\\eqslantless"),
+ make_pair(L'\U00002A96', L"\\eqslantgtr"),
+ make_pair(L'\U00002AAF', L"\\preceq"),
+ make_pair(L'\U00002AB0', L"\\succeq"),
+ make_pair(L'\U00002AB5', L"\\precneqq"),
+ make_pair(L'\U00002AB6', L"\\succneqq"),
+ make_pair(L'\U00002AB7', L"\\precapprox"),
+ make_pair(L'\U00002AB8', L"\\succapprox"),
+ make_pair(L'\U00002AB9', L"\\precnapprox"),
+ make_pair(L'\U00002ABA', L"\\succnapprox"),
+ make_pair(L'\U00002AC5', L"\\subseteqq"),
+ make_pair(L'\U00002AC6', L"\\supseteqq"),
+ make_pair(L'\U00002ACB', L"\\subsetneqq"),
+ make_pair(L'\U00002ACC', L"\\supsetneqq")
diff --git a/blahtexml/source/BlahtexCore/InputSymbolTranslation.xml b/blahtexml/source/BlahtexCore/InputSymbolTranslation.xml
new file mode 100644
index 0000000..6ee7ebf
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/InputSymbolTranslation.xml
@@ -0,0 +1,367 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+// File "InputSymbolTranslation.xml"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+-->
+<symbols>
+ <symbol unicode="000000AC" tex="\lnot"/>
+ <symbol unicode="000000B1" tex="\pm"/>
+ <symbol unicode="000000D7" tex="\times"/>
+ <symbol unicode="000000F7" tex="\div"/>
+ <symbol unicode="00000393" tex="\Gamma"/>
+ <symbol unicode="00000394" tex="\Delta"/>
+ <symbol unicode="00000398" tex="\Theta"/>
+ <symbol unicode="0000039B" tex="\Lambda"/>
+ <symbol unicode="0000039E" tex="\Xi"/>
+ <symbol unicode="000003A0" tex="\Pi"/>
+ <symbol unicode="000003A3" tex="\Sigma"/>
+ <symbol unicode="000003A5" tex="\Upsilon"/>
+ <symbol unicode="000003A6" tex="\Phi"/>
+ <symbol unicode="000003A8" tex="\Psi"/>
+ <symbol unicode="000003A9" tex="\Omega"/>
+ <symbol unicode="000003B1" tex="\alpha"/>
+ <symbol unicode="000003B2" tex="\beta"/>
+ <symbol unicode="000003B3" tex="\gamma"/>
+ <symbol unicode="000003B4" tex="\delta"/>
+ <symbol unicode="000003B5" tex="\varepsilon"/>
+ <symbol unicode="000003B6" tex="\zeta"/>
+ <symbol unicode="000003B7" tex="\eta"/>
+ <symbol unicode="000003B8" tex="\theta"/>
+ <symbol unicode="000003B9" tex="\iota"/>
+ <symbol unicode="000003BA" tex="\kappa"/>
+ <symbol unicode="000003BB" tex="\lambda"/>
+ <symbol unicode="000003BC" tex="\mu"/>
+ <symbol unicode="000003BD" tex="\nu"/>
+ <symbol unicode="000003BE" tex="\xi"/>
+ <symbol unicode="000003C0" tex="\pi"/>
+ <symbol unicode="000003C1" tex="\rho"/>
+ <symbol unicode="000003C2" tex="\varsigma"/>
+ <symbol unicode="000003C3" tex="\sigma"/>
+ <symbol unicode="000003C4" tex="\tau"/>
+ <symbol unicode="000003C5" tex="\upsilon"/>
+ <symbol unicode="000003C6" tex="\varphi"/>
+ <symbol unicode="000003C7" tex="\chi"/>
+ <symbol unicode="000003C8" tex="\psi"/>
+ <symbol unicode="000003C9" tex="\omega"/>
+ <symbol unicode="000003D1" tex="\vartheta"/>
+ <symbol unicode="000003D5" tex="\phi"/>
+ <symbol unicode="000003D6" tex="\varpi"/>
+ <symbol unicode="000003DD" tex="\digamma"/>
+ <symbol unicode="000003F0" tex="\varkappa"/>
+ <symbol unicode="000003F1" tex="\varrho"/>
+ <symbol unicode="000003F5" tex="\epsilon"/>
+ <symbol unicode="000003F6" tex="\backepsilon"/>
+ <symbol unicode="00002020" tex="\dagger"/>
+ <symbol unicode="00002021" tex="\ddagger"/>
+ <symbol unicode="00002022" tex="\bullet"/>
+ <symbol unicode="00002026" tex="\dots"/>
+ <symbol unicode="00002032" tex="\prime"/>
+ <symbol unicode="00002035" tex="\backprime"/>
+ <symbol unicode="00002190" tex="\leftarrow"/>
+ <symbol unicode="00002191" tex="\uparrow"/>
+ <symbol unicode="00002192" tex="\rightarrow"/>
+ <symbol unicode="00002193" tex="\downarrow"/>
+ <symbol unicode="00002194" tex="\leftrightarrow"/>
+ <symbol unicode="00002195" tex="\updownarrow"/>
+ <symbol unicode="00002196" tex="\nwarrow"/>
+ <symbol unicode="00002197" tex="\nearrow"/>
+ <symbol unicode="00002198" tex="\searrow"/>
+ <symbol unicode="00002199" tex="\swarrow"/>
+ <symbol unicode="0000219A" tex="\nleftarrow"/>
+ <symbol unicode="0000219B" tex="\nrightarrow"/>
+ <symbol unicode="0000219D" tex="\rightsquigarrow"/>
+ <symbol unicode="0000219E" tex="\twoheadleftarrow"/>
+ <symbol unicode="000021A0" tex="\twoheadrightarrow"/>
+ <symbol unicode="000021A2" tex="\leftarrowtail"/>
+ <symbol unicode="000021A3" tex="\rightarrowtail"/>
+ <symbol unicode="000021A6" tex="\mapsto"/>
+ <symbol unicode="000021A9" tex="\hookleftarrow"/>
+ <symbol unicode="000021AA" tex="\hookrightarrow"/>
+ <symbol unicode="000021AB" tex="\looparrowleft"/>
+ <symbol unicode="000021AC" tex="\looparrowright"/>
+ <symbol unicode="000021AD" tex="\leftrightsquigarrow"/>
+ <symbol unicode="000021AE" tex="\nleftrightarrow"/>
+ <symbol unicode="000021B0" tex="\Lsh"/>
+ <symbol unicode="000021B1" tex="\Rsh"/>
+ <symbol unicode="000021B6" tex="\curvearrowleft"/>
+ <symbol unicode="000021B7" tex="\curvearrowright"/>
+ <symbol unicode="000021BA" tex="\circlearrowleft"/>
+ <symbol unicode="000021BB" tex="\circlearrowright"/>
+ <symbol unicode="000021BC" tex="\leftharpoonup"/>
+ <symbol unicode="000021BD" tex="\leftharpoondown"/>
+ <symbol unicode="000021BE" tex="\upharpoonright"/>
+ <symbol unicode="000021BF" tex="\upharpoonleft"/>
+ <symbol unicode="000021C0" tex="\rightharpoonup"/>
+ <symbol unicode="000021C1" tex="\rightharpoondown"/>
+ <symbol unicode="000021C2" tex="\downharpoonright"/>
+ <symbol unicode="000021C3" tex="\downharpoonleft"/>
+ <symbol unicode="000021C4" tex="\rightleftarrows"/>
+ <symbol unicode="000021C6" tex="\leftrightarrows"/>
+ <symbol unicode="000021C7" tex="\leftleftarrows"/>
+ <symbol unicode="000021C8" tex="\upuparrows"/>
+ <symbol unicode="000021C9" tex="\rightrightarrows"/>
+ <symbol unicode="000021CA" tex="\downdownarrows"/>
+ <symbol unicode="000021CB" tex="\leftrightharpoons"/>
+ <symbol unicode="000021CC" tex="\rightleftharpoons"/>
+ <symbol unicode="000021CD" tex="\nLeftarrow"/>
+ <symbol unicode="000021CE" tex="\nLeftrightarrow"/>
+ <symbol unicode="000021CF" tex="\nRightarrow"/>
+ <symbol unicode="000021D0" tex="\Leftarrow"/>
+ <symbol unicode="000021D1" tex="\Uparrow"/>
+ <symbol unicode="000021D2" tex="\Rightarrow"/>
+ <symbol unicode="000021D3" tex="\Downarrow"/>
+ <symbol unicode="000021D4" tex="\Leftrightarrow"/>
+ <symbol unicode="000021D5" tex="\Updownarrow"/>
+ <symbol unicode="000021DA" tex="\Lleftarrow"/>
+ <symbol unicode="000021DB" tex="\Rrightarrow"/>
+ <symbol unicode="000021DD" tex="\leadsto"/>
+ <symbol unicode="00002200" tex="\forall"/>
+ <symbol unicode="00002201" tex="\complement"/>
+ <symbol unicode="00002203" tex="\exists"/>
+ <symbol unicode="00002204" tex="\nexists"/>
+ <symbol unicode="00002207" tex="\nabla"/>
+ <symbol unicode="00002208" tex="\in"/>
+ <symbol unicode="00002209" tex="\notin"/>
+ <symbol unicode="0000220B" tex="\ni"/>
+ <symbol unicode="0000220F" tex="\prod"/>
+ <symbol unicode="00002210" tex="\coprod"/>
+ <symbol unicode="00002211" tex="\sum"/>
+ <symbol unicode="00002213" tex="\mp"/>
+ <symbol unicode="00002214" tex="\dotplus"/>
+ <symbol unicode="00002218" tex="\circ"/>
+ <symbol unicode="0000221A" tex="\surd"/>
+ <symbol unicode="0000221D" tex="\propto"/>
+ <symbol unicode="00002220" tex="\angle"/>
+ <symbol unicode="00002221" tex="\measuredangle"/>
+ <symbol unicode="00002222" tex="\sphericalangle"/>
+ <symbol unicode="00002224" tex="\nmid"/>
+ <symbol unicode="00002225" tex="\parallel"/>
+ <symbol unicode="00002226" tex="\nparallel"/>
+ <symbol unicode="00002227" tex="\wedge"/>
+ <symbol unicode="00002228" tex="\vee"/>
+ <symbol unicode="00002229" tex="\cap"/>
+ <symbol unicode="0000222A" tex="\cup"/>
+ <symbol unicode="0000222B" tex="\int"/>
+ <symbol unicode="0000222C" tex="\iint"/>
+ <symbol unicode="0000222D" tex="\iiint"/>
+ <symbol unicode="0000222E" tex="\oint"/>
+ <symbol unicode="00002234" tex="\therefore"/>
+ <symbol unicode="00002235" tex="\because"/>
+ <symbol unicode="0000223C" tex="\sim"/>
+ <symbol unicode="0000223D" tex="\backsim"/>
+ <symbol unicode="00002240" tex="\wr"/>
+ <symbol unicode="00002241" tex="\nsim"/>
+ <symbol unicode="00002242" tex="\eqsim"/>
+ <symbol unicode="00002243" tex="\simeq"/>
+ <symbol unicode="00002245" tex="\cong"/>
+ <symbol unicode="00002247" tex="\ncong"/>
+ <symbol unicode="00002248" tex="\approx"/>
+ <symbol unicode="0000224A" tex="\approxeq"/>
+ <symbol unicode="0000224E" tex="\Bumpeq"/>
+ <symbol unicode="0000224F" tex="\bumpeq"/>
+ <symbol unicode="00002250" tex="\doteq"/>
+ <symbol unicode="00002251" tex="\doteqdot"/>
+ <symbol unicode="00002252" tex="\fallingdotseq"/>
+ <symbol unicode="00002253" tex="\risingdotseq"/>
+ <symbol unicode="00002256" tex="\eqcirc"/>
+ <symbol unicode="00002257" tex="\circeq"/>
+ <symbol unicode="0000225C" tex="\triangleq"/>
+ <symbol unicode="00002260" tex="\neq"/>
+ <symbol unicode="00002261" tex="\equiv"/>
+ <symbol unicode="00002264" tex="\leq"/>
+ <symbol unicode="00002265" tex="\geq"/>
+ <symbol unicode="00002266" tex="\leqq"/>
+ <symbol unicode="00002267" tex="\geqq"/>
+ <symbol unicode="00002268" tex="\lneqq"/>
+ <symbol unicode="00002269" tex="\gneqq"/>
+ <symbol unicode="0000226A" tex="\ll"/>
+ <symbol unicode="0000226B" tex="\gg"/>
+ <symbol unicode="0000226C" tex="\between"/>
+ <symbol unicode="0000226E" tex="\nless"/>
+ <symbol unicode="0000226F" tex="\ngtr"/>
+ <symbol unicode="00002270" tex="\nleq"/>
+ <symbol unicode="00002271" tex="\ngeq"/>
+ <symbol unicode="00002272" tex="\lesssim"/>
+ <symbol unicode="00002273" tex="\gtrsim"/>
+ <symbol unicode="00002276" tex="\lessgtr"/>
+ <symbol unicode="00002277" tex="\gtrless"/>
+ <symbol unicode="0000227A" tex="\prec"/>
+ <symbol unicode="0000227B" tex="\succ"/>
+ <symbol unicode="0000227C" tex="\preccurlyeq"/>
+ <symbol unicode="0000227D" tex="\succcurlyeq"/>
+ <symbol unicode="0000227E" tex="\precsim"/>
+ <symbol unicode="0000227F" tex="\succsim"/>
+ <symbol unicode="00002280" tex="\nprec"/>
+ <symbol unicode="00002281" tex="\nsucc"/>
+ <symbol unicode="00002282" tex="\subset"/>
+ <symbol unicode="00002283" tex="\supset"/>
+ <symbol unicode="00002286" tex="\subseteq"/>
+ <symbol unicode="00002287" tex="\supseteq"/>
+ <symbol unicode="00002288" tex="\nsubseteq"/>
+ <symbol unicode="00002289" tex="\nsupseteq"/>
+ <symbol unicode="0000228A" tex="\subsetneq"/>
+ <symbol unicode="0000228B" tex="\supsetneq"/>
+ <symbol unicode="0000228E" tex="\uplus"/>
+ <symbol unicode="0000228F" tex="\sqsubset"/>
+ <symbol unicode="00002290" tex="\sqsupset"/>
+ <symbol unicode="00002291" tex="\sqsubseteq"/>
+ <symbol unicode="00002292" tex="\sqsupseteq"/>
+ <symbol unicode="00002293" tex="\sqcap"/>
+ <symbol unicode="00002294" tex="\sqcup"/>
+ <symbol unicode="00002295" tex="\oplus"/>
+ <symbol unicode="00002296" tex="\ominus"/>
+ <symbol unicode="00002297" tex="\otimes"/>
+ <symbol unicode="00002298" tex="\oslash"/>
+ <symbol unicode="00002299" tex="\odot"/>
+ <symbol unicode="0000229A" tex="\circledcirc"/>
+ <symbol unicode="0000229B" tex="\circledast"/>
+ <symbol unicode="0000229D" tex="\circleddash"/>
+ <symbol unicode="0000229E" tex="\boxplus"/>
+ <symbol unicode="0000229F" tex="\boxminus"/>
+ <symbol unicode="000022A0" tex="\boxtimes"/>
+ <symbol unicode="000022A1" tex="\boxdot"/>
+ <symbol unicode="000022A2" tex="\vdash"/>
+ <symbol unicode="000022A3" tex="\dashv"/>
+ <symbol unicode="000022A4" tex="\top"/>
+ <symbol unicode="000022A5" tex="\bot"/>
+ <symbol unicode="000022A7" tex="\models"/>
+ <symbol unicode="000022A8" tex="\vDash"/>
+ <symbol unicode="000022A9" tex="\Vdash"/>
+ <symbol unicode="000022AA" tex="\Vvdash"/>
+ <symbol unicode="000022AC" tex="\nvdash"/>
+ <symbol unicode="000022AD" tex="\nvDash"/>
+ <symbol unicode="000022AE" tex="\nVdash"/>
+ <symbol unicode="000022AF" tex="\nVDash"/>
+ <symbol unicode="000022B2" tex="\lhd"/>
+ <symbol unicode="000022B3" tex="\rhd"/>
+ <symbol unicode="000022B4" tex="\unlhd"/>
+ <symbol unicode="000022B5" tex="\unrhd"/>
+ <symbol unicode="000022B8" tex="\multimap"/>
+ <symbol unicode="000022BA" tex="\intercal"/>
+ <symbol unicode="000022BB" tex="\veebar"/>
+ <symbol unicode="000022C0" tex="\bigwedge"/>
+ <symbol unicode="000022C1" tex="\bigvee"/>
+ <symbol unicode="000022C2" tex="\bigcap"/>
+ <symbol unicode="000022C3" tex="\bigcup"/>
+ <symbol unicode="000022C4" tex="\diamond"/>
+ <symbol unicode="000022C5" tex="\cdot"/>
+ <symbol unicode="000022C6" tex="\star"/>
+ <symbol unicode="000022C7" tex="\divideontimes"/>
+ <symbol unicode="000022C8" tex="\bowtie"/>
+ <symbol unicode="000022C9" tex="\ltimes"/>
+ <symbol unicode="000022CA" tex="\rtimes"/>
+ <symbol unicode="000022CB" tex="\leftthreetimes"/>
+ <symbol unicode="000022CC" tex="\rightthreetimes"/>
+ <symbol unicode="000022CD" tex="\backsimeq"/>
+ <symbol unicode="000022CE" tex="\curlyvee"/>
+ <symbol unicode="000022CF" tex="\curlywedge"/>
+ <symbol unicode="000022D0" tex="\Subset"/>
+ <symbol unicode="000022D1" tex="\Supset"/>
+ <symbol unicode="000022D2" tex="\Cap"/>
+ <symbol unicode="000022D3" tex="\Cup"/>
+ <symbol unicode="000022D4" tex="\pitchfork"/>
+ <symbol unicode="000022D6" tex="\lessdot"/>
+ <symbol unicode="000022D7" tex="\gtrdot"/>
+ <symbol unicode="000022D8" tex="\lll"/>
+ <symbol unicode="000022D9" tex="\ggg"/>
+ <symbol unicode="000022DA" tex="\lesseqgtr"/>
+ <symbol unicode="000022DB" tex="\gtreqless"/>
+ <symbol unicode="000022DE" tex="\curlyeqprec"/>
+ <symbol unicode="000022DF" tex="\curlyeqsucc"/>
+ <symbol unicode="000022E6" tex="\lnsim"/>
+ <symbol unicode="000022E7" tex="\gnsim"/>
+ <symbol unicode="000022E8" tex="\precnsim"/>
+ <symbol unicode="000022E9" tex="\succnsim"/>
+ <symbol unicode="000022EA" tex="\ntriangleleft"/>
+ <symbol unicode="000022EB" tex="\ntriangleright"/>
+ <symbol unicode="000022EC" tex="\ntrianglelefteq"/>
+ <symbol unicode="000022ED" tex="\ntrianglerighteq"/>
+ <symbol unicode="000022EE" tex="\vdots"/>
+ <symbol unicode="000022EF" tex="\cdots"/>
+ <symbol unicode="000022F1" tex="\ddots"/>
+ <symbol unicode="00002305" tex="\barwedge"/>
+ <symbol unicode="00002306" tex="\doublebarwedge"/>
+ <symbol unicode="00002308" tex="\lceil"/>
+ <symbol unicode="00002309" tex="\rceil"/>
+ <symbol unicode="0000230A" tex="\lfloor"/>
+ <symbol unicode="0000230B" tex="\rfloor"/>
+ <symbol unicode="0000231C" tex="\ulcorner"/>
+ <symbol unicode="0000231D" tex="\urcorner"/>
+ <symbol unicode="0000231E" tex="\llcorner"/>
+ <symbol unicode="0000231F" tex="\lrcorner"/>
+ <symbol unicode="00002322" tex="\frown"/>
+ <symbol unicode="00002323" tex="\smile"/>
+ <symbol unicode="00002329" tex="\langle"/>
+ <symbol unicode="0000232A" tex="\rangle"/>
+ <symbol unicode="000025A1" tex="\square"/>
+ <symbol unicode="000025B3" tex="\triangle"/>
+ <symbol unicode="000025B4" tex="\blacktriangle"/>
+ <symbol unicode="000025B5" tex="\vartriangle"/>
+ <symbol unicode="000025B6" tex="\blacktriangleright"/>
+ <symbol unicode="000025B9" tex="\triangleright"/>
+ <symbol unicode="000025BD" tex="\bigtriangledown"/>
+ <symbol unicode="000025BE" tex="\blacktriangledown"/>
+ <symbol unicode="000025BF" tex="\triangledown"/>
+ <symbol unicode="000025C0" tex="\blacktriangleleft"/>
+ <symbol unicode="000025C3" tex="\triangleleft"/>
+ <symbol unicode="000025CA" tex="\lozenge"/>
+ <symbol unicode="000025EF" tex="\bigcirc"/>
+ <symbol unicode="000025FC" tex="\blacksquare"/>
+ <symbol unicode="00002605" tex="\bigstar"/>
+ <symbol unicode="00002660" tex="\spadesuit"/>
+ <symbol unicode="00002663" tex="\clubsuit"/>
+ <symbol unicode="00002665" tex="\heartsuit"/>
+ <symbol unicode="00002666" tex="\diamondsuit"/>
+ <symbol unicode="0000266D" tex="\flat"/>
+ <symbol unicode="0000266E" tex="\natural"/>
+ <symbol unicode="0000266F" tex="\sharp"/>
+ <symbol unicode="00002713" tex="\checkmark"/>
+ <symbol unicode="0000290E" tex="\dashleftarrow"/>
+ <symbol unicode="0000290F" tex="\dashrightarrow"/>
+ <symbol unicode="000029EB" tex="\blacklozenge"/>
+ <symbol unicode="00002A00" tex="\bigodot"/>
+ <symbol unicode="00002A01" tex="\bigoplus"/>
+ <symbol unicode="00002A02" tex="\bigotimes"/>
+ <symbol unicode="00002A04" tex="\biguplus"/>
+ <symbol unicode="00002A06" tex="\bigsqcup"/>
+ <symbol unicode="00002A0C" tex="\iiiint"/>
+ <symbol unicode="00002A3F" tex="\amalg"/>
+ <symbol unicode="00002A7D" tex="\leqslant"/>
+ <symbol unicode="00002A7E" tex="\geqslant"/>
+ <symbol unicode="00002A85" tex="\lessapprox"/>
+ <symbol unicode="00002A86" tex="\gtrapprox"/>
+ <symbol unicode="00002A89" tex="\lnapprox"/>
+ <symbol unicode="00002A8A" tex="\gnapprox"/>
+ <symbol unicode="00002A8B" tex="\lesseqqgtr"/>
+ <symbol unicode="00002A8C" tex="\gtreqqless"/>
+ <symbol unicode="00002A95" tex="\eqslantless"/>
+ <symbol unicode="00002A96" tex="\eqslantgtr"/>
+ <symbol unicode="00002AAF" tex="\preceq"/>
+ <symbol unicode="00002AB0" tex="\succeq"/>
+ <symbol unicode="00002AB5" tex="\precneqq"/>
+ <symbol unicode="00002AB6" tex="\succneqq"/>
+ <symbol unicode="00002AB7" tex="\precapprox"/>
+ <symbol unicode="00002AB8" tex="\succapprox"/>
+ <symbol unicode="00002AB9" tex="\precnapprox"/>
+ <symbol unicode="00002ABA" tex="\succnapprox"/>
+ <symbol unicode="00002AC5" tex="\subseteqq"/>
+ <symbol unicode="00002AC6" tex="\supseteqq"/>
+ <symbol unicode="00002ACB" tex="\subsetneqq"/>
+ <symbol unicode="00002ACC" tex="\supsetneqq"/>
+</symbols>
diff --git a/blahtexml/source/BlahtexCore/Interface.cpp b/blahtexml/source/BlahtexCore/Interface.cpp
new file mode 100644
index 0000000..e63be77
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Interface.cpp
@@ -0,0 +1,63 @@
+// File "Interface.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <sstream>
+#include <stdexcept>
+#include "Interface.h"
+#include "MathmlNode.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// Replaces the current Manager (if any) with a newly constructed one and
+// hands it "input" together with the mTexvcCompatibility flag.
+void Interface::ProcessInput(const wstring& input)
+{
+    Manager* freshManager = new Manager;
+
+    // reset() destroys the previously held Manager and takes ownership of
+    // the new one.
+    mManager.reset(freshManager);
+
+    mManager->ProcessInput(input, mTexvcCompatibility);
+}
+
+// Returns the MathML output as a wide string.
+//
+// The tree is obtained from Manager::GenerateMathml() using mMathmlOptions
+// and is then printed with mEncodingOptions and mIndented.
+//
+// Throws logic_error if ProcessInput() has not been called yet (there is
+// no Manager to query in that case). Anything thrown by the Manager or by
+// the printing code propagates unchanged.
+wstring Interface::GetMathml()
+{
+    // mManager stays null until ProcessInput() runs; dereferencing it
+    // would be undefined behaviour.
+    if (!mManager.get())
+        throw logic_error(
+            "Interface::GetMathml called before Interface::ProcessInput"
+        );
+
+    wostringstream output;
+    auto_ptr<MathmlNode> root = mManager->GenerateMathml(mMathmlOptions);
+    root->Print(output, mEncodingOptions, mIndented);
+    return output.str();
+}
+
+// Returns the result of Manager::GeneratePurifiedTex() called with
+// mPurifiedTexOptions.
+//
+// Throws logic_error if ProcessInput() has not been called yet (there is
+// no Manager to query in that case). Anything thrown by the Manager
+// propagates unchanged.
+wstring Interface::GetPurifiedTex()
+{
+    // mManager stays null until ProcessInput() runs; dereferencing it
+    // would be undefined behaviour.
+    if (!mManager.get())
+        throw logic_error(
+            "Interface::GetPurifiedTex called before Interface::ProcessInput"
+        );
+
+    return mManager->GeneratePurifiedTex(mPurifiedTexOptions);
+}
+
+#ifdef BLAHTEXML_USING_XERCES
+// Generates the MathML tree with mMathmlOptions and forwards it to
+// MathmlNode::PrintAsSAX2(), passing "sax", "prefix" and "ignoreFirstmrow"
+// through unchanged.
+//
+// Throws logic_error if ProcessInput() has not been called yet (there is
+// no Manager to query in that case).
+void Interface::PrintAsSAX2(ContentHandler& sax, const wstring& prefix, bool ignoreFirstmrow) const
+{
+    // mManager stays null until ProcessInput() runs; dereferencing it
+    // would be undefined behaviour.
+    if (!mManager.get())
+        throw logic_error(
+            "Interface::PrintAsSAX2 called before Interface::ProcessInput"
+        );
+
+    // No string stream is needed here: the output goes to the SAX handler.
+    auto_ptr<MathmlNode> root = mManager->GenerateMathml(mMathmlOptions);
+    root->PrintAsSAX2(sax, prefix, ignoreFirstmrow);
+}
+#endif
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/Interface.h b/blahtexml/source/BlahtexCore/Interface.h
new file mode 100644
index 0000000..d2d3ad4
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Interface.h
@@ -0,0 +1,90 @@
+// File "Interface.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_INTERFACE_H
+#define BLAHTEX_INTERFACE_H
+
+#include <string>
+#include <memory>
+#include "Misc.h"
+#include "Manager.h"
+#include "XmlEncode.h"
+
+#ifdef BLAHTEXML_USING_XERCES
+#include <xercesc/sax2/ContentHandler.hpp>
+XERCES_CPP_NAMESPACE_USE
+#endif
+
+namespace blahtex
+{
+
+// If you want to use blahtex in your own code, using an Interface object
+// is probably the easiest way to do it. It's essentially a wrapper for
+// the Manager class, putting all the options and methods in one convenient
+// place.
+//
+// To use it:
+// (1) Declare an Interface object
+// (2) Set the various public members to control options
+// (the data types are explained in Misc.h)
+// (3) Call ProcessInput() on your input
+// (4) Call GetMathml() to get the MathML output
+// (5) Call GetPurifiedTex() to get a complete TeX file that could be sent
+// to latex to generate graphical output
+
+class Interface
+{
+private:
+    // Manager created by the most recent ProcessInput() call. It is null
+    // until ProcessInput() has been called at least once.
+    std::auto_ptr<Manager> mManager;
+
+public:
+    // Options handed to Manager::GenerateMathml().
+    MathmlOptions mMathmlOptions;
+
+    // Options handed to MathmlNode::Print() by GetMathml().
+    EncodingOptions mEncodingOptions;
+
+    // Options handed to Manager::GeneratePurifiedTex().
+    PurifiedTexOptions mPurifiedTexOptions;
+
+    // Second argument of Manager::ProcessInput(); off by default.
+    bool mTexvcCompatibility;
+
+    // Last argument of MathmlNode::Print() in GetMathml(); off by default.
+    bool mIndented;
+
+    // Both boolean switches start out false; the option structs and the
+    // manager pointer are default-constructed.
+    Interface() : mTexvcCompatibility(false), mIndented(false) {}
+
+    // Read-only access to the underlying Manager (may be null, see above).
+    const Manager* GetManager() const { return mManager.get(); }
+
+    // Creates a new Manager and lets it process "input".
+    void ProcessInput(const std::wstring& input);
+
+    // Returns the MathML output for the processed input.
+    std::wstring GetMathml();
+
+    // Returns the purified TeX for the processed input.
+    std::wstring GetPurifiedTex();
+
+#ifdef BLAHTEXML_USING_XERCES
+    // Emits the MathML output through the given SAX2 content handler.
+    void PrintAsSAX2(ContentHandler& sax, const std::wstring& prefix, bool ignoreFirstmrow) const;
+#endif
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/LayoutTree.cpp b/blahtexml/source/BlahtexCore/LayoutTree.cpp
new file mode 100644
index 0000000..77a15c0
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/LayoutTree.cpp
@@ -0,0 +1,1677 @@
+// File "LayoutTree.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <iomanip>
+#include <sstream>
+#include <stdexcept>
+#include <list>
+#include <set>
+#include <map>
+#include "MathmlNode.h"
+#include "LayoutTree.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// Translates a layout-tree style plus a colour into a MathML environment:
+//   - mDisplayStyle is true only for cStyleDisplay;
+//   - mScriptLevel is 0 for display/text, 1 for script, 2 for scriptscript.
+// Any other style value throws logic_error.
+MathmlEnvironment::MathmlEnvironment(
+    LayoutTree::Node::Style style,
+    RGBColour colour
+)
+{
+    mColour = colour;
+    mDisplayStyle = (style == LayoutTree::Node::cStyleDisplay);
+
+    if (
+        style == LayoutTree::Node::cStyleDisplay ||
+        style == LayoutTree::Node::cStyleText
+    )
+        mScriptLevel = 0;
+    else if (style == LayoutTree::Node::cStyleScript)
+        mScriptLevel = 1;
+    else if (style == LayoutTree::Node::cStyleScriptScript)
+        mScriptLevel = 2;
+    else
+        throw logic_error(
+            "Unexpected style value in "
+            "MathmlEnvironment::MathmlEnvironment"
+        );
+}
+
+
+// Two environments are equal when display style, script level and colour
+// all match.
+bool operator== (const MathmlEnvironment& x, const MathmlEnvironment& y)
+{
+    if (x.mDisplayStyle != y.mDisplayStyle)
+        return false;
+
+    if (x.mScriptLevel != y.mScriptLevel)
+        return false;
+
+    return x.mColour == y.mColour;
+}
+
+
+namespace LayoutTree
+{
+
+// Deletes every child node held in mChildren.
+Row::~Row()
+{
+    list<Node*>::iterator child = mChildren.begin();
+    while (child != mChildren.end())
+    {
+        delete *child;
+        ++child;
+    }
+}
+
+// Deletes every cell node of every row held in mRows.
+Table::~Table()
+{
+    typedef vector<Node*> Cells;
+
+    for (vector<Cells>::size_type rowIndex = 0;
+        rowIndex < mRows.size();
+        ++rowIndex
+    )
+    {
+        Cells& cells = mRows[rowIndex];
+        for (Cells::size_type column = 0; column < cells.size(); ++column)
+            delete cells[column];
+    }
+}
+
+
+// Adds one to "nodeCount" and throws Exception(L"TooManyMathmlNodes") as
+// soon as the new value reaches cMaxMathmlNodeCount.
+void IncrementNodeCount(unsigned& nodeCount)
+{
+    nodeCount += 1;
+
+    const bool limitReached = (nodeCount >= cMaxMathmlNodeCount);
+    if (limitReached)
+        throw Exception(L"TooManyMathmlNodes");
+}
+
+
+// Returns the core of a MathML expression (see "embellished operators" in
+// the MathML spec), or NULL when given NULL. Callers use the result to
+// locate an <mo> node whose "lspace"/"rspace" attributes may need setting.
+//
+// Script and under/over constructs are looked through by following their
+// first child; any other node is its own core.
+MathmlNode* GetCore(MathmlNode* node)
+{
+    // FIX: this is not quite complete. An <mrow>, <mstyle> or similar
+    // wrapper that contains a single embellished operator is not looked
+    // through. It probably does not matter, since such input does not seem
+    // to arise in practice, but it may be worth handling just in case.
+
+    for (
+        MathmlNode* current = node;
+        current != NULL;
+        current = current->mChildren.front()
+    )
+    {
+        switch (current->mType)
+        {
+            case MathmlNode::cTypeMsub:
+            case MathmlNode::cTypeMsup:
+            case MathmlNode::cTypeMsubsup:
+            case MathmlNode::cTypeMunder:
+            case MathmlNode::cTypeMover:
+            case MathmlNode::cTypeMunderover:
+                // Keep descending into the base (first child).
+                break;
+
+            default:
+                return current;
+        }
+    }
+
+    return NULL;
+}
+
+
+// Formats an RGBColour as "#rrggbb": a '#' followed by the value in
+// hexadecimal, left-padded with zeros to a width of six.
+wstring FormatColour(RGBColour colour)
+{
+    wostringstream formatted;
+
+    formatted << L"#";
+    formatted << hex << setfill(L'0');
+
+    // setw() only affects the next insertion, so it must directly precede
+    // the colour value.
+    formatted << setw(6) << colour;
+
+    return formatted.str();
+}
+
+
+// This function compares sourceEnvironment to targetEnvironment. It then
+// modifies "node" by inserting appropriate attributes or possibly an
+// <mstyle> node, so that the node receives the desired "target
+// environment", assuming that it inherited the indicated "source
+// environment".
+
+// FIX: sometimes firefox doesn't get the scriptlevel correct for
+// tables. (see mozilla bug 328141). So for the moment, we force an
+// extra <mstyle> node around every table to handle the scriptlevel.
+#define MOZILLA_BUG_328141_WORKAROUND 1
+
+// Takes ownership of "node" and returns a tree that renders it in
+// "targetEnvironment", given that it would otherwise inherit
+// "sourceEnvironment". The result is either the original node (possibly
+// with extra attributes) or a new <mstyle> node wrapping it.
+auto_ptr<MathmlNode> AdjustMathmlEnvironment(
+    auto_ptr<MathmlNode> node,
+    MathmlEnvironment sourceEnvironment,
+    MathmlEnvironment targetEnvironment
+)
+{
+    const bool sameDisplayStyle =
+        (sourceEnvironment.mDisplayStyle == targetEnvironment.mDisplayStyle);
+    const bool sameScriptLevel =
+        (sourceEnvironment.mScriptLevel == targetEnvironment.mScriptLevel);
+    const bool sameColour =
+        (sourceEnvironment.mColour == targetEnvironment.mColour);
+
+    bool nothingToAdjust = sameDisplayStyle && sameScriptLevel && sameColour;
+#if MOZILLA_BUG_328141_WORKAROUND
+    // Tables always receive an explicit scriptlevel, so they are never
+    // passed through untouched.
+    nothingToAdjust =
+        nothingToAdjust && node->mType != MathmlNode::cTypeMtable;
+#endif
+    if (nothingToAdjust)
+        return node;
+
+    auto_ptr<MathmlNode> wrapper(new MathmlNode(MathmlNode::cTypeMstyle));
+    const bool isTable = (node->mType == MathmlNode::cTypeMtable);
+
+    if (!sameDisplayStyle)
+    {
+        // For <mtable> the MathML spec requires displaystyle on the
+        // <mtable> element itself, because its default of "false"
+        // overrides any enclosing <mstyle>. Everything else gets the
+        // attribute on the wrapper.
+        MathmlNode& displayStyleOwner = isTable ? *node : *wrapper;
+        displayStyleOwner.mAttributes[MathmlNode::cAttributeDisplaystyle] =
+            (targetEnvironment.mDisplayStyle) ? L"true" : L"false";
+    }
+
+    bool needScriptLevel = !sameScriptLevel;
+#if MOZILLA_BUG_328141_WORKAROUND
+    needScriptLevel = needScriptLevel || isTable;
+#endif
+    if (needScriptLevel)
+    {
+        wostringstream levelText;
+        levelText << targetEnvironment.mScriptLevel;
+        wrapper->mAttributes[MathmlNode::cAttributeScriptlevel] =
+            levelText.str();
+    }
+
+    if (!sameColour)
+    {
+        // Token elements can carry mathcolor directly; anything else gets
+        // it on the wrapper.
+        const bool isToken =
+            node->mType == MathmlNode::cTypeMi ||
+            node->mType == MathmlNode::cTypeMo ||
+            node->mType == MathmlNode::cTypeMn ||
+            node->mType == MathmlNode::cTypeMtext;
+
+        MathmlNode& colourOwner = isToken ? *node : *wrapper;
+        colourOwner.mAttributes[MathmlNode::cAttributeMathcolor] =
+            FormatColour(targetEnvironment.mColour);
+    }
+
+    // The wrapper may have ended up with no attributes: either the node
+    // is an <mtable> whose displaystyle alone changed, or it is a token
+    // element whose colour alone changed. Then the original node is
+    // returned and the unused wrapper is discarded.
+    if (wrapper->mAttributes.empty())
+        return node;
+
+    if (node->mType == MathmlNode::cTypeMrow)
+    {
+        // An <mrow> child is redundant inside <mstyle>: adopt its children
+        // and let the emptied <mrow> be destroyed with "node".
+        wrapper->mChildren.swap(node->mChildren);
+    }
+    else
+    {
+        wrapper->mChildren.push_back(node.release());
+    }
+
+    return wrapper;
+}
+
+
+// Builds the MathML for this row.
+//
+// options              - output options; mSpacingControl decides how much
+//                        explicit spacing markup is emitted.
+// inheritedEnvironment - the environment the returned node will inherit.
+// nodeCount            - running count of MathML nodes, passed to
+//                        IncrementNodeCount() for every node created here.
+//
+// Returns an <mrow> (or, if the row produced exactly one node, that node by
+// itself), adjusted via AdjustMathmlEnvironment() for the first child's
+// environment. A row that produces no nodes at all yields an empty <mrow>.
+auto_ptr<MathmlNode> Row::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    // The strategy is:
+    // First write each output node to "outputNode", but simultaneously
+    // keep a list of MathmlEnvironments corresponding to the desired
+    // environment for each node. Then, do a second pass inserting <mstyle>
+    // nodes to implement those desired environments.
+
+    auto_ptr<MathmlNode> outputNode(new MathmlNode(MathmlNode::cTypeMrow));
+    list<MathmlNode*>& outputList = outputNode->mChildren;
+
+    IncrementNodeCount(nodeCount);
+
+    if (mChildren.empty())
+        return outputNode;
+
+    // environments[i] is the desired environment of the i-th entry of
+    // outputList; the two containers are always extended together.
+    vector<MathmlEnvironment> environments;
+
+    // The loop deliberately runs one extra time with source at end(), so
+    // that the space after the last node is handled like any other.
+    for (list<Node*>::const_iterator
+        source = mChildren.begin();
+        true;
+        ++source
+    )
+    {
+        MathmlNode* previousTarget =
+            outputList.empty() ? NULL : outputList.back();
+
+        int spaceWidth = 0;
+        bool isUserRequested = false;
+
+        // A Space child is consumed here and remembered as the gap between
+        // the previous output node and the next one.
+        Space* sourceAsSpace =
+            (source == mChildren.end())
+                ? NULL : dynamic_cast<Space*>(*source);
+        if (sourceAsSpace)
+        {
+            spaceWidth = sourceAsSpace->mWidth;
+            isUserRequested = sourceAsSpace->mIsUserRequested;
+            source++;
+        }
+
+        MathmlNode* currentTarget = NULL;
+        if (source != mChildren.end())
+        {
+            environments.push_back(
+                MathmlEnvironment((*source)->mStyle, (*source)->mColour)
+            );
+
+            outputList.push_back(
+                (*source)->BuildMathmlTree(
+                    options,
+                    environments.back(),
+                    nodeCount
+                ).release()
+            );
+
+            currentTarget = outputList.back();
+        }
+
+        // Now decide about whether to insert markup for the
+        // space between currentNode and previousNode.
+
+        MathmlNode* currentNucleus = GetCore(currentTarget);
+        MathmlNode* previousNucleus = GetCore(previousTarget);
+
+        bool isPreviousMo =
+            previousNucleus &&
+            (previousNucleus->mType == MathmlNode::cTypeMo);
+
+        bool isCurrentMo =
+            currentNucleus &&
+            (currentNucleus->mType == MathmlNode::cTypeMo);
+
+        bool doSpace = false;
+
+        if (
+            options.mSpacingControl
+                == MathmlOptions::cSpacingControlStrict
+            || isUserRequested
+        )
+            doSpace = true;
+
+        else if (options.mSpacingControl ==
+            MathmlOptions::cSpacingControlModerate
+        )
+        {
+            // The user has asked for "moderate" spacing mode, so we
+            // need to give the MathML renderer a helping hand with
+            // spacing decisions, without being *too* pushy.
+
+            // This section of code is likely to change a LOT.
+
+            // Note: I scratched most of this as of blahtex 0.4.4....
+            // it was getting really ugly and I need to think of another
+            // way to do it
+
+            if (!isPreviousMo && !isCurrentMo)
+                doSpace = (spaceWidth != 0);
+        }
+
+        if (doSpace)
+        {
+            // We have established that we want to mark up some space,
+            // now need to decide how to do it.
+
+            // We use <mspace>, unless we have an <mo> node on either
+            // side (or both sides), in which case we use "lspace"
+            // and/or "rspace" attributes.
+
+            // spaceWidth is divided by 18 to obtain a width in em.
+            wstring widthAsString;
+            if (spaceWidth == 0)
+                widthAsString = L"0";
+            else
+            {
+                wostringstream wos;
+                wos << fixed << setprecision(3)
+                    << (spaceWidth / 18.0) << L"em";
+                widthAsString = wos.str();
+            }
+
+            if (isPreviousMo)
+            {
+                previousNucleus->mAttributes
+                    [MathmlNode::cAttributeRspace] = widthAsString;
+                if (isCurrentMo)
+                    currentNucleus->mAttributes
+                        [MathmlNode::cAttributeLspace] = L"0";
+            }
+            else if (isCurrentMo)
+                currentNucleus->mAttributes
+                    [MathmlNode::cAttributeLspace] = widthAsString;
+            else
+            {
+                // FIX: this <mi>-specific stuff is a nasty hack because
+                // Firefox likes to mess around with the space between
+                // adjacent <mi> nodes in some situations.
+                // See https://bugzilla.mozilla.org/show_bug.cgi?id=320294
+
+                bool isPreviousMi =
+                    previousNucleus &&
+                    (previousNucleus->mType == MathmlNode::cTypeMi);
+
+                bool isCurrentMi =
+                    currentNucleus &&
+                    (currentNucleus->mType == MathmlNode::cTypeMi);
+
+                if (spaceWidth != 0 || (isPreviousMi && isCurrentMi))
+                {
+                    auto_ptr<MathmlNode> spaceNode(
+                        new MathmlNode(MathmlNode::cTypeMspace)
+                    );
+                    IncrementNodeCount(nodeCount);
+                    spaceNode->mAttributes
+                        [MathmlNode::cAttributeWidth] = widthAsString;
+
+                    if (currentTarget)
+                    {
+                        // The <mspace> goes just before the current node
+                        // and shares that node's environment.
+                        outputList.insert(
+                            --outputList.end(),
+                            spaceNode.release()
+                        );
+                        environments.push_back(environments.back());
+                    }
+                    else
+                    {
+                        // Trailing space: nothing follows it, so it takes
+                        // the row's own environment.
+                        outputList.push_back(spaceNode.release());
+                        environments.push_back(
+                            MathmlEnvironment(mStyle, mColour)
+                        );
+                    }
+                }
+            }
+        }
+
+        if (source == mChildren.end())
+            break;
+    }
+
+    // The row can end up with no output at all (for example if its only
+    // child is a Space that did not produce any markup). In that case
+    // "environments" is empty and must not be indexed below, so return the
+    // empty <mrow>, exactly as for a row without children.
+    if (environments.empty())
+        return outputNode;
+
+    // Now do second pass where styles get adjusted.
+    // We walk backwards; whenever a node's environment differs from that of
+    // the node after it, everything after it is wrapped up (in an <mrow> if
+    // there is more than one node) and adjusted relative to this node's
+    // environment.
+    list<MathmlNode*>::iterator outputPtr = outputList.end();
+    for (vector<MathmlEnvironment>::reverse_iterator
+        environment = environments.rbegin();
+        environment != environments.rend();
+        environment++
+    )
+    {
+        if (outputPtr != outputList.begin())
+            outputPtr--;
+
+        if (environment == environments.rbegin())
+            continue;
+
+        // On a reverse iterator, environment[-1] is the element visited on
+        // the previous iteration, i.e. the one *following* environment[0]
+        // in forward order.
+        if (!(environment[-1] == environment[0]))
+        {
+            list<MathmlNode*>::iterator previousOutputPtr = outputPtr;
+            previousOutputPtr++;
+
+            auto_ptr<MathmlNode> enclosedNode;
+
+            if (--outputList.end() == previousOutputPtr)
+            {
+                // If outputPtr is already the last node, we don't need
+                // to create a new <mrow>
+                enclosedNode.reset(*previousOutputPtr);
+                outputList.pop_back();
+            }
+            else
+            {
+                enclosedNode.reset(new MathmlNode(MathmlNode::cTypeMrow));
+                enclosedNode->mChildren.splice(
+                    enclosedNode->mChildren.begin(),
+                    outputList,
+                    previousOutputPtr,
+                    outputList.end()
+                );
+            }
+
+            outputList.push_back(
+                AdjustMathmlEnvironment(
+                    enclosedNode,
+                    environment[0],
+                    environment[-1]
+                ).release()
+            );
+        }
+    }
+
+    // If the result is an <mrow> with a single child, just return the
+    // child by itself.
+    // (We don't use list::size() here because that's O(n) :-))
+    if (!outputNode->mChildren.empty() &&
+        outputNode->mChildren.front() == outputNode->mChildren.back()
+    )
+    {
+        MathmlNode* child = outputNode->mChildren.back();
+        outputNode->mChildren.pop_back();   // relinquish ownership
+        outputNode.reset(child);
+    }
+
+    return AdjustMathmlEnvironment(
+        outputNode,
+        inheritedEnvironment,
+        environments[0]
+    );
+}
+
+
+// Some "MathML styled text" letters do not live at the plane-1 code point
+// their position in the alphabet would suggest; for historical reasons they
+// were assigned a code point elsewhere. (For example the double-struck "C",
+// &Copf;, belongs at U+1D53A by sequence but really is U+2102.)
+//
+// Given the code point a character SHOULD have, this returns the code point
+// it REALLY has. Characters that are not affected are returned unchanged.
+wchar_t FixOutOfSequenceMathmlCharacter(wchar_t c)
+{
+    struct Relocation
+    {
+        wchar_t mNominal;   // where the character should be by sequence
+        wchar_t mActual;    // where it actually is
+    };
+
+    static const Relocation relocations[] =
+    {
+        { L'\U0001D49D', L'\U0000212C' },   // script B
+        { L'\U0001D4A0', L'\U00002130' },   // script E
+        { L'\U0001D4A1', L'\U00002131' },   // script F
+        { L'\U0001D4A3', L'\U0000210B' },   // script H
+        { L'\U0001D4A4', L'\U00002110' },   // script I
+        { L'\U0001D4A7', L'\U00002112' },   // script L
+        { L'\U0001D4A8', L'\U00002133' },   // script M
+        { L'\U0001D4AD', L'\U0000211B' },   // script R
+        { L'\U0001D53A', L'\U00002102' },   // double struck C
+        { L'\U0001D53F', L'\U0000210D' },   // double struck H
+        { L'\U0001D545', L'\U00002115' },   // double struck N
+        { L'\U0001D547', L'\U00002119' },   // double struck P
+        { L'\U0001D548', L'\U0000211A' },   // double struck Q
+        { L'\U0001D549', L'\U0000211D' },   // double struck R
+        { L'\U0001D551', L'\U00002124' },   // double struck Z
+        { L'\U0001D506', L'\U0000212D' },   // fraktur C
+        { L'\U0001D50B', L'\U0000210C' },   // fraktur H
+        { L'\U0001D50C', L'\U00002111' },   // fraktur I
+        { L'\U0001D515', L'\U0000211C' },   // fraktur R
+        { L'\U0001D51D', L'\U00002128' }    // fraktur Z
+    };
+
+    const unsigned relocationCount =
+        sizeof(relocations) / sizeof(relocations[0]);
+
+    for (unsigned i = 0; i < relocationCount; i++)
+        if (relocations[i].mNominal == c)
+            return relocations[i].mActual;
+
+    return c;
+}
+
+
+// Emits an <mi> for this identifier. In the ordinary case the font is
+// expressed through AddFontAttributes(). When MathML 1.x font attributes
+// are requested and the font is one of the "fancy" ones (script, fraktur,
+// double-struck and the bold variants), the single letter is instead
+// replaced by the explicit character for that font.
+//
+// Throws logic_error if the fancy-font path sees text that is not exactly
+// one character, or a character with no counterpart in the font.
+auto_ptr<MathmlNode> SymbolIdentifier::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    auto_ptr<MathmlNode> node(new MathmlNode(MathmlNode::cTypeMi, mText));
+    IncrementNodeCount(nodeCount);
+
+    const bool isScript =
+        (mFont == cMathmlFontScript || mFont == cMathmlFontBoldScript);
+    const bool isFraktur =
+        (mFont == cMathmlFontFraktur || mFont == cMathmlFontBoldFraktur);
+    const bool isDoubleStruck = (mFont == cMathmlFontDoubleStruck);
+
+    const bool needsExplicitCharacter =
+        options.mUseVersion1FontAttributes &&
+        (isScript || isFraktur || isDoubleStruck);
+
+    if (!needsExplicitCharacter)
+    {
+        // Ordinary case: font attributes do the job.
+        node->AddFontAttributes(mFont, options);
+        return AdjustMathmlEnvironment(
+            node, inheritedEnvironment, MathmlEnvironment(mStyle, mColour)
+        );
+    }
+
+    if (mText.size() != 1)
+        throw logic_error(
+            "Unexpected string length in "
+            "SymbolIdentifier::BuildMathmlTree()"
+        );
+
+    const bool isBold =
+        (mFont == cMathmlFontBoldScript || mFont == cMathmlFontBoldFraktur);
+
+    // For the bold fonts the dedicated bold characters are used only when
+    // plane 1 characters are allowed. Otherwise we fall back to the
+    // non-bold character plus fontweight="bold", e.g.
+    //     <mi fontweight="bold">&Acal;</mi>
+    // since there aren't specific MathML names for the bold capitals.
+    const bool useBoldCharacters = isBold && options.mAllowPlane1;
+    if (isBold && !useBoldCharacters)
+        node->mAttributes[MathmlNode::cAttributeFontweight] = L"bold";
+
+    // The characters standing for "A" and "a" in the requested font
+    // (zero where the font has none).
+    wchar_t capitalA = 0;
+    wchar_t smallA = 0;
+
+    if (isScript)
+        capitalA = useBoldCharacters ? L'\U0001D4D0' : L'\U0001D49C';
+    else if (isFraktur)
+    {
+        capitalA = useBoldCharacters ? L'\U0001D56C' : L'\U0001D504';
+        smallA = useBoldCharacters ? L'\U0001D586' : L'\U0001D51E';
+    }
+    else
+        // Only double-struck remains.
+        capitalA = L'\U0001D538';
+
+    const wchar_t letter = mText[0];
+    wchar_t substitute = 0;
+
+    if (capitalA && letter >= 'A' && letter <= 'Z')
+        substitute = capitalA + (letter - 'A');
+    if (smallA && letter >= 'a' && letter <= 'z')
+        substitute = smallA + (letter - 'a');
+
+    if (!substitute)
+        throw logic_error(
+            "Unexpected character/font combination in "
+            "SymbolIdentifier::BuildMathmlTree()"
+        );
+
+    // A few characters are not located where their sequence position
+    // suggests; correct for that.
+    node->mText = wstring(1, FixOutOfSequenceMathmlCharacter(substitute));
+
+    return AdjustMathmlEnvironment(
+        node, inheritedEnvironment, MathmlEnvironment(mStyle, mColour)
+    );
+}
+
+
+// Emits the MathML for an operator symbol.
+//
+// * The placeholder text "NOT" (a "\not" that could not be merged with the
+//   following operator) becomes a zero-width <mpadded> holding a small
+//   <mspace> and a "/" operator. It is returned as-is, without going
+//   through AdjustMathmlEnvironment().
+// * Any other operator becomes an <mo>, with explicit "stretchy" and
+//   "accent" attributes wherever the desired behaviour differs from what
+//   the tables below say is the default. An accent is returned directly,
+//   without font attributes or environment adjustment.
+//
+// Every node created here is reported through IncrementNodeCount(), like
+// in the other BuildMathmlTree() implementations.
+auto_ptr<MathmlNode> SymbolOperator::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    // These are all the operators that stretch by default in the normative
+    // operator dictionary. If we *don't* want them to stretch, we need
+    // to explicitly say so.
+    static wchar_t stretchyByDefaultArray[] =
+    {
+        L'(',
+        L')',
+        L'[',
+        L']',
+        L'{',
+        L'}',
+        L'|',
+        L'/',
+        L'\U000002DC',      // DiacriticalTilde
+        L'\U000002C7',      // Hacek
+        L'\U000002D8',      // Breve
+        L'\U00002216',      // Backslash
+        L'\U00002329',      // LeftAngleBracket
+        L'\U0000232A',      // RightAngleBracket
+        L'\U00002308',      // LeftCeiling
+        L'\U00002309',      // RightCeiling
+        L'\U0000230A',      // LeftFloor
+        L'\U0000230B',      // RightFloor
+        L'\U00002211',      // Sum
+        L'\U0000220F',      // Product
+        L'\U0000222B',      // Integral
+        L'\U0000222C',      // Int
+        L'\U0000222D',      // iiint
+        L'\U00002A0C',      // iiiint
+        L'\U0000222E',      // ContourIntegral
+        L'\U000022C2',      // Intersection
+        L'\U00002A00',      // bigodot
+        L'\U00002A02',      // bigotimes
+        L'\U00002210',      // Coproduct
+        L'\U00002A06',      // bigsqcup
+        L'\U00002A01',      // bigoplus
+        L'\U000022C1',      // Vee
+        L'\U00002A04',      // biguplus
+        L'\U000022C0'       // Wedge
+    };
+    static wishful_hash_set<wchar_t> stretchyByDefaultTable(
+        stretchyByDefaultArray,
+        END_ARRAY(stretchyByDefaultArray)
+    );
+
+    // Special case for "\not":
+    if (mText == L"NOT")
+    {
+        auto_ptr<MathmlNode> node(new MathmlNode(MathmlNode::cTypeMpadded));
+        IncrementNodeCount(nodeCount);
+
+        auto_ptr<MathmlNode> space(new MathmlNode(MathmlNode::cTypeMspace));
+        IncrementNodeCount(nodeCount);
+        space->mAttributes[MathmlNode::cAttributeWidth] = L"0.1em";
+        node->mChildren.push_back(space.release());
+
+        node->mChildren.push_back(
+            new MathmlNode(MathmlNode::cTypeMo, L"/")
+        );
+        IncrementNodeCount(nodeCount);
+
+        // Zero width, so the slash overlaps whatever comes next.
+        node->mAttributes[MathmlNode::cAttributeWidth] = L"0";
+        return node;
+    }
+
+    // And these are the characters that are accents by default;
+    // again we may need to modify this explicitly.
+    static wchar_t accentByDefaultArray[] =
+    {
+        L'\U0000FE37',
+        L'\U0000FE38'
+    };
+    static wishful_hash_set<wchar_t> accentByDefaultTable(
+        accentByDefaultArray,
+        END_ARRAY(accentByDefaultArray)
+    );
+
+    auto_ptr<MathmlNode> node(new MathmlNode(MathmlNode::cTypeMo, mText));
+    IncrementNodeCount(nodeCount);
+
+    if (mIsStretchy)
+    {
+        node->mAttributes[MathmlNode::cAttributeStretchy] = L"true";
+        // A fixed size is expressed by pinning minsize and maxsize to
+        // the same value.
+        if (!mSize.empty())
+            node->mAttributes[MathmlNode::cAttributeMinsize] =
+                node->mAttributes[MathmlNode::cAttributeMaxsize] = mSize;
+    }
+    else if (mText.size() == 1 && stretchyByDefaultTable.count(mText[0]))
+        node->mAttributes[MathmlNode::cAttributeStretchy] = L"false";
+
+    if (mIsAccent)
+    {
+        node->mAttributes[MathmlNode::cAttributeAccent] = L"true";
+        return node;
+    }
+    else if (mText.size() == 1 && accentByDefaultTable.count(mText[0]))
+        node->mAttributes[MathmlNode::cAttributeAccent] = L"false";
+
+    node->AddFontAttributes(mFont, options);
+
+    return AdjustMathmlEnvironment(
+        node, inheritedEnvironment, MathmlEnvironment(mStyle, mColour)
+    );
+}
+
+
+// Emits an <mn> holding this number's text, with font attributes applied
+// and the environment adjusted to this symbol's style and colour.
+auto_ptr<MathmlNode> SymbolNumber::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    // FIX: what about merging commas, decimal points into <mn> nodes?
+    // Might need to special-case it.
+
+    auto_ptr<MathmlNode> number(
+        new MathmlNode(MathmlNode::cTypeMn, mText)
+    );
+    IncrementNodeCount(nodeCount);
+
+    number->AddFontAttributes(mFont, options);
+
+    const MathmlEnvironment desiredEnvironment(mStyle, mColour);
+    return AdjustMathmlEnvironment(
+        number, inheritedEnvironment, desiredEnvironment
+    );
+}
+
+
+// Emits an <mtext> holding this symbol's text, with font attributes
+// applied and the environment adjusted to this symbol's style and colour.
+auto_ptr<MathmlNode> SymbolText::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    auto_ptr<MathmlNode> text(
+        new MathmlNode(MathmlNode::cTypeMtext, mText)
+    );
+    IncrementNodeCount(nodeCount);
+
+    text->AddFontAttributes(mFont, options);
+
+    const MathmlEnvironment desiredEnvironment(mStyle, mColour);
+    return AdjustMathmlEnvironment(
+        text, inheritedEnvironment, desiredEnvironment
+    );
+}
+
+
+// Emits an <msqrt> around the child's MathML. If the child came back as
+// an <mrow>, that node is simply retyped to <msqrt> instead of being
+// wrapped, which avoids output like <msqrt><mrow>...</mrow></msqrt>.
+auto_ptr<MathmlNode> Sqrt::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    const MathmlEnvironment ownEnvironment(mStyle, mColour);
+
+    auto_ptr<MathmlNode> result =
+        mChild->BuildMathmlTree(options, ownEnvironment, nodeCount);
+
+    if (result->mType != MathmlNode::cTypeMrow)
+    {
+        // Anything other than an <mrow> needs a real <msqrt> parent.
+        auto_ptr<MathmlNode> radical(
+            new MathmlNode(MathmlNode::cTypeMsqrt)
+        );
+        IncrementNodeCount(nodeCount);
+        radical->mChildren.push_back(result.release());
+        result = radical;
+    }
+    else
+        // Reuse the <mrow> as the <msqrt>.
+        result->mType = MathmlNode::cTypeMsqrt;
+
+    return AdjustMathmlEnvironment(
+        result, inheritedEnvironment, ownEnvironment
+    );
+}
+
+
+// Emits an <mroot> with two children: first the MathML of mInside, built
+// in this node's own environment, then the MathML of mOutside, built with
+// displaystyle off at script level 2.
+auto_ptr<MathmlNode> Root::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    auto_ptr<MathmlNode> root(new MathmlNode(MathmlNode::cTypeMroot));
+    IncrementNodeCount(nodeCount);
+
+    const MathmlEnvironment ownEnvironment(mStyle, mColour);
+    const MathmlEnvironment outsideEnvironment(false, 2, mColour);
+
+    auto_ptr<MathmlNode> inside =
+        mInside->BuildMathmlTree(options, ownEnvironment, nodeCount);
+    root->mChildren.push_back(inside.release());
+
+    auto_ptr<MathmlNode> outside =
+        mOutside->BuildMathmlTree(options, outsideEnvironment, nodeCount);
+    root->mChildren.push_back(outside.release());
+
+    return AdjustMathmlEnvironment(
+        root, inheritedEnvironment, ownEnvironment
+    );
+}
+
+
+// Emits the scripts element for a base with a lower and/or upper script:
+// <msub>/<msup>/<msubsup> when mIsSideset is set, otherwise
+// <munder>/<mover>/<munderover>. The children are the base, then the lower
+// script (if any), then the upper script (if any). A missing base is
+// represented by an empty <mrow>.
+auto_ptr<MathmlNode> Scripts::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    // The scripts are rendered one script level deeper than the base, and
+    // never in display style.
+    const MathmlEnvironment baseEnvironment(mStyle, mColour);
+    MathmlEnvironment scriptEnvironment(baseEnvironment);
+    scriptEnvironment.mScriptLevel++;
+    scriptEnvironment.mDisplayStyle = false;
+
+    auto_ptr<MathmlNode> base;
+    if (!mBase.get())
+    {
+        // An empty base gets represented by "<mrow/>"
+        base.reset(new MathmlNode(MathmlNode::cTypeMrow));
+        IncrementNodeCount(nodeCount);
+    }
+    else
+        base = mBase->BuildMathmlTree(options, baseEnvironment, nodeCount);
+
+    const bool hasUpper = (mUpper.get() != NULL);
+    const bool hasLower = (mLower.get() != NULL);
+
+    MathmlNode::Type type;
+    if (hasUpper && hasLower)
+        type = mIsSideset
+            ? MathmlNode::cTypeMsubsup : MathmlNode::cTypeMunderover;
+    else if (hasUpper)
+        type = mIsSideset
+            ? MathmlNode::cTypeMsup : MathmlNode::cTypeMover;
+    else
+        type = mIsSideset
+            ? MathmlNode::cTypeMsub : MathmlNode::cTypeMunder;
+
+    auto_ptr<MathmlNode> scriptsNode(new MathmlNode(type));
+    IncrementNodeCount(nodeCount);
+    scriptsNode->mChildren.push_back(base.release());
+
+    // Whenever there is no upper script, the lower script is used
+    // unconditionally.
+    if (hasLower || !hasUpper)
+        scriptsNode->mChildren.push_back(
+            mLower->BuildMathmlTree(
+                options, scriptEnvironment, nodeCount
+            ).release()
+        );
+
+    if (hasUpper)
+        scriptsNode->mChildren.push_back(
+            mUpper->BuildMathmlTree(
+                options, scriptEnvironment, nodeCount
+            ).release()
+        );
+
+    const bool limitsOutsideDisplayStyle =
+        !mIsSideset && mStyle != cStyleDisplay;
+
+    if (limitsOutsideDisplayStyle)
+    {
+        // This situation should be quite unusual, since the user would
+        // have to force things using "\limits". If there's an operator in
+        // the core, we need to set movablelimits just to be safe.
+
+        // FIX: this code might let the user induce quadratic time, with
+        // something like this:
+        // "\textstyle \mathop{\mathop{\mathop{\mathop ... {x} ...
+        // \limits^x}\limits^x}\limits^x}\limits^x" etc
+
+        // FIX: we could add a table to check whether the operator inside
+        // is likely to need movablelimits adjusted because of the
+        // operator dictionary.
+
+        MathmlNode* core = GetCore(scriptsNode->mChildren.front());
+        if (core->mType == MathmlNode::cTypeMo)
+            core->mAttributes
+                [MathmlNode::cAttributeMovablelimits] = L"false";
+    }
+
+    return AdjustMathmlEnvironment(
+        scriptsNode, inheritedEnvironment, baseEnvironment
+    );
+}
+
+
+// Emits an <mfrac> whose children are the numerator and the denominator,
+// in that order. An invisible fraction line is expressed with
+// linethickness="0".
+auto_ptr<MathmlNode> Fraction::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    const MathmlEnvironment fractionEnvironment(mStyle, mColour);
+
+    // Numerator and denominator are rendered one step smaller than the
+    // fraction: display style drops to non-display style, anything else
+    // goes one script level deeper.
+    MathmlEnvironment partEnvironment(fractionEnvironment);
+    if (!partEnvironment.mDisplayStyle)
+        partEnvironment.mScriptLevel++;
+    else
+        partEnvironment.mDisplayStyle = false;
+
+    auto_ptr<MathmlNode> fraction(new MathmlNode(MathmlNode::cTypeMfrac));
+    IncrementNodeCount(nodeCount);
+
+    auto_ptr<MathmlNode> numerator =
+        mNumerator->BuildMathmlTree(options, partEnvironment, nodeCount);
+    fraction->mChildren.push_back(numerator.release());
+
+    auto_ptr<MathmlNode> denominator =
+        mDenominator->BuildMathmlTree(options, partEnvironment, nodeCount);
+    fraction->mChildren.push_back(denominator.release());
+
+    if (!mIsLineVisible)
+        fraction->mAttributes[MathmlNode::cAttributeLinethickness] = L"0";
+
+    return AdjustMathmlEnvironment(
+        fraction, inheritedEnvironment, fractionEnvironment
+    );
+}
+
+
+// Emits an <mspace> whose width is mWidth / 18 em, printed with three
+// decimals. Only user-requested spaces may be built this way; for any
+// other space a logic_error is thrown. The node is returned without any
+// environment adjustment.
+auto_ptr<MathmlNode> Space::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    if (mIsUserRequested)
+    {
+        // FIX: what happens with negative space?
+
+        auto_ptr<MathmlNode> spaceNode(
+            new MathmlNode(MathmlNode::cTypeMspace)
+        );
+        IncrementNodeCount(nodeCount);
+
+        wostringstream widthText;
+        widthText << fixed << setprecision(3) << (mWidth / 18.0) << L"em";
+        spaceNode->mAttributes[MathmlNode::cAttributeWidth] =
+            widthText.str();
+
+        return spaceNode;
+    }
+
+    throw logic_error(
+        "Unexpected lonely automatic space in Space::BuildMathmlTree"
+    );
+}
+
+
+// Emits the child surrounded by stretchy delimiter operators:
+//     <mrow> <mo>left</mo> <mrow>child</mrow> <mo>right</mo> </mrow>
+// An empty delimiter is simply left out. If both delimiters are empty, the
+// child's MathML is returned directly.
+auto_ptr<MathmlNode> Fenced::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    const MathmlEnvironment fencedEnvironment(mStyle, mColour);
+
+    auto_ptr<MathmlNode> inside =
+        mChild->BuildMathmlTree(options, fencedEnvironment, nodeCount);
+
+    const bool hasLeft = !mLeftDelimiter.empty();
+    const bool hasRight = !mRightDelimiter.empty();
+
+    if (!hasLeft && !hasRight)
+        return inside;
+
+    if (inside->mType != MathmlNode::cTypeMrow)
+    {
+        // Ensure that the stuff between the fences is surrounded by
+        // an <mrow>. (I don't really understand why this is necessary,
+        // but the MathML spec suggests it, and Firefox seems a bit fussy,
+        // so let's just do it.)
+        auto_ptr<MathmlNode> wrapper(
+            new MathmlNode(MathmlNode::cTypeMrow)
+        );
+        IncrementNodeCount(nodeCount);
+        wrapper->mChildren.push_back(inside.release());
+        inside = wrapper;
+    }
+
+    // And surround the whole thing by an <mrow> as well.
+    // (This one makes more sense... we want the delimiters to stretch
+    // around the correct stuff.)
+    auto_ptr<MathmlNode> output(new MathmlNode(MathmlNode::cTypeMrow));
+    IncrementNodeCount(nodeCount);
+
+    if (hasLeft)
+    {
+        auto_ptr<MathmlNode> leftFence(
+            new MathmlNode(MathmlNode::cTypeMo, mLeftDelimiter)
+        );
+        IncrementNodeCount(nodeCount);
+        leftFence->mAttributes[MathmlNode::cAttributeStretchy] = L"true";
+        output->mChildren.push_back(leftFence.release());
+    }
+
+    output->mChildren.push_back(inside.release());
+
+    if (hasRight)
+    {
+        auto_ptr<MathmlNode> rightFence(
+            new MathmlNode(MathmlNode::cTypeMo, mRightDelimiter)
+        );
+        IncrementNodeCount(nodeCount);
+        rightFence->mAttributes[MathmlNode::cAttributeStretchy] = L"true";
+        output->mChildren.push_back(rightFence.release());
+    }
+
+    return AdjustMathmlEnvironment(
+        output, inheritedEnvironment, fencedEnvironment
+    );
+}
+
+
+// Emits an <mtable> with one <mtr> per row and one <mtd> per entry. Rows
+// shorter than the widest row are padded with empty <mtd> elements.
+// Column alignment/spacing and row spacing attributes are derived from
+// mAlign and mRowSpacing.
+auto_ptr<MathmlNode> Table::BuildMathmlTree(
+    const MathmlOptions& options,
+    const MathmlEnvironment& inheritedEnvironment,
+    unsigned& nodeCount
+) const
+{
+    // Column counts use the container's own size type, so that they can
+    // be compared against vector::size() without mixing signedness.
+    typedef vector<Node*>::size_type ColumnCount;
+
+    auto_ptr<MathmlNode> node(new MathmlNode(MathmlNode::cTypeMtable));
+    IncrementNodeCount(nodeCount);
+
+    // Compute the table width. We do this so we can "fill out" each
+    // row with the correct number of entries. Although the MathML spec
+    // doesn't require this, it seems that Firefox doesn't always align
+    // the entries properly unless we fill in the missing entries.
+    ColumnCount tableWidth = 0;
+    for (vector<vector<Node*> >::const_iterator
+        row = mRows.begin();
+        row != mRows.end();
+        row++
+    )
+    {
+        if (tableWidth < row->size())
+            tableWidth = row->size();
+    }
+
+    if (mAlign == cAlignLeft)
+        node->mAttributes[MathmlNode::cAttributeColumnalign] = L"left";
+    else if (mAlign == cAlignRightLeft)
+    {
+        // Columns alternate right, left, right, left, ...
+        wstring alignString = L"right";
+        for (ColumnCount i = 1; i < tableWidth; i++)
+            alignString += (i % 2) ? L" left" : L" right";
+        node->mAttributes[MathmlNode::cAttributeColumnalign] = alignString;
+
+        // ... and the gaps between columns alternate 0.2em, 1em, ...
+        wstring spacingString = L"0.2em";
+        for (ColumnCount i = 2; i < tableWidth; i++)
+            spacingString += (i % 2) ? L" 0.2em" : L" 1em";
+        node->mAttributes[MathmlNode::cAttributeColumnspacing] =
+            spacingString;
+    }
+
+    // FIX: need to test this for Firefox whenever they get that bug fixed
+    // (mozilla bug 330964)
+    if (mRowSpacing == cRowSpacingTight)
+        node->mAttributes[MathmlNode::cAttributeRowspacing] = L"0.3ex";
+
+    for (vector<vector<Node*> >::const_iterator
+        inRow = mRows.begin();
+        inRow != mRows.end();
+        inRow++
+    )
+    {
+        auto_ptr<MathmlNode> outRow(new MathmlNode(MathmlNode::cTypeMtr));
+        IncrementNodeCount(nodeCount);
+        ColumnCount count = 0;
+        for (vector<Node*>::const_iterator
+            inEntry = inRow->begin();
+            inEntry != inRow->end();
+            inEntry++, count++
+        )
+        {
+            auto_ptr<MathmlNode> outEntry(
+                new MathmlNode(MathmlNode::cTypeMtd)
+            );
+            IncrementNodeCount(nodeCount);
+
+            auto_ptr<MathmlNode> child =
+                (*inEntry)->BuildMathmlTree(
+                    options, MathmlEnvironment(mStyle, mColour), nodeCount
+                );
+
+            // Firefox has a bug (#236963) where it doesn't correctly put
+            // an "inferred mrow" inside a <mtd> block, so for the moment
+            // we add the <mrow> ourselves.
+            // NOTE(review): as written, the branch taken when the macro
+            // is 1 turns an <mrow> child into the <mtd> itself, and it is
+            // the #else branch that wraps the child in an <mrow>. That
+            // looks like the opposite of what the comment above
+            // describes -- confirm which behaviour is intended.
+#define MOZILLA_BUG_236963_WORKAROUND 1
+
+#if MOZILLA_BUG_236963_WORKAROUND
+            if (child->mType == MathmlNode::cTypeMrow)
+            {
+                child->mType = MathmlNode::cTypeMtd;
+                outEntry = child;
+            }
+            else
+                outEntry->mChildren.push_back(child.release());
+#else
+            if (child->mType != MathmlNode::cTypeMrow)
+            {
+                auto_ptr<MathmlNode> temp(
+                    new MathmlNode(MathmlNode::cTypeMrow)
+                );
+                IncrementNodeCount(nodeCount);
+                temp->mChildren.push_back(child.release());
+                child = temp;
+            }
+            outEntry->mChildren.push_back(child.release());
+#endif
+
+            outRow->mChildren.push_back(outEntry.release());
+        }
+
+        // fill out the extra table entries:
+        for (; count < tableWidth; count++)
+        {
+            outRow->mChildren.push_back(
+                new MathmlNode(MathmlNode::cTypeMtd)
+            );
+            IncrementNodeCount(nodeCount);
+        }
+
+        node->mChildren.push_back(outRow.release());
+    }
+
+    return AdjustMathmlEnvironment(
+        node, inheritedEnvironment, MathmlEnvironment(mStyle, mColour)
+    );
+}
+
+
+// This is a list of all operators that we know how to negate. Each pair
+// maps an operator's text to the text of its negated form; Row::Optimise()
+// consults gNegationTable when a "\not" is directly followed by an
+// operator. Every operator appears at most once.
+pair<wstring, wstring> gNegationArray[] =
+{
+    // Element => NotElement
+    make_pair(L"\U00002208", L"\U00002209"),
+    // Congruent => NotCongruent
+    make_pair(L"\U00002261", L"\U00002262"),
+    // Exists => NotExists
+    make_pair(L"\U00002203", L"\U00002204"),
+    // = => NotEqual
+    make_pair(L"=", L"\U00002260"),
+    // SubsetEqual => NotSubsetEqual
+    make_pair(L"\U00002286", L"\U00002288"),
+    // Tilde => NotTilde
+    make_pair(L"\U0000223C", L"\U00002241"),
+    // LeftArrow => nleftarrow
+    make_pair(L"\U00002190", L"\U0000219A"),
+    // RightArrow => nrightarrow
+    make_pair(L"\U00002192", L"\U0000219B"),
+    // LeftRightArrow => nleftrightarrow
+    make_pair(L"\U00002194", L"\U000021AE"),
+    // DoubleLeftArrow => nLeftArrow
+    make_pair(L"\U000021D0", L"\U000021CD"),
+    // DoubleRightArrow => nRightArrow
+    make_pair(L"\U000021D2", L"\U000021CF"),
+    // DoubleLeftRightArrow => nLeftrightArrow
+    make_pair(L"\U000021D4", L"\U000021CE"),
+    // ReverseElement => NotReverseElement
+    make_pair(L"\U0000220B", L"\U0000220C"),
+    // FIX: what happens to the pipe character?
+    // VerticalBar => NotVerticalBar
+    make_pair(L"\U00002223", L"\U00002224"),
+    // DoubleVerticalBar => NotDoubleVerticalBar
+    make_pair(L"\U00002225", L"\U00002226"),
+    // TildeEqual => NotTildeEqual
+    make_pair(L"\U00002243", L"\U00002244"),
+    // TildeFullEqual => NotTildeFullEqual
+    make_pair(L"\U00002245", L"\U00002247"),
+    // TildeTilde => NotTildeTilde
+    make_pair(L"\U00002248", L"\U00002249"),
+    // < => NotLess
+    make_pair(L"<", L"\U0000226E"),
+    // > => NotGreater
+    make_pair(L">", L"\U0000226F"),
+    // leq => NotLessEqual
+    make_pair(L"\U00002264", L"\U00002270"),
+    // GreaterEqual => NotGreaterEqual
+    make_pair(L"\U00002265", L"\U00002271"),
+    // FIX: what about "Precedes", "Succeeds"?
+    // subset => nsub
+    make_pair(L"\U00002282", L"\U00002284"),
+    // Superset => nsup
+    make_pair(L"\U00002283", L"\U00002285"),
+    // SupersetEqual => NotSupersetEqual
+    make_pair(L"\U00002287", L"\U00002289"),
+    // RightTee => nvdash
+    make_pair(L"\U000022A2", L"\U000022AC"),
+    // DoubleRightTee => nvDash
+    make_pair(L"\U000022A8", L"\U000022AD"),
+    // Vdash => nVdash
+    make_pair(L"\U000022A9", L"\U000022AE"),
+    // SquareSubsetEqual => NotSquareSubsetEqual
+    make_pair(L"\U00002291", L"\U000022E2"),
+    // SquareSupersetEqual => NotSquareSupersetEqual
+    make_pair(L"\U00002292", L"\U000022E3"),
+    // LeftTriangle => NotLeftTriangle
+    make_pair(L"\U000022B2", L"\U000022EA"),
+    // RightTriangle => NotRightTriangle
+    make_pair(L"\U000022B3", L"\U000022EB"),
+    // LeftTriangleEqual => NotLeftTriangleEqual
+    make_pair(L"\U000022B4", L"\U000022EC"),
+    // RightTriangleEqual => NotRightTriangleEqual
+    make_pair(L"\U000022B5", L"\U000022ED")
+};
+wishful_hash_map<wstring, wstring> gNegationTable(
+    gNegationArray,
+    END_ARRAY(gNegationArray)
+);
+
+
+// Removes the child at "position" from "children" and destroys it.
+// NOTE(review): this assumes the row owns the nodes stored in mChildren
+// (i.e. that they are heap-allocated and otherwise released by Row) --
+// confirm against Row's destructor.
+static void DestroyChild(
+    list<Node*>& children,
+    list<Node*>::iterator position
+)
+{
+    delete *position;
+    children.erase(position);
+}
+
+
+// Simplifies this row in place, after optimising every child:
+//  * adjacent Space nodes are merged into one (widths added; the result is
+//    user-requested if either of them was);
+//  * a "\not" operator directly followed by an operator listed in
+//    gNegationTable is replaced by the negated operator;
+//  * two neighbouring symbols with no space between them are merged into
+//    one if they are both numbers, both text, or both normal-font
+//    identifiers, and agree in font, style and colour.
+// Nodes that are dropped from the row are destroyed.
+void Row::Optimise()
+{
+    list<Node*>::iterator lastSpace = mChildren.end();
+    list<Node*>::iterator lastNonSpace = mChildren.end();
+
+    // Throughout this loop, we ensure that:
+    // * lastNonSpace points to the most recently seen non-Space node,
+    //   or mChildren.end() if none have yet been seen;
+    // * lastSpace points to the most recently seen Space node *following*
+    //   lastNonSpace, or just the most recently seen Space node if
+    //   lastNonSpace == mChildren.end().
+
+    for (list<Node*>::iterator
+        current = mChildren.begin(); current != mChildren.end(); ++current
+    )
+    {
+        // Recurse:
+        (*current)->Optimise();
+
+        Space* currentAsSpace = dynamic_cast<Space*>(*current);
+        if (currentAsSpace)
+        {
+            if (lastSpace == mChildren.end())
+                lastSpace = current;
+            else
+            {
+                // Merge the two adjacent Space nodes.
+                Space* lastSpaceAsSpace = dynamic_cast<Space*>(*lastSpace);
+                if (lastSpaceAsSpace->mIsUserRequested)
+                    currentAsSpace->mIsUserRequested = true;
+                currentAsSpace->mWidth += lastSpaceAsSpace->mWidth;
+                DestroyChild(mChildren, lastSpace);
+                lastSpace = current;
+            }
+        }
+        else
+        {
+            if (
+                lastNonSpace != mChildren.end() &&
+                (
+                    lastSpace == mChildren.end() ||
+                    (dynamic_cast<Space*>(*lastSpace))->mWidth == 0
+                )
+            )
+            {
+                // We have found two non-Space nodes with zero space between
+                // them. Now determine whether we want to merge them.
+
+                // The first special case is if the first symbol is a
+                // "\not" command, and we try to come up with a MathML
+                // character which represents the negation of the following
+                // operator.
+                SymbolOperator* lastNonSpaceAsOperator =
+                    dynamic_cast<SymbolOperator*>(*lastNonSpace);
+                SymbolOperator* currentAsOperator =
+                    dynamic_cast<SymbolOperator*>(*current);
+                wishful_hash_map<wstring, wstring>::const_iterator
+                    negationLookup;
+
+                if (
+                    lastNonSpaceAsOperator &&
+                    lastNonSpaceAsOperator->mText == L"NOT" &&
+                    currentAsOperator &&
+                    (negationLookup =
+                        gNegationTable.find(currentAsOperator->mText))
+                            != gNegationTable.end()
+                )
+                {
+                    // Replace with appropriate negated character.
+
+                    if (lastSpace != mChildren.end())
+                        DestroyChild(mChildren, lastSpace);
+
+                    currentAsOperator->mText = negationLookup->second;
+                    DestroyChild(mChildren, lastNonSpace);
+                }
+                else
+                {
+
+                    // OK, that special case didn't work out.
+                    // If the current node is a scripts node, find its core.
+                    Node* currentCore = *current;
+                    Scripts* currentCoreAsScripts;
+                    while (
+                        currentCore &&
+                        (currentCoreAsScripts =
+                            dynamic_cast<Scripts*>(currentCore))
+                    )
+                        currentCore = currentCoreAsScripts->mBase.get();
+
+                    // Check candidates are Symbols and their fonts, styles,
+                    // colours match, and then either:
+                    // * both are SymbolNumber, or
+                    // * both are SymbolText, or
+                    // * both are SymbolIdentifier and their fonts are both
+                    //   normal (this case covers things like <mi>sin</mi>)
+
+                    Symbol* currentCoreAsSymbol =
+                        dynamic_cast<Symbol*>(currentCore);
+                    Symbol* lastNonSpaceAsSymbol =
+                        dynamic_cast<Symbol*>(*lastNonSpace);
+
+                    if (
+                        currentCoreAsSymbol && lastNonSpaceAsSymbol
+                        &&
+                        currentCoreAsSymbol->mFont ==
+                            lastNonSpaceAsSymbol->mFont
+                        &&
+                        currentCoreAsSymbol->mStyle ==
+                            lastNonSpaceAsSymbol->mStyle
+                        &&
+                        currentCoreAsSymbol->mColour ==
+                            lastNonSpaceAsSymbol->mColour
+                        &&
+                        (
+                            (dynamic_cast<SymbolNumber*>(currentCore) &&
+                                dynamic_cast<SymbolNumber*>(*lastNonSpace))
+                            ||
+                            (dynamic_cast<SymbolText*>(currentCore) &&
+                                dynamic_cast<SymbolText*>(*lastNonSpace))
+                            ||
+                            (
+                                dynamic_cast<SymbolIdentifier*>
+                                    (currentCore)
+                                &&
+                                dynamic_cast<SymbolIdentifier*>
+                                    (*lastNonSpace)
+                                &&
+                                currentCoreAsSymbol->mFont ==
+                                    cMathmlFontNormal
+                                &&
+                                lastNonSpaceAsSymbol->mFont ==
+                                    cMathmlFontNormal
+                            )
+                        )
+                    )
+                    {
+                        // Let's MERGE.
+                        // (We do this a slightly odd way to maintain O(n)
+                        // complexity.)
+
+                        if (lastSpace != mChildren.end())
+                            DestroyChild(mChildren, lastSpace);
+
+                        // Append the current text to the accumulated text,
+                        // then swap so that the combined text lives in the
+                        // current node; the earlier node is discarded.
+                        lastNonSpaceAsSymbol->mText +=
+                            currentCoreAsSymbol->mText;
+
+                        lastNonSpaceAsSymbol->mText.swap(
+                            currentCoreAsSymbol->mText
+                        );
+
+                        DestroyChild(mChildren, lastNonSpace);
+                    }
+                }
+            }
+
+            lastNonSpace = current;
+            lastSpace = mChildren.end();
+        }
+    }
+}
+
+
+void Scripts::Optimise()
+{
+    // Each of the three slots may be empty, so every pointer is checked
+    // before recursing. Visiting order: base, lower, upper.
+    Node* const parts[] = { mBase.get(), mLower.get(), mUpper.get() };
+    for (unsigned slot = 0; slot < 3; ++slot)
+    {
+        if (parts[slot] != 0)
+            parts[slot]->Optimise();
+    }
+}
+
+
+void Fraction::Optimise()
+{
+    // Optimise the numerator subtree first, then the denominator subtree.
+    Node& top = *mNumerator;
+    top.Optimise();
+
+    Node& bottom = *mDenominator;
+    bottom.Optimise();
+}
+
+
+void Fenced::Optimise()
+{
+    // Only the expression between the delimiters has anything to optimise.
+    (*mChild).Optimise();
+}
+
+
+void Sqrt::Optimise()
+{
+    // Forward to the expression under the radical.
+    (*mChild).Optimise();
+}
+
+
+void Root::Optimise()
+{
+    // Optimise the mInside subtree first, then the mOutside subtree.
+    Node& inner = *mInside;
+    inner.Optimise();
+
+    Node& outer = *mOutside;
+    outer.Optimise();
+}
+
+
+void Table::Optimise()
+{
+    // Walk the table row by row and optimise the subtree in every cell.
+    vector<vector<Node*> >::iterator row = mRows.begin();
+    while (row != mRows.end())
+    {
+        vector<Node*>::iterator cell = row->begin();
+        while (cell != row->end())
+        {
+            (*cell)->Optimise();
+            ++cell;
+        }
+        ++row;
+    }
+}
+
+
+// =========================================================================
+// Now all the LayoutTree debugging code
+
+
+// Returns the whitespace prefix used by the Print() functions: two spaces
+// per nesting level.
+//
+// A negative depth yields an empty string. (Without the clamp, the negative
+// count would be converted to a huge unsigned length and the wstring
+// constructor would throw.)
+wstring indent(int depth)
+{
+    if (depth <= 0)
+        return wstring();
+    return wstring(2 * depth, L' ');
+}
+
+wstring Node::PrintFields() const
+{
+    // Display names, each table indexed by the matching enum value.
+    static const wchar_t* const flavourNames[] =
+    {
+        L"ord", L"op", L"bin", L"rel",
+        L"open", L"close", L"punct", L"inner"
+    };
+    static const wchar_t* const limitsNames[] =
+    {
+        L"displaylimits", L"limits", L"nolimits"
+    };
+    static const wchar_t* const styleNames[] =
+    {
+        L"displaystyle", L"textstyle", L"scriptstyle", L"scriptscriptstyle"
+    };
+
+    wstring fields(flavourNames[mFlavour]);
+
+    // The limits setting is only reported for "op" flavoured nodes.
+    if (mFlavour == cFlavourOp)
+    {
+        fields += L" ";
+        fields += limitsNames[mLimits];
+    }
+
+    fields += L" ";
+    fields += styleNames[mStyle];
+
+    // Colour in hexadecimal, zero-padded to at least six digits.
+    wostringstream hexColour;
+    hexColour << hex << setw(6) << setfill(L'0') << mColour;
+    fields += L" 0x";
+    fields += hexColour.str();
+
+    return fields;
+}
+
+void Row::Print(wostream& os, int depth) const
+{
+    // Header line for the row itself ...
+    os << indent(depth) << L"Row " << PrintFields() << endl;
+
+    // ... followed by every child, one nesting level deeper.
+    list<Node*>::const_iterator child = mChildren.begin();
+    while (child != mChildren.end())
+    {
+        (*child)->Print(os, depth + 1);
+        ++child;
+    }
+}
+
+void SymbolIdentifier::Print(wostream& os, int depth) const
+{
+    // Single line: indentation, node kind, quoted text, font name and the
+    // common Node fields.
+    os << indent(depth);
+    os << L"SymbolIdentifier \"" << mText << L"\" ";
+    os << gMathmlFontStrings[mFont] << L" " << PrintFields() << endl;
+}
+
+void SymbolNumber::Print(wostream& os, int depth) const
+{
+    // Single line: indentation, node kind, quoted text, font name and the
+    // common Node fields.
+    os << indent(depth);
+    os << L"SymbolNumber \"" << mText << L"\" ";
+    os << gMathmlFontStrings[mFont] << L" " << PrintFields() << endl;
+}
+
+void SymbolText::Print(wostream& os, int depth) const
+{
+    // Single line: indentation, node kind, quoted text, font name and the
+    // common Node fields.
+    os << indent(depth);
+    os << L"SymbolText \"" << mText << L"\" ";
+    os << gMathmlFontStrings[mFont] << L" " << PrintFields() << endl;
+}
+
+void SymbolOperator::Print(wostream& os, int depth) const
+{
+    // Labels for the two boolean flags.
+    const wchar_t* const stretchLabel =
+        mIsStretchy ? L" stretchy" : L" non-stretchy";
+    const wchar_t* const accentLabel = mIsAccent ? L" accent" : L"";
+
+    os << indent(depth) << L"SymbolOperator \"" << mText << L"\" ";
+    os << gMathmlFontStrings[mFont];
+    os << stretchLabel;
+    os << accentLabel;
+
+    // The size attribute is shown only when one has been set.
+    if (!mSize.empty())
+        os << L" size=\"" << mSize << L"\"";
+
+    os << L" " << PrintFields() << endl;
+}
+
+void Space::Print(wostream& os, int depth) const
+{
+    // Spaces print only their width and, when set, the user-requested flag;
+    // the common Node fields are not printed.
+    os << indent(depth);
+    os << L"Space " << mWidth;
+    if (mIsUserRequested)
+    {
+        os << L" (user requested)";
+    }
+    os << endl;
+}
+
+void Scripts::Print(wostream& os, int depth) const
+{
+    os << indent(depth) << L"Scripts ";
+    os << (mIsSideset ? L"sideset" : L"underover");
+    os << L" " << PrintFields() << endl;
+
+    // Present parts are printed in the order base, upper, lower; each one
+    // gets a label line followed by its subtree. Empty parts are skipped.
+    const wchar_t* const labels[] = { L"base", L"upper", L"lower" };
+    const Node* const parts[] = { mBase.get(), mUpper.get(), mLower.get() };
+
+    for (unsigned slot = 0; slot < 3; ++slot)
+    {
+        if (parts[slot] == 0)
+            continue;
+        os << indent(depth + 1) << labels[slot] << endl;
+        parts[slot]->Print(os, depth + 2);
+    }
+}
+
+void Fraction::Print(wostream& os, int depth) const
+{
+    os << indent(depth);
+    os << L"Fraction ";
+    // Only the unusual case (no bar drawn) is called out.
+    if (!mIsLineVisible)
+    {
+        os << L"(no visible line) ";
+    }
+    os << PrintFields() << endl;
+
+    // Numerator subtree first, then denominator.
+    const int childDepth = depth + 1;
+    mNumerator->Print(os, childDepth);
+    mDenominator->Print(os, childDepth);
+}
+
+void Fenced::Print(wostream& os, int depth) const
+{
+    // Header line shows both delimiters in quotes, then the common fields.
+    os << indent(depth) << L"Fenced \"";
+    os << mLeftDelimiter << L"\" \"";
+    os << mRightDelimiter << L"\" ";
+    os << PrintFields() << endl;
+
+    // The fenced expression follows one level deeper.
+    mChild->Print(os, depth + 1);
+}
+
+void Sqrt::Print(wostream& os, int depth) const
+{
+    // Header line, then the expression under the radical one level deeper.
+    os << indent(depth);
+    os << L"Sqrt " << PrintFields() << endl;
+    mChild->Print(os, depth + 1);
+}
+
+void Root::Print(wostream& os, int depth) const
+{
+    os << indent(depth);
+    os << L"Root " << PrintFields() << endl;
+
+    // mInside is printed before mOutside, both one level deeper.
+    const int childDepth = depth + 1;
+    mInside->Print(os, childDepth);
+    mOutside->Print(os, childDepth);
+}
+
+void Table::Print(wostream& os, int depth) const
+{
+    // Display names for Table::Align, indexed by enum value.
+    static const wchar_t* const alignNames[] =
+    {
+        L"left", L"centre", L"rightleft"
+    };
+
+    os << indent(depth) << L"Table " << PrintFields();
+    os << L" " << alignNames[mAlign] << endl;
+
+    // Each row gets a "Table row" label line, then its entries one level
+    // below that label.
+    vector<vector<Node*> >::const_iterator row = mRows.begin();
+    while (row != mRows.end())
+    {
+        os << indent(depth + 1) << L"Table row" << endl;
+
+        vector<Node*>::const_iterator cell = row->begin();
+        while (cell != row->end())
+        {
+            (*cell)->Print(os, depth + 2);
+            ++cell;
+        }
+        ++row;
+    }
+}
+
+
+}
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/LayoutTree.h b/blahtexml/source/BlahtexCore/LayoutTree.h
new file mode 100644
index 0000000..512fec2
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/LayoutTree.h
@@ -0,0 +1,670 @@
+// File "LayoutTree.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_LAYOUTTREE_H
+#define BLAHTEX_LAYOUTTREE_H
+
+#include "MathmlNode.h"
+
+namespace blahtex
+{
+
+// Upper bound on the number of nodes allowed in the output MathML tree.
+// (The limit is imposed to prevent users eliciting quadratic time by
+// inputting arrays with lots of empty entries.)
+const unsigned cMaxMathmlNodeCount = 2500;
+
+
+struct MathmlEnvironment;
+
+// The LayoutTree namespace contains all classes that represent nodes in
+// the layout tree. The layout tree is an intermediate stage between the
+// parse tree and the final output XML tree.
+namespace LayoutTree
+{
+ // Base class for layout tree nodes.
+ struct Node
+ {
+     virtual ~Node()
+     { }
+
+     // This field is only used during the layout tree building phase, to
+     // determine inter-atomic spacing. The values correspond roughly
+     // to TeX's differently flavoured atoms. (We omit several flavours
+     // that TeX uses, like "acc" and "rad"; these are generally handled
+     // as "ord".)
+     //
+     // This field is ignored for LayoutTree::Space nodes.
+     enum Flavour
+     {
+         cFlavourOrd,
+         cFlavourOp,
+         cFlavourBin,
+         cFlavourRel,
+         cFlavourOpen,
+         cFlavourClose,
+         cFlavourPunct,
+         cFlavourInner
+     }
+     mFlavour;
+
+     // This field is only used during the layout tree building phase, to
+     // determine script placement. It corresponds to TeX's "limits",
+     // "nolimits", "displaylimits" trichotomy.
+     //
+     // It is only valid if mFlavour == cFlavourOp.
+     enum Limits
+     {
+         cLimitsDisplayLimits,
+         cLimitsLimits,
+         cLimitsNoLimits
+     }
+     mLimits;
+
+     // This field corresponds to TeX's displaystyle/textstyle/
+     // scriptstyle/scriptscriptstyle setting. (We ignore the cramped/
+     // uncramped variations.)
+     //
+     // This field is ignored for LayoutTree::Space nodes.
+     enum Style
+     {
+         cStyleDisplay,      // like \displaystyle
+         cStyleText,         // like \textstyle
+         cStyleScript,       // like \scriptstyle
+         cStyleScriptScript  // like \scriptscriptstyle
+     }
+     mStyle;
+
+     // Colour of the node. For symbols this is the colour of the symbol;
+     // for fractions it's the colour of the horizontal bar; for radicals
+     // it's the colour of the radical symbol.
+     //
+     // This field is ignored for LayoutTree::Space nodes.
+     RGBColour mColour;
+
+
+     // Stores the four common fields. Note that the parameter order
+     // (style first) differs from the member declaration order.
+     Node(
+         Style style,
+         Flavour flavour,
+         Limits limits,
+         RGBColour colour
+     ) :
+         // Initialisers are listed in member declaration order, which is
+         // the order in which the members are actually initialised.
+         mFlavour(flavour),
+         mLimits(limits),
+         mStyle(style),
+         mColour(colour)
+     { }
+
+
+     // This function "optimises" the tree beneath the current node:
+     // (1) It merges adjacent Space nodes into single spaces, and
+     // (2) It merges adjacent Symbol nodes in certain situations.
+     //     For example, we want <mn>12</mn> instead of
+     //     <mn>1</mn><mn>2</mn>, and <mi>sin</mi> instead of
+     //     <mi mathvariant="normal">s</mi>
+     //     <mi mathvariant="normal">i</mi>
+     //     <mi mathvariant="normal">n</mi> !!!!
+     //
+     // The default implementation does nothing.
+     virtual void Optimise()
+     { }
+
+
+     // This function converts the layout tree rooted at this node into
+     // a MathML tree.
+     //
+     // The inheritedEnvironment parameter tells it what assumptions to
+     // make about its rendering environment. It uses these to decide
+     // whether to insert extra <mstyle> tags.
+     //
+     // The nodeCount parameter is used to keep track of the total number
+     // of nodes in the MathML tree. For security reasons we put a hard
+     // limit on this. (See cMaxMathmlNodeCount.)
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const = 0;
+
+
+     // This function recursively prints the layout tree under this node.
+     // Debugging use only.
+     virtual void Print(
+         std::wostream& os,
+         int depth = 0
+     ) const = 0;
+
+     // Returns the flavour, limits (for "op" nodes only), style and colour
+     // formatted as text; used internally by Print.
+     std::wstring PrintFields() const;
+ };
+
+
+ // A Row stores a list of children nodes. It gets translated into an
+ // <mrow> node in the MathML tree.
+ //
+ // No Row ever has another Row node as its child.
+ struct Row : Node
+ {
+     // The children, in left-to-right order.
+     // NOTE(review): these are raw pointers and ~Row() is defined
+     // elsewhere; presumably the Row owns and deletes them, in which case
+     // copying a Row would be unsafe -- confirm against the destructor.
+     std::list<Node*> mChildren;
+
+     // A Row always has flavour "ord" and the default limits setting.
+     Row(Style style, RGBColour colour) :
+         Node(style, cFlavourOrd, cLimitsDisplayLimits, colour)
+     { }
+
+     ~Row();
+
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Symbol is an abstract class; its concrete subclasses are
+ // SymbolIdentifier, SymbolNumber, SymbolOperator, SymbolText. It
+ // represents anything that will get translated as <mn>, <mi>, <mo>
+ // or <mtext>. It describes the text that goes inside the tags (mText)
+ // and what font it should be in (mFont).
+ struct Symbol : Node
+ {
+     // The text that goes inside the MathML tags.
+     std::wstring mText;
+
+     // The font that text should be in.
+     MathmlFont mFont;
+
+     Symbol(
+         const std::wstring& text, MathmlFont font,
+         Style style, Flavour flavour, Limits limits, RGBColour colour
+     ) :
+         Node(style, flavour, limits, colour),
+         mText(text),
+         mFont(font)
+     { }
+
+     // Both functions are re-declared pure virtual here, so Symbol itself
+     // remains abstract.
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const = 0;
+
+     virtual void Print(std::wostream& os, int depth = 0) const = 0;
+ };
+
+
+ // SymbolIdentifier represents things translated as <mi>.
+ struct SymbolIdentifier : Symbol
+ {
+     // Adds no state of its own; every argument is forwarded to Symbol.
+     SymbolIdentifier(
+         const std::wstring& text, MathmlFont font,
+         Style style, Flavour flavour, Limits limits, RGBColour colour
+     ) :
+         Symbol(text, font, style, flavour, limits, colour)
+     { }
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // SymbolNumber represents things translated as <mn>.
+ struct SymbolNumber : Symbol
+ {
+     // Adds no state of its own; every argument is forwarded to Symbol.
+     SymbolNumber(
+         const std::wstring& text, MathmlFont font,
+         Style style, Flavour flavour, Limits limits, RGBColour colour
+     ) :
+         Symbol(text, font, style, flavour, limits, colour)
+     { }
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // SymbolText represents things translated as <mtext>.
+ //
+ // Actually, each SymbolText represents just a single character;
+ // they get merged by their parent's Row::BuildMathmlTree() function.
+ struct SymbolText : Symbol
+ {
+     // Takes no flavour or limits arguments: text symbols are always
+     // constructed with flavour "ord" and the default limits setting.
+     SymbolText(
+         const std::wstring& text, MathmlFont font,
+         Style style, RGBColour colour
+     ) :
+         Symbol(text, font, style, cFlavourOrd, cLimitsDisplayLimits, colour)
+     { }
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // SymbolOperator represents things translated as <mo>.
+ struct SymbolOperator : Symbol
+ {
+     // True if this operator is stretchy.
+     //
+     // Because the MathML operator dictionary exists, BuildMathmlTree()
+     // still has to work out whether an explicit "stretchy" attribute is
+     // needed to implement this flag.
+     bool mIsStretchy;
+
+     // When non-empty, gives the "minsize" and "maxsize" attributes.
+     // Only valid if mIsStretchy is true.
+     std::wstring mSize;
+
+     // True if the accent="true" attribute is wanted.
+     //
+     // As with mIsStretchy, BuildMathmlTree() decides whether the
+     // "accent" attribute is actually needed.
+     bool mIsAccent;
+
+     // Note the argument order: the three operator-specific values come
+     // first, followed by the usual Symbol arguments.
+     SymbolOperator(
+         bool isStretchy, const std::wstring& size, bool isAccent,
+         const std::wstring& text, MathmlFont font,
+         Style style, Flavour flavour, Limits limits, RGBColour colour
+     ) :
+         Symbol(text, font, style, flavour, limits, colour),
+         mIsStretchy(isStretchy),
+         mSize(size),
+         mIsAccent(isAccent)
+     { }
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Represents a space. This may or may not actually end up as MathML
+ // markup, depending on a variety of things.
+ struct Space : Node
+ {
+     // Width of the space in mu (18mu = 1em in normal font size).
+     // Negative widths are allowed.
+     int mWidth;
+
+     // True if the user asked for this space with a TeX spacing command
+     // such as "\quad"; false if blahtex computed it (according to TeX's
+     // rules).
+     bool mIsUserRequested;
+
+     // The Node fields are filled with fixed placeholder values
+     // (display style, "ord", default limits, colour 0).
+     Space(int width, bool isUserRequested) :
+         Node(cStyleDisplay, cFlavourOrd, cLimitsDisplayLimits, 0),
+         mWidth(width),
+         mIsUserRequested(isUserRequested)
+     { }
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Represents a base with a subscript and/or a superscript,
+ // OR a base with an underscript and/or an overscript.
+ struct Scripts : Node
+ {
+     // Any of the following three fields may be NULL (i.e. empty).
+     std::auto_ptr<Node> mBase, mUpper, mLower;
+
+     // True means sub/superscript; false means under/overscript.
+     //
+     // (This flag is computed from e.g. the "limits" setting of mBase,
+     // and from the current TeX style.)
+     bool mIsSideset;
+
+     // Takes ownership of base, upper and lower (each may be empty).
+     Scripts(
+         Style style,
+         Flavour flavour,
+         Limits limits,
+         RGBColour colour,
+         bool isSideset,
+         std::auto_ptr<Node> base,
+         std::auto_ptr<Node> upper,
+         std::auto_ptr<Node> lower
+     ) :
+         Node(style, flavour, limits, colour),
+         // Initialisers are listed in member declaration order, which is
+         // the order in which the members are actually initialised.
+         mBase(base),
+         mUpper(upper),
+         mLower(lower),
+         mIsSideset(isSideset)
+     { }
+
+     // Optimises whichever of the three parts are present.
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(
+         std::wostream& os,
+         int depth = 0
+     ) const;
+ };
+
+
+ // Represents something that will get translated as <mfrac>.
+ struct Fraction : Node
+ {
+     // The expressions above and below the bar.
+     std::auto_ptr<Node> mNumerator, mDenominator;
+
+     // Whether the fraction bar is drawn: true for ordinary fractions,
+     // false for things like binomial coefficients.
+     bool mIsLineVisible;
+
+     // Takes ownership of numerator and denominator. A Fraction always
+     // has flavour "ord" and the default limits setting.
+     Fraction(
+         Style style, RGBColour colour,
+         std::auto_ptr<Node> numerator, std::auto_ptr<Node> denominator,
+         bool isLineVisible
+     ) :
+         Node(style, cFlavourOrd, cLimitsDisplayLimits, colour),
+         mNumerator(numerator),
+         mDenominator(denominator),
+         mIsLineVisible(isLineVisible)
+     { }
+
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Represents an expression between a pair of delimiters.
+ //
+ // (Blahtex doesn't translate this using <mfenced>, because then we
+ // couldn't use more exotic (non-ASCII) fences in the "open" and
+ // "close" attributes.)
+ struct Fenced : Node
+ {
+     // Opening and closing delimiters, i.e. the text placed inside
+     // <mo>...</mo>.
+     std::wstring mLeftDelimiter, mRightDelimiter;
+
+     // The expression enclosed by the fences.
+     std::auto_ptr<Node> mChild;
+
+     // Takes ownership of child. A Fenced node always has flavour "inner"
+     // and the default limits setting.
+     Fenced(
+         Style style, RGBColour colour,
+         const std::wstring& leftDelimiter,
+         const std::wstring& rightDelimiter,
+         std::auto_ptr<Node> child
+     ) :
+         Node(style, cFlavourInner, cLimitsDisplayLimits, colour),
+         mLeftDelimiter(leftDelimiter),
+         mRightDelimiter(rightDelimiter),
+         mChild(child)
+     { }
+
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Represents an expression under a square root sign; i.e. something
+ // translated as <msqrt>.
+ struct Sqrt : Node
+ {
+     // The expression under the radical sign.
+     std::auto_ptr<Node> mChild;
+
+     // Takes ownership of child. The node's style is copied from the
+     // child; the Node base is initialised (reading child->mStyle) before
+     // mChild takes the pointer, so child must not be empty.
+     Sqrt(std::auto_ptr<Node> child, RGBColour colour) :
+         Node(child->mStyle, cFlavourOrd, cLimitsDisplayLimits, colour),
+         mChild(child)
+     { }
+
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Represents an expression under a general radical sign; i.e. something
+ // translated as <mroot>.
+ struct Root : Node
+ {
+     // The expressions under the radical (mInside) and outside it
+     // (mOutside).
+     std::auto_ptr<Node> mInside, mOutside;
+
+     // Takes ownership of both arguments. The node's style is copied from
+     // inside; the Node base is initialised (reading inside->mStyle)
+     // before mInside takes the pointer, so inside must not be empty.
+     Root(
+         std::auto_ptr<Node> inside, std::auto_ptr<Node> outside,
+         RGBColour colour
+     ) :
+         Node(inside->mStyle, cFlavourOrd, cLimitsDisplayLimits, colour),
+         mInside(inside),
+         mOutside(outside)
+     { }
+
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+
+ // Represents something translated as <mtable>.
+ struct Table : Node
+ {
+     // The table entries, stored as an array of rows.
+     // NOTE(review): these are raw pointers and ~Table() is defined
+     // elsewhere; presumably the Table owns and deletes them, in which
+     // case copying a Table would be unsafe -- confirm against the
+     // destructor.
+     std::vector<std::vector<Node*> > mRows;
+
+     // Column alignment of the table. Most environments (e.g. "matrix",
+     // "pmatrix") use cAlignCentre. The "cases" environment uses
+     // cAlignLeft (every entry aligned to the left). cAlignRightLeft
+     // alternates right- and left-aligned columns; it is used for the
+     // "aligned" environment.
+     enum Align
+     {
+         cAlignLeft,
+         cAlignCentre,
+         cAlignRightLeft
+     }
+     mAlign;
+
+     // Amount of space between rows. Currently "\substack" blocks get
+     // "tight" and everything else gets "normal".
+     enum RowSpacing
+     {
+         cRowSpacingNormal,
+         cRowSpacingTight
+     }
+     mRowSpacing;
+
+     // A new table starts with no rows and centred alignment.
+     Table(
+         Style style, RGBColour colour,
+         RowSpacing rowSpacing = cRowSpacingNormal
+     ) :
+         Node(style, cFlavourOrd, cLimitsDisplayLimits, colour),
+         mAlign(cAlignCentre),
+         mRowSpacing(rowSpacing)
+     { }
+
+     ~Table();
+
+     virtual void Optimise();
+
+     virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
+         const MathmlOptions& options,
+         const MathmlEnvironment& inheritedEnvironment,
+         unsigned& nodeCount
+     ) const;
+
+     virtual void Print(std::wostream& os, int depth = 0) const;
+ };
+
+} // end LayoutTree namespace
+
+
+// This struct records some information about the rendering environment for
+// a portion of the MathML tree. It is used when building the MathML tree
+// to decide when it is necessary to insert additional <mstyle> tags.
+struct MathmlEnvironment
+{
+    // Values of the "displaystyle" and "scriptlevel" attributes.
+    bool mDisplayStyle;
+    int mScriptLevel;
+
+    // Value of the "mathcolor" attribute.
+    RGBColour mColour;
+
+    // Direct constructor; the defaults are displaystyle off, script level
+    // zero and colour 0.
+    MathmlEnvironment(
+        bool displayStyle = false, int scriptLevel = 0, RGBColour colour = 0
+    ) :
+        mDisplayStyle(displayStyle),
+        mScriptLevel(scriptLevel),
+        mColour(colour)
+    { }
+
+    // Derives the displayStyle and scriptLevel settings from the given
+    // TeX style. (Defined elsewhere.)
+    MathmlEnvironment(LayoutTree::Node::Style style, RGBColour colour);
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/MacroProcessor.cpp b/blahtexml/source/BlahtexCore/MacroProcessor.cpp
new file mode 100644
index 0000000..f1a066d
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/MacroProcessor.cpp
@@ -0,0 +1,332 @@
+// File "MacroProcessor.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "MacroProcessor.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// Implemented in Parser.cpp:
+extern bool IsInTokenTables(const wstring& token);
+
+// If the input string ends with "Reserved", this function strips it off.
+// All other input is returned unharmed.
+//
+// The purpose is to convert internal commands like "\textReserved" into
+// plain old "\text" for error reporting purposes.
+wstring StripReservedSuffix(const wstring& input)
+{
+    // The suffix to remove and its length in characters.
+    const wchar_t suffix[] = L"Reserved";
+    const wstring::size_type suffixLength = 8;
+
+    // Too short to carry the suffix at all.
+    if (input.size() < suffixLength)
+        return input;
+
+    const wstring::size_type stemLength = input.size() - suffixLength;
+    if (input.compare(stemLength, suffixLength, suffix) != 0)
+        return input;
+
+    return input.substr(0, stemLength);
+}
+
+MacroProcessor::MacroProcessor(const vector<wstring>& input)
+{
+    // No token has been prepared by Peek() yet, and the starting cost is
+    // one unit per input token.
+    mIsTokenReady = false;
+    mCostIncurred = input.size();
+
+    // The tokens are stored in reverse, so that the first input token ends
+    // up at the back of mTokens, where it can be popped cheaply.
+    mTokens.insert(mTokens.end(), input.rbegin(), input.rend());
+}
+
+void MacroProcessor::Advance()
+{
+    // Nothing to discard once the stack is empty.
+    if (mTokens.empty())
+        return;
+
+    // Drop the current token, charge for it, and make the next Peek()
+    // process whatever token is now on top.
+    mTokens.pop_back();
+    ++mCostIncurred;
+    mIsTokenReady = false;
+}
+
+void MacroProcessor::SkipWhitespace()
+{
+    // Discard tokens for as long as Peek() reports a space. Because this
+    // goes through Peek(), macros are expanded along the way.
+    for (;;)
+    {
+        if (Peek() != L" ")
+            break;
+        Advance();
+    }
+}
+
+void MacroProcessor::SkipWhitespaceRaw()
+{
+    // Same as SkipWhitespace(), but inspects the token stack directly
+    // instead of calling Peek(), so no macro expansion takes place.
+    for (;;)
+    {
+        if (mTokens.empty() || mTokens.back() != L" ")
+            break;
+        Advance();
+    }
+}
+
+bool MacroProcessor::ReadArgument(vector<wstring>& output)
+{
+    // Reads one macro argument from the raw token stack (no macro
+    // expansion) and appends its tokens to output. An argument is either a
+    // single token or a brace-delimited group; the outermost braces of a
+    // group are not copied. Returns false if there is no argument or it
+    // starts with "}". Throws Exception(L"UnmatchedOpenBrace") if a group
+    // is never closed.
+    SkipWhitespaceRaw();
+    if (mTokens.empty())
+        return false;   // missing argument
+
+    wstring first = mTokens.back();
+    mTokens.pop_back();
+    ++mCostIncurred;
+
+    if (first == L"}")
+        return false;   // an argument cannot start with "}"
+
+    if (first != L"{")
+    {
+        // Single-token argument.
+        output.push_back(first);
+    }
+    else
+    {
+        // Brace group: track the nesting depth to find the matching
+        // closing brace.
+        int openBraces = 1;
+        while (!mTokens.empty())
+        {
+            ++mCostIncurred;
+            wstring inner = mTokens.back();
+            mTokens.pop_back();
+
+            if (inner == L"{")
+            {
+                ++openBraces;
+            }
+            else if (inner == L"}")
+            {
+                if (--openBraces == 0)
+                    break;
+            }
+            output.push_back(inner);
+        }
+        if (openBraces > 0)
+            throw Exception(L"UnmatchedOpenBrace");
+    }
+
+    mIsTokenReady = false;
+    return true;
+}
+
+wstring MacroProcessor::Get()
+{
+    // Fetch the next (macro-expanded) token and then consume it.
+    wstring next(Peek());
+    Advance();
+    return next;
+}
+
+void MacroProcessor::HandleNewcommand()
+{
+    // Parses "{\name}[n]{replacement}" from the raw token stack (the "[n]"
+    // part is optional) and records the new macro in mMacros. The caller
+    // must leave the "\newcommand" token itself on top of the stack.
+
+    // Discard the "\newcommand" token.
+    mTokens.pop_back();
+    ++mCostIncurred;
+
+    // Opening brace before the command name.
+    SkipWhitespaceRaw();
+    const bool hasOpenBrace = !mTokens.empty() && mTokens.back() == L"{";
+    if (!hasOpenBrace)
+        throw Exception(L"MissingOpenBraceAfter", L"\\newcommand");
+    mTokens.pop_back();
+
+    // The name being defined must be a token starting with a backslash.
+    SkipWhitespaceRaw();
+    const bool hasCommandToken =
+        !mTokens.empty() &&
+        !mTokens.back().empty() &&
+        mTokens.back()[0] == L'\\';
+    if (!hasCommandToken)
+        throw Exception(L"MissingCommandAfterNewcommand");
+
+    wstring commandName = mTokens.back();
+
+    // Neither an existing macro nor a built-in token may be redefined.
+    if (mMacros.count(commandName) || IsInTokenTables(commandName))
+    {
+        throw Exception(
+            L"IllegalRedefinition",
+            StripReservedSuffix(commandName)
+        );
+    }
+    mTokens.pop_back();
+
+    // Closing brace after the command name.
+    SkipWhitespaceRaw();
+    if (mTokens.empty())
+        throw Exception(L"UnmatchedOpenBrace");
+    if (mTokens.back() != L"}")
+        throw Exception(L"MissingCommandAfterNewcommand");
+    mTokens.pop_back();
+
+    // Create the table entry for the new macro.
+    Macro& definition = mMacros[commandName];
+
+    // Optional "[n]": the parameter count, a single digit from 1 to 9.
+    SkipWhitespaceRaw();
+    if (!mTokens.empty() && mTokens.back() == L"[")
+    {
+        mTokens.pop_back();
+
+        SkipWhitespaceRaw();
+        if (mTokens.empty() || mTokens.back().size() != 1)
+            throw Exception(L"MissingOrIllegalParameterCount", commandName);
+
+        definition.mParameterCount =
+            static_cast<int>(mTokens.back()[0] - L'0');
+        const bool countInRange =
+            definition.mParameterCount > 0 &&
+            definition.mParameterCount <= 9;
+        if (!countInRange)
+            throw Exception(L"MissingOrIllegalParameterCount", commandName);
+        mTokens.pop_back();
+
+        SkipWhitespaceRaw();
+        if (mTokens.empty() || mTokens.back() != L"]")
+            throw Exception(L"UnmatchedOpenBracket");
+        mTokens.pop_back();
+    }
+
+    // Finally read and store the tokens that make up the replacement.
+    if (!ReadArgument(definition.mReplacement))
+        throw Exception(L"NotEnoughArguments", L"\\newcommand");
+}
+
+// Returns the next token after macro expansion, without consuming it.
+// Returns an empty string once the token stack is exhausted.
+//
+// Throws Exception(L"TooManyTokens") when the stack size plus the
+// accumulated cost reaches cMaxParseCost, and other Exceptions for
+// malformed "\sqrt" optional arguments or macro invocations.
+wstring MacroProcessor::Peek()
+{
+ // Each pass either returns a ready token or rewrites the top of the
+ // stack (macro expansion) and tries again.
+ while (!mTokens.empty())
+ {
+ // This is the only place that we check that the user hasn't
+ // exceeded the token limit.
+ if (mTokens.size() + (++mCostIncurred) >= cMaxParseCost)
+ throw Exception(L"TooManyTokens");
+
+ // The top token was already processed by an earlier call.
+ if (mIsTokenReady)
+ return mTokens.back();
+
+ // "\sqrt" needs special handling due to its optional argument.
+ // Something like "\sqrtReserved{x}" gets converted to "\sqrt{x}".
+ // Something like "\sqrtReserved[y]{x}" gets converted to
+ // "\rootReserved{y}{x}".
+ //
+ // (Blahtex doesn't handle grouping of [...] the same way as texvc;
+ // it does it the TeX way. For example, "\sqrt[\sqrt[2]{3}]{4}"
+ // generates an error, whereas it is valid in texvc.)
+ //
+ // We need to take into account grouping braces,
+ // e.g. "\sqrt[{]}]{2}" should be valid.
+ if (mTokens.back() == L"\\sqrtReserved")
+ {
+ mTokens.pop_back();
+
+ SkipWhitespaceRaw();
+ if (!mTokens.empty() && mTokens.back() == L"[")
+ {
+ // Turn the "[" into "{" in place.
+ mTokens.back() = L"{";
+
+ // The stack is stored back-to-front, so a reverse iterator
+ // walks the tokens in reading order; start just after the "[".
+ vector<wstring>::reverse_iterator ptr = mTokens.rbegin();
+ ptr++;
+
+ // Scan for the first "]" that is not nested inside braces.
+ int braceDepth = 0;
+ while (ptr != mTokens.rend() &&
+ (braceDepth > 0 || *ptr != L"]")
+ )
+ {
+ mCostIncurred++;
+ if (*ptr == L"{")
+ braceDepth++;
+ else if (*ptr == L"}")
+ {
+ if (--braceDepth < 0)
+ throw Exception(L"UnmatchedCloseBrace");
+ }
+ ptr++;
+ }
+ if (ptr == mTokens.rend())
+ throw Exception(L"UnmatchedOpenBracket");
+ // Given the loop's exit condition, *ptr is "]" whenever ptr is
+ // not rend(), so this check appears to be purely defensive.
+ if (*ptr != L"]")
+ throw Exception(L"NotEnoughArguments", L"\\sqrt");
+ // Turn the matching "]" into "}" in place.
+ *ptr = L"}";
+ mTokens.push_back(L"\\rootReserved");
+ mIsTokenReady = true;
+ return L"\\rootReserved";
+ }
+ else
+ {
+ // No optional argument: plain "\sqrt".
+ mTokens.push_back(L"\\sqrt");
+ mIsTokenReady = true;
+ return L"\\sqrt";
+ }
+ }
+ else
+ {
+ wstring token = mTokens.back();
+ wishful_hash_map<wstring, Macro>::const_iterator
+ macroPtr = mMacros.find(token);
+ if (macroPtr == mMacros.end())
+ {
+ // In this case it's not "\sqrt" and not a macro, so
+ // we're finished here.
+ mIsTokenReady = true;
+ return token;
+ }
+
+ const Macro& macro = macroPtr->second;
+ mTokens.pop_back();
+
+ // It's a macro. Determine the arguments to substitute in....
+ vector<vector<wstring> > arguments(macro.mParameterCount);
+ for (int argumentIndex = 0;
+ argumentIndex < macro.mParameterCount;
+ argumentIndex++
+ )
+ if (!ReadArgument(arguments[argumentIndex]))
+ throw Exception(
+ L"NotEnoughArguments",
+ StripReservedSuffix(token)
+ );
+
+ // ... and now write the replacement, substituting
+ // arguments as we go.
+ const vector<wstring>& replacement = macro.mReplacement;
+ vector<wstring> output;
+ for (vector<wstring>::const_iterator
+ source = replacement.begin();
+ source != replacement.end();
+ source++
+ )
+ {
+ mCostIncurred++;
+ if (*source == L"#")
+ {
+ // "#" must be followed by a single-character token.
+ if (++source == replacement.end() ||
+ source->size() != 1
+ )
+ throw Exception(
+ L"MissingOrIllegalParameterIndex",
+ token
+ );
+
+ // Convert the character to a zero-based index ("1" -> 0).
+ int parameterIndex
+ = static_cast<int>((*source)[0] - '1');
+
+ // FIX: perhaps this next error should be flagged when
+ // reading the definition of the macro rather than
+ // during macro expansion
+ if (parameterIndex < 0 ||
+ parameterIndex >= macro.mParameterCount
+ )
+ throw Exception(
+ L"MissingOrIllegalParameterIndex",
+ token
+ );
+ copy(
+ arguments[parameterIndex].begin(),
+ arguments[parameterIndex].end(),
+ back_inserter(output)
+ );
+ mCostIncurred += arguments[parameterIndex].size();
+ }
+ else
+ output.push_back(*source);
+ }
+ // Push the expansion in reverse so that its first token ends up
+ // on top of the stack, then loop to process that token.
+ copy(output.rbegin(), output.rend(), back_inserter(mTokens));
+ mCostIncurred += output.size();
+ }
+ }
+
+ // Token stack exhausted.
+ return L"";
+}
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/MacroProcessor.h b/blahtexml/source/BlahtexCore/MacroProcessor.h
new file mode 100644
index 0000000..3fdc377
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/MacroProcessor.h
@@ -0,0 +1,125 @@
+// File "MacroProcessor.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_MACROPROCESSOR_H
+#define BLAHTEX_MACROPROCESSOR_H
+
+#include <string>
+#include <vector>
+#include <map>
+#include "Misc.h"
+
+namespace blahtex
+{
+
+// The time spent by the parser should be O(cMaxParseCost).
+// The aim is to prevent a nasty user inducing exponential time via
+// tricky macro definitions.
+// MacroProcessor keeps its running total in mCostIncurred (declared below).
+const unsigned cMaxParseCost = 20000;
+
+
+// MacroProcessor maintains a stack of tokens, can be queried for the next
+// available token, and expands macros automatically. It is the layer
+// between tokenising (handled by the Manager class) and parsing proper
+// (handled by the Parser class).
+//
+// It does not process "\newcommand" commands automatically; instead it
+// passes "\newcommand" straight back to the caller, and the caller is
+// responsible for calling MacroProcessor::HandleNewcommand.
+// (Rationale: this gives results much closer to real TeX parsing. For
+// example, we wouldn't want "x^\newcommand{\stuff}{xyz}\stuff" to be
+// construed as legal input.)
+class MacroProcessor
+{
+public:
+ // Input is a vector of strings, one for each input token.
+ MacroProcessor(const std::vector<std::wstring>& input);
+
+ // Returns the next token on the stack (without removing it), after
+ // expanding macros.
+ // Returns empty string if there are no tokens left.
+ std::wstring Peek();
+
+ // Same as Peek(), but also removes the token.
+ // Returns empty string if there are no tokens left.
+ std::wstring Get();
+
+ // Pops the current token.
+ void Advance();
+
+ // Pops consecutive whitespace tokens.
+ void SkipWhitespace();
+
+ // Assuming that "\newcommand" has just been seen and popped off the
+ // stack, this function processes a subsequent macro definition.
+ void HandleNewcommand();
+
+private:
+
+ // Records information about a single macro.
+ struct Macro
+ {
+ // The number of parameters the macro accepts. (Blahtex doesn't
+ // handle optional arguments.)
+ int mParameterCount;
+
+ // The sequence of tokens that get substituted when this macro is
+ // expanded. Arguments are indicated as follows: first the string
+ // "#", and then the string "n", where n is a number between 1 and
+ // 9, indicating which argument to substitute.
+ //
+ // During expansion, a "#" that is not followed by a one-character
+ // token, or whose index falls outside 1..mParameterCount, raises
+ // the "MissingOrIllegalParameterIndex" exception.
+ std::vector<std::wstring> mReplacement;
+
+ // A freshly constructed macro takes no parameters and has an empty
+ // replacement.
+ Macro() :
+ mParameterCount(0)
+ { }
+ };
+
+ // List of all currently recognised macros.
+ wishful_hash_map<std::wstring, Macro> mMacros;
+
+ // The token stack; the top of the stack is mTokens.back().
+ // (Macro expansion pushes its output in reverse order, so that the
+ // first token of the expansion ends up on top.)
+ std::vector<std::wstring> mTokens;
+
+ // This flag is set if we have already ascertained that the current
+ // token doesn't need to undergo macro expansion.
+ // (This is just an optimisation so that successive calls to Peek/Get
+ // don't have to do extra work.)
+ bool mIsTokenReady;
+
+ // Reads a single macro argument; that is, either a single token, or if
+ // that token is "{", reads all the way up to the matching "}". The
+ // argument (not including delimiting braces) is appended to "output".
+ //
+ // Returns true on success, or false if the argument is missing.
+ // (During expansion a false result is reported to the user as the
+ // "NotEnoughArguments" exception.)
+ bool ReadArgument(std::vector<std::wstring>& output);
+
+ // Skips whitespace without expanding macros.
+ void SkipWhitespaceRaw();
+
+ // Total approximate cost of parsing activity so far.
+ // (See cMaxParseCost.)
+ // Macro expansion adds one per replacement token, plus the length of
+ // each substituted argument and the length of the expanded output.
+ unsigned mCostIncurred;
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/Manager.cpp b/blahtexml/source/BlahtexCore/Manager.cpp
new file mode 100644
index 0000000..25d5a05
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Manager.cpp
@@ -0,0 +1,553 @@
+// File "Manager.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <sstream>
+#include <stdexcept>
+#include "Manager.h"
+#include "Parser.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+
+// Reports whether "c" is an ASCII letter (a-z or A-Z).
+// Plain range comparisons are used on purpose: the author did not entirely
+// trust the wide versions of isalpha etc.
+bool IsAlphabetic(wchar_t c)
+{
+    if (c >= L'a' && c <= L'z')
+        return true;
+
+    if (c >= L'A' && c <= L'Z')
+        return true;
+
+    return false;
+}
+
+
+// Tokenise() breaks "input" up into tokens, one string per token, and
+// APPENDS them to "output" (existing contents of "output" are kept).
+//
+// The token kinds produced are:
+// * a single character such as "a" or "{", including any single
+//   non-ASCII unicode character
+// * an alphabetic command such as "\frac"
+// * a backslash followed by one nonalphabetic character, such as "\,"
+// * a backslash followed by whitespace, always stored as "\ "
+// * any other run of whitespace, stored as the single token " "
+// * "\begin { stuff }" (and likewise "\end"), stored as the one token
+//   "\begin{ stuff }": whitespace between "\begin" and "{" is dropped,
+//   whitespace between the braces is kept.
+//
+// Throws Exception with code:
+// * "IllegalCharacter" for a non-whitespace character below ' ', or 0x7F
+// * "IllegalFinalBackslash" if the input ends with a backslash
+// * "MissingOpenBraceAfter" if "\begin" or "\end" is not followed by "{"
+// * "UnmatchedOpenBrace" if that "{" is never closed.
+void Tokenise(const wstring& input, vector<wstring>& output)
+{
+    const wstring::const_iterator finish = input.end();
+    wstring::const_iterator cursor = input.begin();
+
+    while (cursor != finish)
+    {
+        // A run of whitespace collapses into one " " token.
+        if (iswspace(*cursor))
+        {
+            output.push_back(L" ");
+            ++cursor;
+            while (cursor != finish && iswspace(*cursor))
+                ++cursor;
+            continue;
+        }
+
+        // Anything other than a backslash is a one-character token.
+        if (*cursor != L'\\')
+        {
+            // Disallow non-printable, non-whitespace ASCII
+            if (*cursor < L' ' || *cursor == 0x7F)
+                throw Exception(L"IllegalCharacter");
+
+            output.push_back(wstring(1, *cursor));
+            ++cursor;
+            continue;
+        }
+
+        // From here on we are building a token that starts with a
+        // backslash; step over the backslash itself first.
+        ++cursor;
+        if (cursor == finish)
+            throw Exception(L"IllegalFinalBackslash");
+
+        wstring command(L"\\");
+
+        if (IsAlphabetic(*cursor))
+        {
+            // Alphabetic command: gather the whole run of letters.
+            for (; cursor != finish && IsAlphabetic(*cursor); ++cursor)
+                command += *cursor;
+
+            // "\begin" and "\end" swallow their braced argument, so that
+            // "\begin {xyz}" becomes the single token "\begin{xyz}".
+            if (command == L"\\begin" || command == L"\\end")
+            {
+                while (cursor != finish && iswspace(*cursor))
+                    ++cursor;
+
+                if (cursor == finish || *cursor != L'{')
+                    throw Exception(L"MissingOpenBraceAfter", command);
+
+                // Copy the "{" and everything up to (not including) "}".
+                command += *cursor;
+                ++cursor;
+                for (; cursor != finish && *cursor != L'}'; ++cursor)
+                    command += *cursor;
+
+                if (cursor == finish)
+                    throw Exception(L"UnmatchedOpenBrace");
+
+                // Finally the closing "}".
+                command += *cursor;
+                ++cursor;
+            }
+        }
+        else if (iswspace(*cursor))
+        {
+            // Backslash-whitespace, e.g. "\ "; any amount of whitespace
+            // is stored as exactly one space.
+            command += L" ";
+            ++cursor;
+            while (cursor != finish && iswspace(*cursor))
+                ++cursor;
+        }
+        else
+        {
+            // Single nonalphabetic character, e.g. "\," or "\;".
+            command += *cursor;
+            ++cursor;
+        }
+
+        output.push_back(command);
+    }
+}
+
+
+// TeX source (a concatenation of "\newcommand" definitions) for the
+// texvc-compatibility macros. Manager::Manager() tokenises this string
+// into gTexvcCompatibilityMacrosTokenised, and ProcessInput() places those
+// tokens ahead of everything else when its texvcCompatibility flag is set.
+// Note that some entries are defined in terms of other entries in this
+// list (e.g. "\Reals" expands to "\R").
+wstring Manager::gTexvcCompatibilityMacros =
+
+ // First we have some macros which are not part of tex/latex/amslatex
+ // but which texvc recognises, so for backward compatibility we define
+ // them here too. Most of these are apparently intended to cater for
+ // those more familiar with HTML entities.
+
+ L"\\newcommand{\\R}{{\\mathbb R}}"
+ L"\\newcommand{\\Reals}{\\R}"
+ L"\\newcommand{\\reals}{\\R}"
+ L"\\newcommand{\\Z}{{\\mathbb Z}}"
+ L"\\newcommand{\\N}{{\\mathbb N}}"
+ L"\\newcommand{\\natnums}{\\N}"
+ L"\\newcommand{\\Complex}{{\\mathbb C}}"
+ L"\\newcommand{\\cnums}{\\Complex}"
+ L"\\newcommand{\\alefsym}{\\aleph}"
+ L"\\newcommand{\\alef}{\\aleph}"
+ L"\\newcommand{\\larr}{\\leftarrow}"
+ L"\\newcommand{\\rarr}{\\rightarrow}"
+ L"\\newcommand{\\Larr}{\\Leftarrow}"
+ L"\\newcommand{\\lArr}{\\Leftarrow}"
+ L"\\newcommand{\\Rarr}{\\Rightarrow}"
+ L"\\newcommand{\\rArr}{\\Rightarrow}"
+ L"\\newcommand{\\uarr}{\\uparrow}"
+ L"\\newcommand{\\uArr}{\\Uparrow}"
+ L"\\newcommand{\\Uarr}{\\Uparrow}"
+ L"\\newcommand{\\darr}{\\downarrow}"
+ L"\\newcommand{\\dArr}{\\Downarrow}"
+ L"\\newcommand{\\Darr}{\\Downarrow}"
+ L"\\newcommand{\\lrarr}{\\leftrightarrow}"
+ L"\\newcommand{\\harr}{\\leftrightarrow}"
+ L"\\newcommand{\\Lrarr}{\\Leftrightarrow}"
+ L"\\newcommand{\\Harr}{\\Leftrightarrow}"
+ L"\\newcommand{\\lrArr}{\\Leftrightarrow}"
+ // The next one looks like a typo in the texvc source code:
+ L"\\newcommand{\\hAar}{\\Leftrightarrow}"
+ L"\\newcommand{\\sub}{\\subset}"
+ L"\\newcommand{\\supe}{\\supseteq}"
+ L"\\newcommand{\\sube}{\\subseteq}"
+ L"\\newcommand{\\infin}{\\infty}"
+ L"\\newcommand{\\lang}{\\langle}"
+ L"\\newcommand{\\rang}{\\rangle}"
+ L"\\newcommand{\\real}{\\Re}"
+ L"\\newcommand{\\image}{\\Im}"
+ L"\\newcommand{\\bull}{\\bullet}"
+ L"\\newcommand{\\weierp}{\\wp}"
+ L"\\newcommand{\\isin}{\\in}"
+ L"\\newcommand{\\plusmn}{\\pm}"
+ L"\\newcommand{\\Dagger}{\\ddagger}"
+ L"\\newcommand{\\exist}{\\exists}"
+ L"\\newcommand{\\sect}{\\S}"
+ L"\\newcommand{\\clubs}{\\clubsuit}"
+ L"\\newcommand{\\spades}{\\spadesuit}"
+ L"\\newcommand{\\hearts}{\\heartsuit}"
+ L"\\newcommand{\\diamonds}{\\diamondsuit}"
+ L"\\newcommand{\\sdot}{\\cdot}"
+ L"\\newcommand{\\ang}{\\angle}"
+ L"\\newcommand{\\thetasym}{\\theta}"
+ L"\\newcommand{\\Alpha}{A}"
+ L"\\newcommand{\\Beta}{B}"
+ L"\\newcommand{\\Epsilon}{E}"
+ L"\\newcommand{\\Zeta}{Z}"
+ L"\\newcommand{\\Eta}{H}"
+ L"\\newcommand{\\Iota}{I}"
+ L"\\newcommand{\\Kappa}{K}"
+ L"\\newcommand{\\Mu}{M}"
+ L"\\newcommand{\\Nu}{N}"
+ L"\\newcommand{\\Rho}{P}"
+ L"\\newcommand{\\Tau}{T}"
+ L"\\newcommand{\\Chi}{X}"
+ L"\\newcommand{\\arccot}{\\operatorname{arccot}}"
+ L"\\newcommand{\\arcsec}{\\operatorname{arcsec}}"
+ L"\\newcommand{\\arccsc}{\\operatorname{arccsc}}"
+ L"\\newcommand{\\sgn}{\\operatorname{sgn}}"
+
+ // The commands in this next group are defined in tex/latex/amslatex,
+ // but they don't get mapped to what texvc thinks (e.g. "\part" is used
+ // in typesetting books to mean a unit somewhat larger than a chapter,
+ // like "Part IV").
+ //
+ // We'll stick to the way texvc does it, especially since wikipedia has
+ // quite a number of equations using them.
+ L"\\newcommand{\\empty}{\\emptyset}"
+ L"\\newcommand{\\and}{\\wedge}"
+ L"\\newcommand{\\or}{\\vee}"
+ L"\\newcommand{\\part}{\\partial}"
+;
+
+// TeX source (a concatenation of "\newcommand" definitions) for the macros
+// that are always available. Manager::Manager() tokenises this string into
+// gStandardMacrosTokenised, and ProcessInput() places those tokens ahead of
+// the user's input on every call.
+wstring Manager::gStandardMacros =
+
+ // The next group are standard TeX/LaTeX/AMS-LaTeX synonyms.
+ L"\\newcommand{\\|}{\\Vert}"
+ L"\\newcommand{\\implies}{\\;\\Longrightarrow\\;}"
+ L"\\newcommand{\\neg}{\\lnot}"
+ L"\\newcommand{\\ne}{\\neq}"
+ L"\\newcommand{\\ge}{\\geq}"
+ L"\\newcommand{\\le}{\\leq}"
+ L"\\newcommand{\\land}{\\wedge}"
+ L"\\newcommand{\\lor}{\\vee}"
+ L"\\newcommand{\\gets}{\\leftarrow}"
+ L"\\newcommand{\\to}{\\rightarrow}"
+ L"\\newcommand{\\doublecap}{\\Cap}"
+ L"\\newcommand{\\restriction}{\\upharpoonright}"
+ L"\\newcommand{\\llless}{\\lll}"
+ L"\\newcommand{\\gggtr}{\\ggg}"
+ L"\\newcommand{\\Doteq}{\\doteqdot}"
+ L"\\newcommand{\\doublecup}{\\Cup}"
+ // NOTE(review): amssymb appears to define "\dasharrow" as a synonym for
+ // "\dashrightarrow", not "\dashleftarrow" -- confirm which arrow is
+ // intended here before changing it.
+ L"\\newcommand{\\dasharrow}{\\dashleftarrow}"
+ L"\\newcommand{\\vartriangleleft}{\\lhd}"
+ L"\\newcommand{\\vartriangleright}{\\rhd}"
+ L"\\newcommand{\\trianglelefteq}{\\unlhd}"
+ L"\\newcommand{\\trianglerighteq}{\\unrhd}"
+ L"\\newcommand{\\Join}{\\bowtie}"
+ L"\\newcommand{\\Diamond}{\\lozenge}"
+
+ // The amsfonts package accepts the following two commands, but warns
+ // that they are obsolete, so let's just quietly replace them.
+ L"\\newcommand{\\Bbb}{\\mathbb}"
+ L"\\newcommand{\\bold}{\\mathbf}"
+
+ // Now we come to the xxxReserved commands. These are all implemented
+ // as macros in TeX, so for maximum compatibility, we want to treat
+ // their arguments the way a TeX macro does. The strategy is the
+ // following. First, in Manager::ProcessInput, we convert e.g. "\mbox"
+ // into "\mboxReserved". Then, the MacroProcessor object sees e.g.
+ // "\mboxReserved A" and converts it to "\mbox{A}". This simplifies
+ // things enormously for the parser, since now it can treat "\mbox"
+ // and "\hbox" in the same way. ("\hbox" requires braces around its
+ // argument, even if it's just a single character.) This strategy also
+ // keeps TeX happy when we send off the purified TeX, since TeX doesn't
+ // care about the extra braces.
+ //
+ // (ProcessInput also renames "\sqrt" to "\sqrtReserved", but no macro
+ // for it is defined in this list.)
+
+ L"\\newcommand{\\mboxReserved} [1]{\\mbox{#1}}"
+ L"\\newcommand{\\substackReserved} [1]{\\substack{#1}}"
+ L"\\newcommand{\\oversetReserved} [2]{\\overset{#1}{#2}}"
+ L"\\newcommand{\\undersetReserved} [2]{\\underset{#1}{#2}}"
+
+ // The following are all similar, but they get extra "safety braces"
+ // placed around them. For example, "x^\frac yz" is legal, because it
+ // becomes "x^{y \over z}".
+
+ L"\\newcommand{\\textReserved} [1]{{\\text{#1}}}"
+ L"\\newcommand{\\textitReserved} [1]{{\\textit{#1}}}"
+ L"\\newcommand{\\textrmReserved} [1]{{\\textrm{#1}}}"
+ L"\\newcommand{\\textbfReserved} [1]{{\\textbf{#1}}}"
+ L"\\newcommand{\\textsfReserved} [1]{{\\textsf{#1}}}"
+ L"\\newcommand{\\textttReserved} [1]{{\\texttt{#1}}}"
+ L"\\newcommand{\\emphReserved} [1]{{\\emph{#1}}}"
+ L"\\newcommand{\\fracReserved} [2]{{\\frac{#1}{#2}}}"
+ L"\\newcommand{\\mathrmReserved} [1]{{\\mathrm{#1}}}"
+ L"\\newcommand{\\mathbfReserved} [1]{{\\mathbf{#1}}}"
+ L"\\newcommand{\\mathbbReserved} [1]{{\\mathbb{#1}}}"
+ L"\\newcommand{\\mathitReserved} [1]{{\\mathit{#1}}}"
+ L"\\newcommand{\\mathcalReserved} [1]{{\\mathcal{#1}}}"
+ L"\\newcommand{\\mathfrakReserved} [1]{{\\mathfrak{#1}}}"
+ L"\\newcommand{\\mathttReserved} [1]{{\\mathtt{#1}}}"
+ L"\\newcommand{\\mathsfReserved} [1]{{\\mathsf{#1}}}"
+ L"\\newcommand{\\bigReserved} [1]{{\\big#1}}"
+ L"\\newcommand{\\biggReserved} [1]{{\\bigg#1}}"
+ L"\\newcommand{\\BigReserved} [1]{{\\Big#1}}"
+ L"\\newcommand{\\BiggReserved} [1]{{\\Bigg#1}}"
+
+ L"\\newcommand{\\japReserved} [1]{{\\jap{#1}}}"
+ L"\\newcommand{\\cyrReserved} [1]{{\\cyr{#1}}}"
+;
+
+// Tokenised forms of gStandardMacros and gTexvcCompatibilityMacros. Both
+// start out empty; Manager::Manager() fills each one (via Tokenise) when it
+// finds it empty.
+vector<wstring> Manager::gStandardMacrosTokenised;
+vector<wstring> Manager::gTexvcCompatibilityMacrosTokenised;
+
+// Constructs a Manager that has not processed any input yet.
+//
+// Also tokenises the built-in macro tables the first time round (i.e.
+// whenever the shared tokenised caches are still empty).
+//
+// Throws runtime_error if RGBColour is not exactly four bytes wide.
+Manager::Manager()
+{
+    if (sizeof(RGBColour) != 4)
+        throw runtime_error("The \"unsigned\" type is not 4 bytes wide!");
+
+    // Tokenise the standard macros if it hasn't been done already.
+
+    if (gTexvcCompatibilityMacrosTokenised.empty())
+        Tokenise(
+            gTexvcCompatibilityMacros,
+            gTexvcCompatibilityMacrosTokenised
+        );
+
+    if (gStandardMacrosTokenised.empty())
+        Tokenise(gStandardMacros, gStandardMacrosTokenised);
+
+    mStrictSpacingRequested = false;
+
+    // GenerateMathml() tests this flag before anything else, so it must
+    // have a defined value even if ProcessInput() has never been called;
+    // otherwise that test reads an indeterminate bool.
+    mHasDelayedMathmlError = false;
+}
+
+// Tokenises "input", places the built-in macro definitions ahead of it,
+// and builds the parse tree (mParseTree) and layout tree (mLayoutTree).
+//
+// input:              the TeX source to process.
+// texvcCompatibility: if true, the texvc-compatibility macros are placed
+//                     ahead of the standard macros.
+//
+// Throws Exception with code "ReservedCommand" if an input token already
+// ends in "Reserved". Exceptions raised by Tokenise or while building the
+// layout tree propagate to the caller, with one exception: an
+// "UnavailableSymbolFontCombination" error from the layout phase is stored
+// in mDelayedMathmlError (and the layout tree is cleared) instead of being
+// thrown here.
+void Manager::ProcessInput(const wstring& input, bool texvcCompatibility)
+{
+    // Here are all the commands which get "Reserved" tacked on the end
+    // before the MacroProcessor sees them:
+
+    static wstring reservedCommandArray[] =
+    {
+        L"\\sqrt",
+        L"\\mbox",
+        L"\\text",
+        L"\\textit",
+        L"\\textrm",
+        L"\\textbf",
+        L"\\textsf",
+        L"\\texttt",
+        L"\\jap",
+        L"\\cyr",
+        L"\\emph",
+        L"\\frac",
+        L"\\mathrm",
+        L"\\mathbf",
+        L"\\mathbb",
+        L"\\mathit",
+        L"\\mathcal",
+        L"\\mathfrak",
+        L"\\mathtt",
+        L"\\mathsf",
+        L"\\big",
+        L"\\bigg",
+        L"\\Big",
+        L"\\Bigg",
+        L"\\overset",
+        L"\\underset",
+        L"\\substack"
+    };
+    static wishful_hash_set<wstring> reservedCommandTable(
+        reservedCommandArray,
+        END_ARRAY(reservedCommandArray)
+    );
+
+    vector<wstring> inputTokens;
+    Tokenise(input, inputTokens);
+
+    mStrictSpacingRequested = false;
+
+    // Check that the user hasn't supplied any input directly containing the
+    // "Reserved" suffix, and add Reserved suffixes appropriately.
+    //
+    // Also search for magic commands (currently the only magic command is
+    // "\strictspacing")
+    for (vector<wstring>::iterator
+        ptr = inputTokens.begin();
+        ptr != inputTokens.end();
+        ++ptr
+    )
+    {
+        if (reservedCommandTable.count(*ptr))
+            *ptr += L"Reserved";
+
+        else if (
+            ptr->size() >= 8 &&
+            ptr->substr(ptr->size() - 8, 8) == L"Reserved"
+        )
+            throw Exception(L"ReservedCommand", *ptr);
+
+        else if (*ptr == L"\\strictspacing")
+        {
+            // The magic command itself is replaced by whitespace so the
+            // parser never sees it.
+            mStrictSpacingRequested = true;
+            *ptr = L" ";
+        }
+    }
+
+    vector<wstring> tokens;
+
+    // Put the texvc-compatibility macros (where requested) and the
+    // standard macros in front of the user's tokens.
+
+    if (texvcCompatibility)
+        tokens = gTexvcCompatibilityMacrosTokenised;
+
+    copy(
+        gStandardMacrosTokenised.begin(),
+        gStandardMacrosTokenised.end(),
+        back_inserter(tokens)
+    );
+    copy(inputTokens.begin(), inputTokens.end(), back_inserter(tokens));
+
+    // Generate the parse tree and the layout tree.
+    Parser P;
+    mParseTree = P.DoParse(tokens);
+    mHasDelayedMathmlError = false;
+
+    try
+    {
+        TexProcessingState topState;
+        topState.mStyle = LayoutTree::Node::cStyleText;
+        topState.mColour = 0;
+        mLayoutTree = mParseTree->BuildLayoutTree(topState);
+        mLayoutTree->Optimise();
+    }
+    catch (Exception& e)
+    {
+        // Some types of error need to be returned as MathML errors, not
+        // parsing errors.
+        if (e.GetCode() == L"UnavailableSymbolFontCombination")
+        {
+            mHasDelayedMathmlError = true;
+            mDelayedMathmlError = e;
+            mLayoutTree.reset(NULL);
+        }
+        else
+            // Rethrow the exception object that is in flight rather than
+            // a copy of it ("throw e;" would copy, and would slice any
+            // type derived from Exception).
+            throw;
+    }
+}
+
+
+// Builds the MathML tree for the most recently processed input and
+// returns its root node.
+//
+// Throws the stored delayed error if ProcessInput() recorded one, or
+// logic_error if there is no layout tree to work from.
+auto_ptr<MathmlNode> Manager::GenerateMathml(
+    const MathmlOptions& options
+) const
+{
+    // A layout-phase error that was held back by ProcessInput() is
+    // reported now.
+    if (mHasDelayedMathmlError)
+        throw mDelayedMathmlError;
+
+    if (!mLayoutTree.get())
+        throw logic_error(
+            "Layout tree not yet built in Manager::GenerateMathml"
+        );
+
+    // Work on a private copy of the options, because the "\strictspacing"
+    // magic command (if it appeared in the input) overrides the caller's
+    // spacing control setting.
+    MathmlOptions effectiveOptions(options);
+    if (mStrictSpacingRequested)
+        effectiveOptions.mSpacingControl =
+            MathmlOptions::cSpacingControlStrict;
+
+    // nodeCount counts the nodes generated while the tree is being built;
+    // if too many appear, an exception is thrown.
+    unsigned nodeCount = 0;
+    MathmlEnvironment topEnvironment(
+        LayoutTree::Node::cStyleText, RGBColour(0)
+    );
+
+    auto_ptr<MathmlNode> root = mLayoutTree->BuildMathmlTree(
+        effectiveOptions,
+        topEnvironment,
+        nodeCount
+    );
+
+    return root;
+}
+
+
+// Returns a complete LaTeX document (preamble, the purified formula inside
+// "$ ... $", and the closing "\end{document}") for the most recently
+// processed input.
+//
+// Throws logic_error if no parse tree has been built, and Exception with
+// code "LatexPackageUnavailable" or "LatexFontNotSpecified" when the
+// formula needs the ucs/CJK packages or a Japanese font that "options"
+// does not allow or supply.
+wstring Manager::GeneratePurifiedTex(
+    const PurifiedTexOptions& options
+) const
+{
+    if (!mParseTree.get())
+        throw logic_error(
+            "Parse tree not yet built in Manager::GeneratePurifiedTex"
+        );
+
+    // Render the formula itself first; this also tells us (through
+    // "features") which packages the preamble will need.
+    wostringstream formulaStream;
+    LatexFeatures features;
+    mParseTree->GetPurifiedTex(formulaStream, features, cFontEncodingDefault);
+    wstring latex = formulaStream.str();
+
+    if (features.mNeedsX2 || features.mNeedsCJK)
+    {
+        features.mNeedsUcs = true;
+        features.mNeedsAmsmath = true; // for the "\text" command
+    }
+
+    // Now assemble the document around the formula.
+
+    wostringstream document;
+
+    document << L"\\nonstopmode\n";
+    document << L"\\documentclass[12pt]{article}\n";
+
+    if (features.mNeedsAmsmath)
+        document << L"\\usepackage{amsmath}\n";
+    if (features.mNeedsAmsfonts)
+        document << L"\\usepackage{amsfonts}\n";
+    if (features.mNeedsAmssymb)
+        document << L"\\usepackage{amssymb}\n";
+    if (features.mNeedsColor)
+        document << L"\\usepackage[dvips,usenames]{color}\n";
+
+    if (features.mNeedsUcs)
+    {
+        if (!options.mAllowUcs)
+            throw Exception(L"LatexPackageUnavailable", L"ucs");
+
+        document << L"\\usepackage[utf8x]{inputenc}\n";
+    }
+
+    if (features.mNeedsX2)
+    {
+        document << L"\\usepackage[X2,T1]{fontenc}\n";
+        document << L"\\newcommand{\\cyr}[1]{\\text{";
+        document << L"\\bgroup\\fontencoding{X2}\\selectfont #1\\egroup}}\n";
+    }
+
+    if (features.mNeedsCJK)
+    {
+        if (!options.mAllowCJK)
+            throw Exception(L"LatexPackageUnavailable", L"CJK");
+
+        document << L"\\usepackage{CJK}\n";
+
+        if (features.mNeedsJapaneseFont)
+        {
+            if (options.mJapaneseFont.empty())
+                throw Exception(L"LatexFontNotSpecified", L"japanese");
+
+            document << L"\\newcommand{\\jap}[1]{\\text{\\begin{CJK}{UTF8}{";
+            document << options.mJapaneseFont;
+            document << L"}#1\\end{CJK}}}\n";
+        }
+    }
+
+    // With the preview package the formula is wrapped in a "preview"
+    // environment; without it we just suppress page decorations.
+    if (options.mAllowPreview)
+    {
+        document << L"\\usepackage[active]{preview}\n";
+        document << L"\\begin{document}\n";
+        document << L"\\begin{preview}\n";
+    }
+    else
+    {
+        document << L"\\pagestyle{empty}\n";
+        document << L"\\begin{document}\n";
+    }
+
+    document << L"$\n" << latex << L"\n$\n";
+
+    if (options.mAllowPreview)
+        document << L"\\end{preview}\n";
+
+    document << L"\\end{document}\n";
+
+    return document.str();
+}
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/Manager.h b/blahtexml/source/BlahtexCore/Manager.h
new file mode 100644
index 0000000..44b5a22
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Manager.h
@@ -0,0 +1,127 @@
+// File "Manager.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_MANAGER_H
+#define BLAHTEX_MANAGER_H
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <set>
+#include "Misc.h"
+#include "MathmlNode.h"
+#include "LayoutTree.h"
+#include "ParseTree.h"
+
+namespace blahtex
+{
+
+// The Manager class coordinates all the bits and pieces required to convert
+// the given TeX input into MathML and purified TeX output, including
+// tokenising, texvc-compatibility macros, building the parse and layout
+// trees, deciding which LaTeX packages to include, converting mathvariant
+// to MathML version 1 fonts.
+//
+// The Manager class could be used as an interface between the blahtex core
+// and an external program; alternatively, the Interface class (see
+// Interface.h) provides a simpler interface.
+
+class Manager
+{
+public:
+ // Tokenises the built-in macro tables if that has not been done yet.
+ // Throws std::runtime_error if RGBColour is not four bytes wide.
+ Manager();
+
+ // ProcessInput generates a parse tree and a layout tree from the
+ // supplied input.
+ //
+ // If texvcCompatibility is set, then ProcessInput will place a series
+ // of macros ahead of the input to emulate various non-standard
+ // commands that texvc recognises (see gTexvcCompatibilityMacros). This
+ // corresponds to the command line option
+ // "--texvc-compatible-commands".
+ void ProcessInput(
+ const std::wstring& input,
+ bool texvcCompatibility = false
+ );
+
+ // GenerateMathml generates a XML tree containing MathML markup.
+ // Returns the root node.
+ //
+ // Throws the cached mDelayedMathmlError if ProcessInput stored one,
+ // and std::logic_error if no layout tree has been built.
+ std::auto_ptr<MathmlNode> GenerateMathml(
+ const MathmlOptions& options
+ ) const;
+
+ // GeneratePurifiedTex returns a string containing a complete TeX file
+ // (including any required \usepackage commands) that could be fed to
+ // LaTeX to produce a graphical version of the input.
+ //
+ // Throws std::logic_error if no parse tree has been built.
+ std::wstring GeneratePurifiedTex(
+ const PurifiedTexOptions& options
+ ) const;
+
+ // A few accessor functions.
+ // (Each returns the raw pointer held by the Manager; ownership is not
+ // transferred.)
+ const ParseTree::MathNode* GetParseTree() const
+ {
+ return mParseTree.get();
+ }
+
+ const LayoutTree::Node* GetLayoutTree() const
+ {
+ return mLayoutTree.get();
+ }
+
+private:
+ // These store the parse tree and layout tree generated by ProcessInput.
+ std::auto_ptr<ParseTree::MathNode> mParseTree;
+ std::auto_ptr<LayoutTree::Node> mLayoutTree;
+
+ // This flag is set if the user has requested "strict spacing" rules
+ // (see SpacingControl) via the magic "\strictspacing" command.
+ bool mStrictSpacingRequested;
+
+ // There are a handful of errors that get picked up during the layout
+ // tree building phase, but which we want to return as MathML-related
+ // errors; i.e. we can still run PNG generation. If one of these
+ // happens, we cache it in mDelayedMathmlError, and return it when
+ // someone tries to GenerateMathml().
+ // FIX: this is a bit hacky and badly designed.
+ // Come back and fix it up one day.
+ bool mHasDelayedMathmlError;
+ Exception mDelayedMathmlError;
+
+ // gStandardMacros is a string which, in effect, gets inserted at the
+ // beginning of any input string handled by ProcessInput. It contains
+ // a sequence of macro definitions ("\newcommand"s) which set up some
+ // standard TeX synonyms.
+ static std::wstring gStandardMacros;
+
+ // gTexvcCompatibilityMacros is similar; it contains definitions for
+ // commands recognised by texvc but that are not standard TeX/LaTeX/
+ // AMS-LaTeX. (See also the texvcCompatibility flag.)
+ static std::wstring gTexvcCompatibilityMacros;
+
+ // Tokenised version of gStandardMacros and gTexvcCompatibilityMacros
+ // (computed by the Manager constructor whenever they are still empty):
+ static std::vector<std::wstring> gStandardMacrosTokenised;
+ static std::vector<std::wstring> gTexvcCompatibilityMacrosTokenised;
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/MathmlNode.cpp b/blahtexml/source/BlahtexCore/MathmlNode.cpp
new file mode 100644
index 0000000..6c49dd8
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/MathmlNode.cpp
@@ -0,0 +1,330 @@
+// File "MathmlNode.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <iomanip>
+#include <sstream>
+#include <stdexcept>
+#include "MathmlNode.h"
+#include "XmlEncode.h"
+
+#ifdef BLAHTEXML_USING_XERCES
+#include <xercesc/util/XMLString.hpp>
+#include "../BlahtexXMLin/AttributesImpl.h"
+#include "../BlahtexXMLin/XercesString.h"
+#endif
+
+using namespace std;
+
+namespace blahtex
+{
+
+// Strings for each MathML "mathvariant" value.
+// The table is indexed by MathmlFont (see AddFontAttributes, which looks up
+// gMathmlFontStrings[desiredFont]), so the entries must stay in the same
+// order as the enumerators of MathmlFont.
+wstring gMathmlFontStrings[] =
+{
+ L"normal",
+ L"bold",
+ L"italic",
+ L"bold-italic",
+ L"double-struck",
+ L"bold-fraktur",
+ L"script",
+ L"bold-script",
+ L"fraktur",
+ L"sans-serif",
+ L"bold-sans-serif",
+ L"sans-serif-italic",
+ L"sans-serif-bold-italic",
+ L"monospace"
+};
+
+
+// Destroys the node together with every child it holds: each pointer in
+// mChildren is deleted.
+MathmlNode::~MathmlNode()
+{
+    list<MathmlNode*>::iterator child = mChildren.begin();
+    while (child != mChildren.end())
+    {
+        delete *child;
+        child++;
+    }
+}
+
+
+// Adds to mAttributes whatever font attributes are needed to make this
+// node render in "desiredFont".
+//
+// With options.mUseVersion1FontAttributes unset, a single "mathvariant"
+// attribute is used (MathML 2.0); otherwise the request is translated into
+// the MathML 1.x attributes fontstyle / fontweight / fontfamily.
+//
+// Throws logic_error if MathML 1.x attributes are requested for a font
+// they cannot express (double-struck, script, fraktur and their bold
+// forms), except for a fraktur <mn>, which is mapped to bold.
+void MathmlNode::AddFontAttributes(
+    MathmlFont desiredFont,
+    const MathmlOptions& options
+)
+{
+    // A one-character <mi> is italic unless told otherwise; everything
+    // else is upright.
+    const bool italicByDefault = (mType == cTypeMi && mText.size() == 1);
+
+    if (!options.mUseVersion1FontAttributes)
+    {
+        // MathML version 2.0 fonts requested.
+
+        const MathmlFont implicitFont =
+            italicByDefault ? cMathmlFontItalic : cMathmlFontNormal;
+
+        if (desiredFont != implicitFont)
+            mAttributes[cAttributeMathvariant] =
+                gMathmlFontStrings[desiredFont];
+
+        return;
+    }
+
+    // MathML version 1.x fonts requested.
+
+    const bool isFraktur = (
+        desiredFont == cMathmlFontFraktur ||
+        desiredFont == cMathmlFontBoldFraktur
+    );
+    const bool hasNoVersion1Form = (
+        isFraktur ||
+        desiredFont == cMathmlFontDoubleStruck ||
+        desiredFont == cMathmlFontScript ||
+        desiredFont == cMathmlFontBoldScript
+    );
+
+    if (hasNoVersion1Form)
+    {
+        // The only way we can end up here is with a fraktur <mn>.
+        // TeX has decent fraktur digits, but unicode doesn't seem to
+        // list them. (FIX: this might be changing in an upcoming
+        // revision of unicode. I thought I saw the stix fonts website
+        // mention something.) Therefore we can't access them with
+        // version 1 font attributes, so let's just map it to bold
+        // instead.
+        if (!(mType == cTypeMn && isFraktur))
+            throw logic_error(
+                "Unexpected font/symbol combination "
+                "in MathmlNode::AddFontAttributes"
+            );
+
+        mAttributes[cAttributeFontweight] = L"bold";
+        return;
+    }
+
+    // Break the requested font down into its three independent traits.
+    bool wantItalic = false;
+    bool wantBold = false;
+    bool wantSansSerif = false;
+
+    switch (desiredFont)
+    {
+        case cMathmlFontItalic:
+            wantItalic = true;
+            break;
+
+        case cMathmlFontBold:
+            wantBold = true;
+            break;
+
+        case cMathmlFontBoldItalic:
+            wantItalic = true;
+            wantBold = true;
+            break;
+
+        case cMathmlFontSansSerif:
+            wantSansSerif = true;
+            break;
+
+        case cMathmlFontBoldSansSerif:
+            wantBold = true;
+            wantSansSerif = true;
+            break;
+
+        case cMathmlFontSansSerifItalic:
+            wantItalic = true;
+            wantSansSerif = true;
+            break;
+
+        case cMathmlFontSansSerifBoldItalic:
+            wantItalic = true;
+            wantBold = true;
+            wantSansSerif = true;
+            break;
+
+        default:
+            // normal and monospace have none of the three traits
+            break;
+    }
+
+    // fontstyle is only written when it differs from the node's default.
+    if (italicByDefault != wantItalic)
+        mAttributes[cAttributeFontstyle] =
+            wantItalic ? L"italic" : L"normal";
+
+    if (wantBold)
+        mAttributes[cAttributeFontweight] = L"bold";
+
+    if (wantSansSerif)
+        mAttributes[cAttributeFontfamily] = L"sans-serif";
+    else if (desiredFont == cMathmlFontMonospace)
+        mAttributes[cAttributeFontfamily] = L"monospace";
+}
+
+
+// Writes one indentation unit to "os" for each level of "depth".
+// Nothing is written when depth is zero or negative.
+void WriteIndent(
+    wostream& os,
+    int depth
+)
+{
+    for (int levelsLeft = depth; levelsLeft > 0; --levelsLeft)
+        os << L" ";
+}
+
+ // MathML element names. PrintType and PrintAsSAX2 index this table
+ // with the node's mType value, so the order of the entries matters.
+ static wstring gTypeArray[] =
+ {
+ L"mi",
+ L"mo",
+ L"mn",
+ L"mspace",
+ L"mtext",
+ L"mrow",
+ L"mstyle",
+ L"msub",
+ L"msup",
+ L"msubsup",
+ L"munder",
+ L"mover",
+ L"munderover",
+ L"mfrac",
+ L"msqrt",
+ L"mroot",
+ L"mtable",
+ L"mtr",
+ L"mtd",
+ L"mpadded"
+ };
+
+// Writes this node's MathML element name (e.g. "mrow") to "os".
+//
+// Throws logic_error if mType does not correspond to an entry of
+// gTypeArray.
+void MathmlNode::PrintType(wostream& os) const
+{
+    // Number of ELEMENTS in the table. (sizeof(gTypeArray) on its own is
+    // the size in bytes, which is far larger and would let out-of-range
+    // types slip past the check below.)
+    const size_t typeCount = sizeof(gTypeArray) / sizeof(gTypeArray[0]);
+
+    if (mType < 0 || static_cast<size_t>(mType) >= typeCount)
+        throw logic_error("Illegal node type in MathmlNode::PrintType");
+
+    os << gTypeArray[mType];
+}
+
+ // MathML attribute names. PrintAttributes and PrintAsSAX2 index this
+ // table with the keys of mAttributes, so the order of the entries
+ // matters.
+ static wstring gAttributeArray[] =
+ {
+ L"displaystyle",
+ L"scriptlevel",
+ L"mathvariant",
+ L"mathcolor",
+ L"lspace",
+ L"rspace",
+ L"width",
+ L"stretchy",
+ L"minsize",
+ L"maxsize",
+ L"accent",
+ L"movablelimits",
+ L"linethickness",
+ L"columnalign",
+ L"columnspacing",
+ L"rowspacing",
+ L"fontfamily",
+ L"fontstyle",
+ L"fontweight"
+ };
+
+// Writes every attribute held in mAttributes to "os", each in the form
+// ' name="value"' (with a leading space). Values are written exactly as
+// stored.
+//
+// Throws logic_error if an attribute key does not correspond to an entry
+// of gAttributeArray.
+void MathmlNode::PrintAttributes(wostream& os) const
+{
+    // Number of ELEMENTS in the table. (sizeof(gAttributeArray) on its own
+    // is the size in bytes, which is far larger and would let out-of-range
+    // keys slip past the check below.)
+    const size_t attributeCount =
+        sizeof(gAttributeArray) / sizeof(gAttributeArray[0]);
+
+    for (map<Attribute, wstring>::const_iterator
+        attribute = mAttributes.begin();
+        attribute != mAttributes.end();
+        ++attribute
+    )
+    {
+        if (
+            attribute->first < 0 ||
+            static_cast<size_t>(attribute->first) >= attributeCount
+        )
+            throw logic_error(
+                "Illegal attribute in MathmlNode::PrintAttributes"
+            );
+
+        os << L" " << gAttributeArray[attribute->first] << L"=\""
+            << attribute->second << L"\"";
+    }
+}
+
+// Writes this node, and recursively its children, to "os" as XML.
+//
+// options: passed to XmlEncode when writing the node's text.
+// indent:  if true, each element goes on its own line, preceded by
+//          WriteIndent(os, depth).
+// depth:   nesting level of this node; children are printed at depth + 1.
+//
+// A node with neither text nor children is written as an empty element
+// ("<x/>"). A node with text is written as "<x>text</x>" and its children
+// (if any) are not printed.
+void MathmlNode::Print(
+    wostream& os,
+    const EncodingOptions& options,
+    bool indent,
+    int depth
+) const
+{
+    if (indent)
+        WriteIndent(os, depth);
+
+    // Opening tag, up to but not including its terminator.
+    os << L"<";
+    PrintType(os);
+    PrintAttributes(os);
+
+    const bool hasText = !mText.empty();
+
+    if (!hasText && mChildren.empty())
+    {
+        os << L"/>";
+    }
+    else
+    {
+        os << L">";
+
+        if (hasText)
+        {
+            // is a leaf node with text
+            os << XmlEncode(mText, options);
+        }
+        else
+        {
+            // is an internal node with at least one child
+            if (indent)
+                os << endl;
+
+            list<MathmlNode*>::const_iterator child = mChildren.begin();
+            while (child != mChildren.end())
+            {
+                (*child)->Print(os, options, indent, depth + 1);
+                child++;
+            }
+
+            if (indent)
+                WriteIndent(os, depth);
+        }
+
+        // Closing tag.
+        os << L"</";
+        PrintType(os);
+        os << L">";
+    }
+
+    if (indent)
+        os << endl;
+}
+
+#ifdef BLAHTEXML_USING_XERCES
+// Emits this node, and recursively its children, as SAX2 events on "sax".
+//
+// prefix:          namespace prefix for element qualified names; when
+//                  empty, the bare element name is used.
+// ignoreFirstmrow: if true and this node is an <mrow> with no attributes
+//                  and no text, the <mrow> itself is not emitted and only
+//                  its children are. Children are always processed with
+//                  this flag off.
+//
+// Throws logic_error if the node type or an attribute key does not
+// correspond to an entry of gTypeArray / gAttributeArray.
+void MathmlNode::PrintAsSAX2(ContentHandler& sax, const wstring& prefix, bool ignoreFirstmrow) const
+{
+    // Numbers of ELEMENTS in the name tables. (sizeof(array) on its own is
+    // the size in bytes, which would let out-of-range values slip past
+    // the checks below.)
+    const size_t typeCount = sizeof(gTypeArray) / sizeof(gTypeArray[0]);
+    const size_t attributeCount =
+        sizeof(gAttributeArray) / sizeof(gAttributeArray[0]);
+
+    if (mType < 0 || static_cast<size_t>(mType) >= typeCount)
+        throw logic_error("Illegal node type in MathmlNode::PrintAsSAX2");
+    bool skipElement = ignoreFirstmrow && (mType == cTypeMrow) && (mAttributes.size() == 0) && (mText.empty());
+    if (skipElement) {
+        for (list<MathmlNode*>::const_iterator child = mChildren.begin(); child != mChildren.end(); ++child)
+            (*child)->PrintAsSAX2(sax, prefix, false);
+    }
+    else {
+        XercesString elementLocalName(gTypeArray[mType]);
+        XercesString elementQName((prefix == L"") ? gTypeArray[mType] : (prefix + L":" + gTypeArray[mType]));
+        AttributesImpl attributes;
+        for (map<Attribute, wstring>::const_iterator attribute = mAttributes.begin();
+            attribute != mAttributes.end(); ++attribute) {
+            if (attribute->first < 0 || static_cast<size_t>(attribute->first) >= attributeCount)
+                throw logic_error("Illegal attribute in MathmlNode::PrintAsSAX2");
+            // Attributes are emitted without a namespace: the qualified
+            // name equals the local name and the URI is left empty.
+            XercesString localPart(gAttributeArray[attribute->first]);
+            XercesString qName = localPart;
+            XercesString uri;
+            XercesString value(attribute->second);
+            XercesString type;
+            attributes.addAttribute(qName, uri, localPart, value, type);
+        }
+        XercesString MathMLnamespace("http://www.w3.org/1998/Math/MathML");
+        sax.startElement(MathMLnamespace.c_str(), elementLocalName.c_str(), elementQName.c_str(), attributes);
+        if (!mText.empty()) {
+            XercesString text(mText);
+            sax.characters(text.data(), text.size());
+        }
+        for (list<MathmlNode*>::const_iterator child = mChildren.begin(); child != mChildren.end(); ++child)
+            (*child)->PrintAsSAX2(sax, prefix, false);
+        sax.endElement(MathMLnamespace.c_str(), elementLocalName.c_str(), elementQName.c_str());
+    }
+}
+#endif
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/MathmlNode.h b/blahtexml/source/BlahtexCore/MathmlNode.h
new file mode 100644
index 0000000..dc4307a
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/MathmlNode.h
@@ -0,0 +1,173 @@
+// File "MathmlNode.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_MATHMLNODE_H
+#define BLAHTEX_MATHMLNODE_H
+
+#include <iostream>
+#include <map>
+#include <list>
+#include <string>
+#include "Misc.h"
+
+#ifdef BLAHTEXML_USING_XERCES
+#include <xercesc/sax2/ContentHandler.hpp>
+XERCES_CPP_NAMESPACE_USE
+#endif
+
+namespace blahtex
+{
+
+// MathmlFont lists all possible MathML "mathvariant" values. Blahtex
+// uses these to record fonts in the layout tree; they get converted to
+// MathML 1.x font attributes if needed.
+enum MathmlFont
+{
+ cMathmlFontNormal,
+ cMathmlFontBold,
+ cMathmlFontItalic,
+ cMathmlFontBoldItalic,
+ cMathmlFontDoubleStruck,
+ cMathmlFontBoldFraktur,
+ cMathmlFontScript,
+ cMathmlFontBoldScript,
+ cMathmlFontFraktur,
+ cMathmlFontSansSerif,
+ cMathmlFontBoldSansSerif,
+ cMathmlFontSansSerifItalic,
+ cMathmlFontSansSerifBoldItalic,
+ cMathmlFontMonospace
+};
+
+// String versions of the MathML mathvariant fonts.
+// (See enum MathmlFont above.)
+extern std::wstring gMathmlFontStrings[];
+
+
+// Represents a node in a MathML tree.
+struct MathmlNode
+{
+ // The MathML element this node represents. PrintAsSAX2 uses the
+ // enumerator value directly as an index into gTypeArray, so the order of
+ // the enumerators has to stay in step with that table.
+ enum Type
+ {
+ // Leaf nodes types ("token elements" in MathML documentation):
+ cTypeMi,
+ cTypeMo,
+ cTypeMn,
+ cTypeMspace,
+ cTypeMtext,
+
+ // Internal nodes types:
+ cTypeMrow,
+ cTypeMstyle,
+ cTypeMsub,
+ cTypeMsup,
+ cTypeMsubsup,
+ cTypeMunder,
+ cTypeMover,
+ cTypeMunderover,
+ cTypeMfrac,
+ cTypeMsqrt,
+ cTypeMroot,
+ cTypeMtable,
+ cTypeMtr,
+ cTypeMtd,
+ cTypeMpadded
+ }
+ mType;
+
+ // Keys for mAttributes. PrintAsSAX2 uses the enumerator value directly as
+ // an index into gAttributeArray, so the order of the enumerators has to
+ // stay in step with that table.
+ enum Attribute
+ {
+ cAttributeDisplaystyle,
+ cAttributeScriptlevel,
+ cAttributeMathvariant,
+ cAttributeMathcolor,
+ cAttributeLspace,
+ cAttributeRspace,
+ cAttributeWidth,
+ cAttributeStretchy,
+ cAttributeMinsize,
+ cAttributeMaxsize,
+ cAttributeAccent,
+ cAttributeMovablelimits,
+ cAttributeLinethickness,
+ cAttributeColumnalign,
+ cAttributeColumnspacing,
+ cAttributeRowspacing,
+ cAttributeFontfamily,
+ cAttributeFontstyle,
+ cAttributeFontweight
+ };
+
+ // Attribute values of this element, keyed by attribute. Being a
+ // std::map, it is iterated in increasing enumerator order, which is
+ // therefore the order in which PrintAsSAX2 reports the attributes.
+ std::map<Attribute, std::wstring> mAttributes;
+
+ // mText is only used for leaf nodes: it holds the text that is
+ // displayed between the opening and closing tags
+ std::wstring mText;
+
+ // mChildren is only used for internal nodes
+ // NOTE(review): the children are held as raw pointers and a destructor
+ // is declared below; presumably the destructor deletes them (its
+ // definition is not in this header). If so, the implicitly generated
+ // copy constructor and assignment operator would lead to double
+ // deletion -- confirm, and consider disabling copying.
+ std::list<MathmlNode*> mChildren;
+
+ // Creates a node of the given type with optional text, no attributes
+ // and no children.
+ // NOTE(review): this constructor is not "explicit", so a Type value
+ // converts implicitly to a MathmlNode.
+ MathmlNode(Type type, const std::wstring& text = L"") :
+ mType(type),
+ mText(text)
+ { }
+
+ ~MathmlNode();
+
+ // This function adds mathvariant (for MathML 2.0) or fontstyle/
+ // fontweight/fontfamily (for MathML 1.x) as appropriate to this node
+ // to obtain the desired font. It knows about MathML defaults (like the
+ // annoying automatic italic for single character <mi> nodes).
+ void AddFontAttributes(
+ MathmlFont desiredFont,
+ const MathmlOptions& options
+ );
+
+
+ // Print() recursively prints the tree rooted at this node to the
+ // given output stream.
+ //
+ // XML entities translated according to EncodingOptions.
+ //
+ // If "indent" is true, it will print each tag pair on a new line, and
+ // add appropriate indenting.
+ void Print(
+ std::wostream& os,
+ const EncodingOptions& options,
+ bool indent,
+ int depth = 0
+ ) const;
+
+ // Used internally by Print:
+ void PrintType(std::wostream& os) const;
+ void PrintAttributes(std::wostream& os) const;
+#ifdef BLAHTEXML_USING_XERCES
+ // Reports the tree rooted at this node to "sax" as SAX2 events in the
+ // MathML namespace. "prefix" is the namespace prefix for qualified
+ // element names (empty for none). If "ignoreFirstmrow" is true and this
+ // node is an <mrow> without attributes or text, only its children are
+ // reported. Only available when built with BLAHTEXML_USING_XERCES.
+ void PrintAsSAX2(ContentHandler& sax, const std::wstring& prefix, bool ignoreFirstmrow) const;
+#endif
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/Misc.h b/blahtexml/source/BlahtexCore/Misc.h
new file mode 100644
index 0000000..97353dc
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Misc.h
@@ -0,0 +1,222 @@
+// File "Misc.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_MISC_H
+#define BLAHTEX_MISC_H
+
+
+#include <set>
+#include <vector>
+#include <string>
+
+
+// I use wishful_hash_set/map wherever I really want to use hash_set/map.
+// Unfortunately hash_set/map is not quite standard enough yet, so for now
+// it just gets mapped to set/map.
+#define wishful_hash_map std::map
+#define wishful_hash_set std::set
+
+
+// The macro END_ARRAY is used in several places to simplify code that
+// constructs an STL container from an array of data.
+// (Yes, I hate macros too. Sorry.)
+#define END_ARRAY(zzz_array) \
+ ((zzz_array) + sizeof(zzz_array)/sizeof((zzz_array)[0]))
+
+
+namespace blahtex
+{
+
+
+// Stores colours in 0x00rrggbb format.
+// Better be 32 bits wide!
+typedef unsigned RGBColour;
+
+
+// The blahtex core throws an Exception object when it detects the input is
+// invalid in some way. This doesn't include more serious things like debug
+// assertions (these are thrown as std::logic_error).
+//
+// Each exception consists of an identifying string (mCode) plus zero or
+// more arguments (mArgs). This scheme is designed to facilitate
+// localisation of error messages. A complete list of corresponding
+// messages in English is provided in Messages.cpp (not part of the
+// blahtex core).
+class Exception
+{
+private:
+ // Identifying string of the error (used to look up the message).
+ std::wstring mCode;
+ // The non-empty arguments passed to the constructor, in order.
+ std::vector<std::wstring> mArgs;
+
+public:
+ // Creates an exception with an empty code and no arguments.
+ Exception()
+ {
+ }
+
+ // Creates an exception with the given code and up to three arguments.
+ //
+ // An argument that is empty is not stored at all, so "no argument" and
+ // "empty argument" are indistinguishable, and a non-empty argument that
+ // follows an empty one moves up: Exception(L"X", L"", L"b") ends up with
+ // a single argument "b" at index 0 of GetArgs().
+ Exception(
+ const std::wstring& code,
+ const std::wstring& arg1 = L"",
+ const std::wstring& arg2 = L"",
+ const std::wstring& arg3 = L""
+ ) :
+ mCode(code)
+ {
+ if (!arg1.empty())
+ mArgs.push_back(arg1);
+ if (!arg2.empty())
+ mArgs.push_back(arg2);
+ if (!arg3.empty())
+ mArgs.push_back(arg3);
+ }
+
+ // Returns the identifying string of the error.
+ const std::wstring& GetCode() const
+ {
+ return mCode;
+ }
+
+ // Returns the stored (non-empty) arguments; the reference stays valid
+ // only as long as this Exception object does.
+ const std::vector<std::wstring>& GetArgs() const
+ {
+ return mArgs;
+ }
+};
+
+
+// EncodingOptions describes output character encoding options.
+struct EncodingOptions
+{
+ // mMathmlEncoding tells what to do with non-ASCII MathML characters.
+ // It corresponds to the "--mathml-encoding" option on the command line.
+ enum MathmlEncoding
+ {
+ cMathmlEncodingRaw, // directly in unicode
+ cMathmlEncodingNumeric, // use e.g. "&#x2329;"
+ cMathmlEncodingShort, // use e.g. "&lang;"
+ cMathmlEncodingLong // use e.g. "&LeftAngleBracket;"
+ }
+ mMathmlEncoding;
+
+ // mOtherEncodingRaw tells what to do with non-ASCII, non-MathML
+ // characters:
+ // * true means use unicode directly
+ // * false means use e.g. "&#x1234;"
+ bool mOtherEncodingRaw;
+
+ // mAllowPlane1 tells whether to allow unicode plane-1 characters.
+ // (This facility is included because some browsers don't have decent
+ // support for plane 1 characters.)
+ //
+ // If this flag is NOT set, then blahtex will never output things like
+ // "&#x1d504;", even when mMathmlEncoding is set to cMathmlEncodingRaw
+ // or cMathmlEncodingNumeric. Instead it will fall back on something
+ // like "&Afr;".
+ //
+ // (This flag is also present in struct MathmlOptions.)
+ bool mAllowPlane1;
+
+ EncodingOptions() :
+ mMathmlEncoding(cMathmlEncodingNumeric),
+ mOtherEncodingRaw(false),
+ mAllowPlane1(true)
+ { }
+};
+
+
+// MathmlOptions stores options that affect the MathML output.
+struct MathmlOptions
+{
+ // mSpacingControl controls blahtex's MathML spacing markup output. It
+ // corresponds to the command line "--spacing" option.
+ //
+ // Blahtex always uses TeX's rules (or an approximation thereof) to
+ // determine spacing, but the SpacingControl values describe how much of
+ // the time it actually outputs markup (<mspace>, lspace, rspace) to
+ // implement its spacing decisions.
+ //
+ // cSpacingControlStrict:
+ // Blahtex outputs spacing commands everywhere possible, doesn't
+ // leave any choice to the MathML renderer.
+ //
+ // cSpacingControlModerate:
+ // Blahtex outputs spacing commands where it thinks a typical MathML
+ // renderer is likely to do something visually unsatisfactory
+ // without additional help. The aim is to get good agreement with
+ // TeX without overly bloated MathML markup. (It's very difficult
+ // to get this right, so I expect it to be under continual review.)
+ //
+ // cSpacingControlRelaxed:
+ // Blahtex only outputs spacing commands when the user specifically
+ // asks for them, using TeX commands like "\," or "\quad".
+ enum SpacingControl
+ {
+ cSpacingControlStrict,
+ cSpacingControlModerate,
+ cSpacingControlRelaxed
+ }
+ mSpacingControl;
+
+ // If mUseVersion1FontAttributes is set, blahtex will use MathML version
+ // 1 font attributes (fontstyle, fontweight, fontfamily) instead of
+ // mathvariant, and it will handle the fancier fonts (script,
+ // bold-script, fraktur, bold-fraktur, double-struck) by explicitly
+ // using appropriate MathML entities (e.g. "&Afr;").
+ bool mUseVersion1FontAttributes;
+
+ // Discussed at struct EncodingOptions.
+ bool mAllowPlane1;
+
+ MathmlOptions() :
+ mSpacingControl(cSpacingControlStrict),
+ mUseVersion1FontAttributes(false),
+ mAllowPlane1(true)
+ { }
+};
+
+
+// This class contains options to control how blahtex generates
+// "purified Tex", that is, the .tex file which is sent to LaTeX to
+// generate PNG output.
+struct PurifiedTexOptions
+{
+ // Blahtex may use "\usepackage[utf8x]{inputenc}" (which also requires
+ // the "ucs" package)
+ bool mAllowUcs;
+
+ // Blahtex may use "\usepackage{CJK}"
+ bool mAllowCJK;
+
+ // Blahtex may use the "preview" package.
+ bool mAllowPreview;
+
+ // The font name (e.g. "ipam") which gets passed to "\begin{CJK}..."
+ // for handling japanese, or blank if no font is available.
+ std::wstring mJapaneseFont;
+
+ PurifiedTexOptions() :
+ mAllowUcs(false),
+ mAllowCJK(false),
+ mAllowPreview(false)
+ { }
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/ParseTree.h b/blahtexml/source/BlahtexCore/ParseTree.h
new file mode 100644
index 0000000..1651fc0
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/ParseTree.h
@@ -0,0 +1,896 @@
+// File "ParseTree.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_PARSETREE_H
+#define BLAHTEX_PARSETREE_H
+
+// Everything here is implemented variously in ParseTree1.cpp,
+// ParseTree2.cpp and ParseTree3.cpp.
+
+#include <memory>
+#include "LayoutTree.h"
+
+// The ParseTree namespace contains all classes representing nodes in the
+// parse tree. This is essentially a tree representation of the input
+// TeX expression, with as little additional processing done as possible.
+// The idea is that the "purified TeX" should be easily reconstructible
+// from the parse tree.
+
+namespace blahtex
+{
+
+// This struct describes (approximately) a TeX font during math mode.
+struct TexMathFont
+{
+ enum Family
+ {
+ cFamilyDefault, // indicates default font
+ // (e.g. "x" gets italics, "1" gets roman)
+ cFamilyRm, // roman
+ cFamilyBf, // bold
+ cFamilyIt, // italics
+ cFamilySf, // sans serif
+ cFamilyTt, // typewriter
+ cFamilyBb, // blackboard bold
+ cFamilyCal, // calligraphic
+ cFamilyFrak // fraktur
+ }
+ mFamily;
+
+ // Whether or not we are in "\boldsymbol" mode (from AMS packages).
+ // This seems to be mostly orthogonal to the family. (I haven't
+ // studied carefully how this is implemented in TeX.)
+ bool mIsBoldsymbol;
+
+ TexMathFont(
+ Family family = cFamilyDefault,
+ bool isBoldsymbol = false
+ ) :
+ mFamily(family),
+ mIsBoldsymbol(isBoldsymbol)
+ { }
+
+ // This function finds the closest MathML font (i.e. value of
+ // mathvariant) which matches this TeX font.
+ MathmlFont GetMathmlApproximation() const;
+};
+
+// This struct describes (approximately) a TeX font during text mode.
+struct TexTextFont
+{
+ enum Family
+ {
+ cFamilyRm, // roman
+ cFamilySf, // sans serif
+ cFamilyTt // typewriter
+ }
+ mFamily;
+
+ bool mIsBold;
+ bool mIsItalic;
+
+ TexTextFont(
+ Family family = cFamilyRm,
+ bool isBold = false,
+ bool isItalic = false
+ ) :
+ mFamily(family),
+ mIsBold(isBold),
+ mIsItalic(isItalic)
+ { }
+
+ // This function finds the closest MathML font (i.e. value of
+ // mathvariant) which matches this TeX font.
+ MathmlFont GetMathmlApproximation() const;
+};
+
+
+// This struct represents some state information during the parse tree =>
+// layout tree building phase (i.e. while within BuildLayoutTree).
+struct TexProcessingState
+{
+ // Current math-mode and text-mode fonts. Both types declare a
+ // constructor whose arguments all have defaults, so these two members
+ // are always initialised.
+ TexMathFont mMathFont;
+ TexTextFont mTextFont;
+ // NOTE(review): no constructor is declared for this struct, so mColour
+ // (RGBColour is a plain unsigned) is left uninitialised when a
+ // TexProcessingState is default-initialised; the same presumably holds
+ // for mStyle if LayoutTree::Node::Style is a plain enum. Callers
+ // presumably assign both before use -- confirm.
+ LayoutTree::Node::Style mStyle;
+ // Current colour, in the 0x00rrggbb format of RGBColour.
+ RGBColour mColour;
+};
+
+
+// This struct keeps track of the packages, encodings etc, that LaTeX will
+// need to be able to handle the given input.
+struct LatexFeatures
+{
+ // Requires amsmath, amsfonts, amssymb packages
+ bool mNeedsAmsmath;
+ bool mNeedsAmsfonts;
+ bool mNeedsAmssymb;
+
+ // Requires "\usepackage[utf8x]{inputenc}".
+ bool mNeedsUcs;
+
+ // Requires the "color" package.
+ bool mNeedsColor;
+
+ // Requires "X2" font encoding (for cyrillic).
+ bool mNeedsX2;
+
+ // Requires the "CJK" package.
+ bool mNeedsCJK;
+
+ // Needs a japanese font to be installed.
+ bool mNeedsJapaneseFont;
+
+ LatexFeatures() :
+ mNeedsAmsmath(false),
+ mNeedsAmsfonts(false),
+ mNeedsAmssymb(false),
+ mNeedsUcs(false),
+ mNeedsColor(false),
+ mNeedsX2(false),
+ mNeedsCJK(false),
+ mNeedsJapaneseFont(false)
+ { }
+
+ // Given the LaTeX command "command", checks to see if any of the above
+ // flags need to be switched on for that command to work.
+ void Update(const std::wstring& command);
+};
+
+
+// While preparing the purified TeX, blahtex keeps track of something
+// approximating the current font encoding. E.g. while in X2 encoding, only
+// cyrillic characters and whitespace are allowed.
+enum FontEncoding
+{
+ cFontEncodingDefault,
+ cFontEncodingCyrillic,
+ cFontEncodingJapanese
+};
+
+
+namespace ParseTree
+{
+ // Base class for nodes in the parse tree.
+ struct Node
+ {
+ // Virtual so that nodes can be destroyed through a pointer to Node.
+ virtual ~Node()
+ { };
+
+ // This function converts the parse tree under this node into a
+ // layout tree. This is where most of blahtex's hard work is done.
+ // The returned auto_ptr hands ownership of the new layout tree to
+ // the caller.
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const = 0;
+
+ // This function converts the parse tree under this node to
+ // "purified TeX"; that is, TeX markup that can get sent to LaTeX
+ // for PNG generation. Output gets written to the supplied stream.
+ //
+ // This (obviously) does not include the file header and footer;
+ // see Manager::GeneratePurifiedTex for that.
+ //
+ // The "features" object is used to store a list of e.g. LaTeX
+ // packages that will be required to handle the given output.
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const = 0;
+
+ // Print() recursively prints the parse tree under this node.
+ // Debugging use only.
+ //
+ // Default arguments are selected by the static type of the call:
+ // the "depth = 0" default applies only when Print is called through
+ // a Node reference or pointer. The overriding declarations in the
+ // derived structs of this header do not repeat the default.
+ virtual void Print(
+ std::wostream& os,
+ int depth = 0
+ ) const = 0;
+ };
+
+
+ // MathNode represents any node occurring during math mode.
+ struct MathNode : Node
+ {
+ };
+
+ // TextNode represents any node occurring during text mode.
+ struct TextNode : Node
+ {
+ };
+
+
+ // Represents any command like "a", "1", "\alpha", "\int" which blahtex
+ // treats as a single symbol. Also includes spacing commands like "\,".
+ struct MathSymbol : MathNode
+ {
+ // The command, e.g. "a", "\alpha".
+ std::wstring mCommand;
+
+ MathSymbol(const std::wstring& command) :
+ mCommand(command)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+ // Represents a command taking a single argument.
+ struct MathCommand1Arg : MathNode
+ {
+ // The command, e.g. "\hat", "\mathop".
+ std::wstring mCommand;
+
+ // Node corresponding to the argument of the command.
+ std::auto_ptr<MathNode> mChild;
+
+ MathCommand1Arg(
+ const std::wstring& command,
+ std::auto_ptr<MathNode> child
+ ) :
+ mCommand(command),
+ mChild(child)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a TeX state change command like "\rm", "\scriptstyle",
+ // "\color".
+ struct MathStateChange : MathNode
+ {
+ // The style change command, e.g. "\scriptstyle".
+ std::wstring mCommand;
+
+ MathStateChange(
+ const std::wstring& command
+ ) :
+ mCommand(command)
+ { }
+
+ // Modifies "state" according to the state change command.
+ virtual void Apply(
+ TexProcessingState& state
+ ) const;
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a "\color{xyz}" command in math mode.
+ struct MathColour : MathStateChange
+ {
+ // The colour name, e.g. "red".
+ std::wstring mColourName;
+
+ MathColour(
+ const std::wstring& colourName
+ ) :
+ MathStateChange(L"\\color"),
+ mColourName(colourName)
+ { }
+
+ virtual void Apply(
+ TexProcessingState& state
+ ) const;
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a command taking two arguments, including infix commands.
+ struct MathCommand2Args : MathNode
+ {
+ // The command, e.g. "\frac", "\choose".
+ std::wstring mCommand;
+
+ // The two arguments.
+ std::auto_ptr<MathNode> mChild1, mChild2;
+
+ // This flag is set for infix commands like "\over".
+ bool mIsInfix;
+
+ MathCommand2Args(
+ const std::wstring& command,
+ std::auto_ptr<MathNode> child1,
+ std::auto_ptr<MathNode> child2,
+ bool isInfix
+ ) :
+ mCommand(command),
+ mChild1(child1),
+ mChild2(child2),
+ mIsInfix(isInfix)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a "big" command like "\big", "\bigg" etc.
+ struct MathBig : MathNode
+ {
+ // The command, e.g. "\big".
+ std::wstring mCommand;
+
+ // The delimiter that the big command is applied to, e.g. "\langle".
+ std::wstring mDelimiter;
+
+ MathBig(
+ const std::wstring& command,
+ const std::wstring& delimiter
+ ) :
+ mCommand(command),
+ mDelimiter(delimiter)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents material surrounded by grouping braces, e.g. "{abc}" gets
+ // stored as a MathGroup node whose child contains "abc".
+ struct MathGroup : MathNode
+ {
+ // The enclosed material.
+ std::auto_ptr<MathNode> mChild;
+
+ MathGroup(std::auto_ptr<MathNode> child) :
+ mChild(child)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a sequence of nodes in math mode, concatenated together.
+ // e.g. "a\alpha 2" is stored as a MathList containing three MathSymbol
+ // nodes.
+ struct MathList : MathNode
+ {
+ std::vector<MathNode*> mChildren;
+
+ ~MathList();
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a base with a superscript and/or subscript.
+ // (i.e. an expression like "x^y_z".)
+ struct MathScripts : MathNode
+ {
+ // All three fields are optional (NULL indicates an empty field).
+ std::auto_ptr<MathNode> mBase, mUpper, mLower;
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a "limits" command, i.e. one of "\limits", "\nolimits",
+ // or "\displaylimits".
+ struct MathLimits : MathNode
+ {
+ // The command, e.g. "\limits".
+ std::wstring mCommand;
+
+ // mChild is the operator that the limits command is applied to.
+ // e.g. for the input "x^2\limits_5", the base of the MathScripts
+ // node should be a MathLimits node, whose child is the MathSymbol
+ // node representing "x".
+ std::auto_ptr<MathNode> mChild;
+
+ MathLimits(
+ const std::wstring& command,
+ std::auto_ptr<MathNode> child
+ ) :
+ mCommand(command),
+ mChild(child)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents an expression surrounded by "\left( ... \right)".
+ struct MathDelimited : MathNode
+ {
+     // The delimiters, e.g. "\langle", "(".
+     std::wstring mLeftDelimiter, mRightDelimiter;
+
+     // The stuff enclosed by the delimiters:
+     std::auto_ptr<MathNode> mChild;
+
+     // Stores copies of the two delimiter strings and takes over
+     // ownership of "child" (auto_ptr transfers ownership when copied).
+     //
+     // The initialiser list is written in member declaration order,
+     // which is the order in which the members are really constructed.
+     MathDelimited(
+         std::auto_ptr<MathNode> child,
+         const std::wstring& leftDelimiter,
+         const std::wstring& rightDelimiter
+     ) :
+         mLeftDelimiter(leftDelimiter),
+         mRightDelimiter(rightDelimiter),
+         mChild(child)
+     { }
+
+     // Converts the parse tree under this node into a layout tree.
+     virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+         const TexProcessingState& state
+     ) const;
+
+     // Writes the "purified TeX" for this node to "os", recording any
+     // required LaTeX features in "features".
+     virtual void GetPurifiedTex(
+         std::wostream& os,
+         LatexFeatures& features,
+         FontEncoding fontEncoding
+     ) const;
+
+     // Recursively prints the parse tree under this node (debugging).
+     virtual void Print(
+         std::wostream& os,
+         int depth
+     ) const;
+ };
+
+
+ // Represents a row of a table, e.g. might represent the
+ // TeX subexpression "a & b & c".
+ struct MathTableRow : MathNode
+ {
+ // The entries in the row.
+ std::vector<MathNode*> mEntries;
+
+ ~MathTableRow();
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a table, e.g. might represent the TeX subexpression
+ // "a & b & c \\ \\ d & e & f \\ g & h".
+ struct MathTable : MathNode
+ {
+ // The rows of the table.
+ std::vector<MathTableRow*> mRows;
+
+ ~MathTable();
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents an environment, i.e. "\begin{xxx} ... \end{xxx}".
+ // Currently all supported environments are just various forms of table,
+ // so for the moment we insist that it contains a table.
+ struct MathEnvironment : MathNode
+ {
+     // Currently one of:
+     // "matrix", "pmatrix", "bmatrix", "Bmatrix", "vmatrix", "Vmatrix",
+     // "cases", "smallmatrix", "aligned", "substack"
+     std::wstring mName;
+
+     // True for things like "\substack" which don't need "\begin"
+     // and "\end";
+     // False for anything involving "\begin" and "\end"
+     bool mIsShort;
+
+     // The contained table.
+     std::auto_ptr<MathTable> mTable;
+
+     // Stores the environment name and the short-form flag, and takes
+     // over ownership of "table" (auto_ptr transfers ownership when
+     // copied).
+     //
+     // The initialiser list is written in member declaration order,
+     // which is the order in which the members are really constructed.
+     MathEnvironment(
+         const std::wstring& name,
+         std::auto_ptr<MathTable> table,
+         bool isShort
+     ) :
+         mName(name),
+         mIsShort(isShort),
+         mTable(table)
+     { }
+
+     // Converts the parse tree under this node into a layout tree.
+     virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+         const TexProcessingState& state
+     ) const;
+
+     // Writes the "purified TeX" for this node to "os", recording any
+     // required LaTeX features in "features".
+     virtual void GetPurifiedTex(
+         std::wostream& os,
+         LatexFeatures& features,
+         FontEncoding fontEncoding
+     ) const;
+
+     // Recursively prints the parse tree under this node (debugging).
+     virtual void Print(
+         std::wostream& os,
+         int depth
+     ) const;
+ };
+
+
+ // Represents a command that switches from math mode into text mode,
+ // e.g. "\text". Note that certain commands (e.g. "\text") will be
+ // translated into a TextCommand1Arg node if encountered during text
+ // mode, but into an EnterTextMode if encountered during math mode.
+ struct EnterTextMode : MathNode
+ {
+ // The command, e.g. "\text".
+ std::wstring mCommand;
+
+ // The enclosed *text-mode* node.
+ std::auto_ptr<TextNode> mChild;
+
+ EnterTextMode(
+ const std::wstring& command,
+ std::auto_ptr<TextNode> child
+ ) :
+ mCommand(command),
+ mChild(child)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a sequence of nodes in text mode, concatenated together.
+ // e.g. "abc" is stored as a TextList containing three TextSymbol nodes.
+ struct TextList : TextNode
+ {
+ std::vector<TextNode*> mChildren;
+
+ ~TextList();
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents text mode material surrounded by grouping braces.
+ struct TextGroup : TextNode
+ {
+ // The enclosed material.
+ std::auto_ptr<TextNode> mChild;
+
+ TextGroup(std::auto_ptr<TextNode> child) :
+ mChild(child)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents any text mode command like "a", "1", "\textbackslash"
+ // that is treated as a single symbol (includes spacing commands
+ // like "\,").
+ struct TextSymbol : TextNode
+ {
+ // The command, e.g. "a" or "\textbackslash"
+ std::wstring mCommand;
+
+ TextSymbol(const std::wstring& command) :
+ mCommand(command)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a state change command like "\rm" occurring in text mode.
+ struct TextStateChange : TextNode
+ {
+ // The command, e.g. "\rm".
+ std::wstring mCommand;
+
+ TextStateChange(
+ const std::wstring& command
+ ) :
+ mCommand(command)
+ { }
+
+ // Modifies "state" according to the state change command.
+ virtual void Apply(
+ TexProcessingState& state
+ ) const;
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a "\color{xyz}" command in text mode.
+ struct TextColour : TextStateChange
+ {
+ // The colour name, e.g. "red".
+ std::wstring mColourName;
+
+ TextColour(
+ const std::wstring& colourName
+ ) :
+ TextStateChange(L"\\color"),
+ mColourName(colourName)
+ { }
+
+ virtual void Apply(
+ TexProcessingState& state
+ ) const;
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+
+ // Represents a command in text mode taking a single argument.
+ struct TextCommand1Arg : TextNode
+ {
+ // The command, e.g. "\textrm".
+ std::wstring mCommand;
+
+ // Node corresponding to the argument of the command.
+ std::auto_ptr<TextNode> mChild;
+
+ TextCommand1Arg(
+ const std::wstring& command,
+ std::auto_ptr<TextNode> child
+ ) :
+ mCommand(command),
+ mChild(child)
+ { }
+
+ virtual std::auto_ptr<LayoutTree::Node> BuildLayoutTree(
+ const TexProcessingState& state
+ ) const;
+
+ virtual void GetPurifiedTex(
+ std::wostream& os,
+ LatexFeatures& features,
+ FontEncoding fontEncoding
+ ) const;
+
+ virtual void Print(
+ std::wostream& os,
+ int depth
+ ) const;
+ };
+
+} // end ParseTree namespace
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/ParseTree1.cpp b/blahtexml/source/BlahtexCore/ParseTree1.cpp
new file mode 100644
index 0000000..788ca31
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/ParseTree1.cpp
@@ -0,0 +1,1290 @@
+// File "ParseTree1.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <stdexcept>
+#include "ParseTree.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// Delimiters which may appear after "\left", "\right" and "\big",
+// each paired with the MathML character(s) that it gets mapped to.
+
+static pair<wstring, wstring> gDelimiterArray[] =
+{
+ make_pair(L".", L""), // "." maps to the empty string
+ make_pair(L"[", L"["),
+ make_pair(L"]", L"]"),
+ make_pair(L"\\lbrack", L"["),
+ make_pair(L"\\rbrack", L"]"),
+ make_pair(L"(", L"("),
+ make_pair(L")", L")"),
+ make_pair(L"<", L"\U00002329"), // same targets as \langle / \rangle below
+ make_pair(L">", L"\U0000232A"),
+ make_pair(L"\\langle", L"\U00002329"),
+ make_pair(L"\\rangle", L"\U0000232A"),
+ make_pair(L"/", L"/"),
+ make_pair(L"\\backslash", L"\U00002216"),
+ make_pair(L"\\{", L"{"),
+ make_pair(L"\\}", L"}"),
+ make_pair(L"\\lbrace", L"{"),
+ make_pair(L"\\rbrace", L"}"),
+ make_pair(L"|", L"|"),
+ make_pair(L"\\vert", L"|"),
+ make_pair(L"\\lvert", L"|"),
+ make_pair(L"\\rvert", L"|"),
+ make_pair(L"\\Vert", L"\U00002225"), // all three Vert variants share U+2225
+ make_pair(L"\\lVert", L"\U00002225"),
+ make_pair(L"\\rVert", L"\U00002225"),
+ make_pair(L"\\uparrow", L"\U00002191"),
+ make_pair(L"\\downarrow", L"\U00002193"),
+ make_pair(L"\\updownarrow", L"\U00002195"),
+ make_pair(L"\\Uparrow", L"\U000021D1"),
+ make_pair(L"\\Downarrow", L"\U000021D3"),
+ make_pair(L"\\Updownarrow", L"\U000021D5"),
+ make_pair(L"\\lfloor", L"\U0000230A"),
+ make_pair(L"\\rfloor", L"\U0000230B"),
+ make_pair(L"\\lceil", L"\U00002308"),
+ make_pair(L"\\rceil", L"\U00002309")
+};
+
+wishful_hash_map<wstring, wstring> gDelimiterTable( // lookup table built from gDelimiterArray
+ gDelimiterArray,
+ END_ARRAY(gDelimiterArray) // presumably the one-past-the-end pointer of the array; confirm in Misc.h
+);
+
+
+namespace ParseTree
+{
+
+
+// Builds the layout tree for this math list as a single flat Row.
+//
+// The work is done in four passes:
+//   1. build the layout tree of every child, except that state-change
+//      nodes are applied to a running copy of "state" instead;
+//   2. adjust atom flavours (a "bin" atom in certain positions becomes
+//      "ord");
+//   3. insert a Space node between each pair of consecutive non-Space
+//      nodes, its width chosen from the flavours of the two neighbours;
+//   4. splice the children of any child Row directly into this Row.
+//
+// Returns the resulting Row, typed as a LayoutTree::Node.
+auto_ptr<LayoutTree::Node> MathList::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    auto_ptr<LayoutTree::Row> output(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+    list<LayoutTree::Node*>& targetList = output->mChildren;
+
+
+    // 1st pass: recursively build layout trees for all children in
+    // this row, and process state changes
+    TexProcessingState currentState = state;
+    for (vector<MathNode*>::const_iterator
+        node = mChildren.begin(); node != mChildren.end(); ++node
+    )
+    {
+        MathStateChange* nodeAsStateChange =
+            dynamic_cast<MathStateChange*>(*node);
+
+        if (nodeAsStateChange)
+            nodeAsStateChange->Apply(currentState);
+        else
+            targetList.push_back(
+                (*node)->BuildLayoutTree(currentState).release()
+            );
+    }
+
+
+    // 2nd pass: modify atom flavours according to TeX's rules.
+    for (list<LayoutTree::Node*>::iterator
+        node = targetList.begin(); node != targetList.end(); ++node
+    )
+    {
+        switch ((*node)->mFlavour)
+        {
+            case LayoutTree::Node::cFlavourBin:
+            {
+                // A "bin" atom at the start of the list, or directly
+                // after one of the flavours below, becomes "ord".
+                if (node == targetList.begin())
+                    (*node)->mFlavour = LayoutTree::Node::cFlavourOrd;
+                else
+                {
+                    list<LayoutTree::Node*>::iterator previous = node;
+                    --previous;
+                    switch ((*previous)->mFlavour)
+                    {
+                        case LayoutTree::Node::cFlavourBin:
+                        case LayoutTree::Node::cFlavourOp:
+                        case LayoutTree::Node::cFlavourRel:
+                        case LayoutTree::Node::cFlavourOpen:
+                        case LayoutTree::Node::cFlavourPunct:
+                            (*node)->mFlavour =
+                                LayoutTree::Node::cFlavourOrd;
+                            break;
+
+                        default:
+                            break;
+                    }
+                }
+                break;
+            }
+
+            case LayoutTree::Node::cFlavourRel:
+            case LayoutTree::Node::cFlavourClose:
+            case LayoutTree::Node::cFlavourPunct:
+            {
+                // A "bin" atom directly before one of these flavours
+                // becomes "ord".
+                if (node != targetList.begin())
+                {
+                    list<LayoutTree::Node*>::iterator previous = node;
+                    --previous;
+                    if ((*previous)->mFlavour ==
+                        LayoutTree::Node::cFlavourBin
+                    )
+                        (*previous)->mFlavour =
+                            LayoutTree::Node::cFlavourOrd;
+                }
+
+                break;
+            }
+
+            default:
+                break;
+        }
+    }
+    // A trailing "bin" atom also becomes "ord".
+    if (!targetList.empty() &&
+        targetList.back()->mFlavour == LayoutTree::Node::cFlavourBin
+    )
+        targetList.back()->mFlavour = LayoutTree::Node::cFlavourOrd;
+
+
+    // 3rd pass: insert inter-atomic spacing according to TeX's rules.
+
+    // spaceTable[i][j] gives the amount of space that should be inserted
+    // between nodes of flavour i and flavour j.
+
+    // ignoreSpaceTable[i][j] is nonzero whenever the space between i and j
+    // should be ignored while in script or scriptscript style.
+
+    // NOTE(review): both tables are indexed directly by the flavour enum,
+    // so they assume its values run 0..7 in the row/column order shown.
+
+    static int spaceTable[8][8] =
+    {
+    //                            RIGHT
+    // ord   op    bin   rel   open  close punct inner
+        {0,    3,    4,    5,    0,    0,    0,    3},    // ord
+        {3,    3,    0,    5,    0,    0,    0,    3},    // op
+        {4,    4,    0,    0,    4,    0,    0,    4},    // bin
+        {5,    5,    0,    0,    5,    0,    0,    5},    // rel
+        {0,    0,    0,    0,    0,    0,    0,    0},    // open    // LEFT
+        {0,    3,    4,    5,    0,    0,    0,    3},    // close
+        {3,    3,    0,    3,    3,    3,    3,    3},    // punct
+        {3,    3,    4,    5,    3,    0,    3,    3}     // inner
+    };
+
+    static int ignoreSpaceTable[8][8] =
+    {
+    //                            RIGHT
+    // ord   op    bin   rel   open  close punct inner
+        {0,    0,    1,    1,    0,    0,    0,    1},    // ord
+        {0,    0,    0,    1,    0,    0,    0,    1},    // op
+        {1,    1,    0,    0,    1,    0,    0,    1},    // bin
+        {1,    1,    0,    0,    1,    0,    0,    1},    // rel
+        {0,    0,    0,    0,    0,    0,    0,    0},    // open    // LEFT
+        {0,    0,    1,    1,    0,    0,    0,    1},    // close
+        {1,    1,    0,    1,    1,    1,    1,    1},    // punct
+        {1,    0,    1,    1,    1,    0,    1,    1}     // inner
+    };
+
+    list<LayoutTree::Node*>::iterator currentAtom = targetList.begin();
+    list<LayoutTree::Node*>::iterator previousAtom;
+    bool foundFirst = false;
+    while (true)
+    {
+        // Space nodes already in the list are not atoms; step over them.
+        while (
+            currentAtom != targetList.end() &&
+            dynamic_cast<LayoutTree::Space*>(*currentAtom)
+        )
+            ++currentAtom;
+
+        if (currentAtom == targetList.end())
+            break;
+
+        if (!foundFirst)
+            foundFirst = true;
+        else
+        {
+            LayoutTree::Node::Flavour leftFlavour =
+                (*previousAtom)->mFlavour;
+            LayoutTree::Node::Flavour rightFlavour =
+                (*currentAtom)->mFlavour;
+
+            int width =
+                (
+                    ignoreSpaceTable[leftFlavour][rightFlavour] &&
+                    (
+                        state.mStyle ==
+                            LayoutTree::Node::cStyleScript
+                        ||
+                        state.mStyle ==
+                            LayoutTree::Node::cStyleScriptScript
+                    )
+                )
+                ? 0 : spaceTable[leftFlavour][rightFlavour];
+
+            // A Space is inserted even when its width is zero.
+            targetList.insert(
+                currentAtom,
+                new LayoutTree::Space(
+                    width,
+                    false    // indicates non-user-specified space
+                )
+            );
+        }
+
+        previousAtom = currentAtom;
+        ++currentAtom;
+    }
+
+
+    // 4th pass: splice any children Rows into this Row.
+    // The idea is that no Row node should have any Rows as children.
+    //
+    // The iterator is advanced by hand: list::erase() already returns the
+    // element following the erased one, so incrementing again would skip
+    // that element, and would increment end() (undefined behaviour) when
+    // the erased Row was the last element of the list.
+    list<LayoutTree::Node*>::iterator child = targetList.begin();
+    while (child != targetList.end())
+    {
+        LayoutTree::Row* childAsRow
+            = dynamic_cast<LayoutTree::Row*>(*child);
+
+        if (!childAsRow)
+        {
+            ++child;
+            continue;
+        }
+
+        // Move the grandchildren in front of the Row, then drop the
+        // now-empty Row itself.
+        targetList.splice(child, childAsRow->mChildren);
+        delete childAsRow;
+        child = targetList.erase(child);
+    }
+
+    return static_cast< auto_ptr<LayoutTree::Node> >(output);
+}
+
+
+// Describes one accent command (such as "\hat" or "\overrightarrow"):
+// the text of the accent character(s) and its stretchy flag.
+struct AccentInfo
+{
+    wstring mText;
+    bool mIsStretchy;
+
+    AccentInfo(const wstring& text, bool isStretchy)
+        : mText(text), mIsStretchy(isStretchy)
+    { }
+};
+
+
+auto_ptr<LayoutTree::Node> MathCommand1Arg::BuildLayoutTree(
+ const TexProcessingState& state
+) const
+{ // Dispatches on mCommand; throws logic_error if no case below matches.
+ if (mCommand == L"\\sqrt")
+ return auto_ptr<LayoutTree::Node>(
+ new LayoutTree::Sqrt(
+ mChild->BuildLayoutTree(state),
+ state.mColour
+ )
+ );
+ // "\overbrace" / "\underbrace": the argument with a brace as upper or lower script.
+ if (mCommand == L"\\overbrace" || mCommand == L"\\underbrace")
+ {
+ auto_ptr<LayoutTree::Node> brace(
+ new LayoutTree::SymbolOperator(
+ true, // stretchy
+ L"", // no explicit size
+ false, // not an accent
+ mCommand == L"\\overbrace" ? L"\U0000FE37" : L"\U0000FE38",
+ cMathmlFontNormal,
+ LayoutTree::Node::cStyleScript,
+ LayoutTree::Node::cFlavourOrd,
+ LayoutTree::Node::cLimitsDisplayLimits,
+ state.mColour
+ )
+ );
+ // The argument keeps display style if we are in it; otherwise it is set in text style.
+ TexProcessingState newState = state;
+ newState.mStyle =
+ (state.mStyle == LayoutTree::Node::cStyleDisplay)
+ ? LayoutTree::Node::cStyleDisplay
+ : LayoutTree::Node::cStyleText;
+
+ auto_ptr<LayoutTree::Node> empty; // null pointer for the unused script slot
+
+ return auto_ptr<LayoutTree::Node>(
+ new LayoutTree::Scripts(
+ newState.mStyle,
+ LayoutTree::Node::cFlavourOp,
+ LayoutTree::Node::cLimitsLimits,
+ state.mColour,
+ false, // not sideset
+ mChild->BuildLayoutTree(newState),
+ (mCommand == L"\\overbrace") ? brace : empty, // upper
+ (mCommand == L"\\underbrace") ? brace : empty // lower
+ )
+ );
+ }
+ // "\pmod": a row of space, "(", "mod", space, the argument, ")".
+ if (mCommand == L"\\pmod")
+ {
+ auto_ptr<LayoutTree::Row> row(
+ new LayoutTree::Row(state.mStyle, state.mColour)
+ );
+
+ MathmlFont font =
+ state.mMathFont.mIsBoldsymbol
+ ? cMathmlFontBold : cMathmlFontNormal;
+
+ row->mChildren.push_back(new LayoutTree::Space(18, true));
+ row->mChildren.push_back(
+ new LayoutTree::SymbolOperator(
+ false,
+ L"",
+ false,
+ L"(",
+ font,
+ state.mStyle,
+ LayoutTree::Node::cFlavourOpen,
+ LayoutTree::Node::cLimitsDisplayLimits,
+ state.mColour
+ )
+ );
+ row->mChildren.push_back(
+ new LayoutTree::SymbolOperator(
+ false,
+ L"",
+ false,
+ L"mod",
+ font,
+ state.mStyle,
+ LayoutTree::Node::cFlavourOrd,
+ LayoutTree::Node::cLimitsDisplayLimits,
+ state.mColour
+ )
+ );
+ row->mChildren.push_back(new LayoutTree::Space(6, true));
+ row->mChildren.push_back(
+ mChild->BuildLayoutTree(state).release() // the row's child list holds raw pointers
+ );
+ row->mChildren.push_back(
+ new LayoutTree::SymbolOperator(
+ false,
+ L"",
+ false,
+ L")",
+ font,
+ state.mStyle,
+ LayoutTree::Node::cFlavourClose,
+ LayoutTree::Node::cLimitsDisplayLimits,
+ state.mColour
+ )
+ );
+
+ return static_cast<auto_ptr<LayoutTree::Node> >(row);
+ }
+
+ if (mCommand == L"\\operatorname" ||
+ mCommand == L"\\operatornamewithlimits"
+ )
+ {
+ // Essentially this just writes the argument in upright font and
+ // sets limits correctly. So initially it looks like
+ // <mi mathvariant="normal">s</mi>
+ // <mi mathvariant="normal">i</mi>
+ // <mi mathvariant="normal">n</mi>
+ // But then these get merged later on, to produce the more
+ // reasonable <mi>sin</mi>.
+
+ TexProcessingState newState = state;
+ newState.mMathFont.mFamily = TexMathFont::cFamilyRm;
+ auto_ptr<LayoutTree::Node> node
+ = mChild->BuildLayoutTree(newState);
+ node->mFlavour = LayoutTree::Node::cFlavourOp;
+ node->mLimits =
+ (mCommand == L"\\operatorname")
+ ? LayoutTree::Node::cLimitsNoLimits
+ : LayoutTree::Node::cLimitsDisplayLimits;
+ return node;
+ }
+
+ // Commands that only override the flavour of their argument.
+ static pair<wstring, LayoutTree::Node::Flavour> flavourCommandArray[] =
+ {
+ make_pair(L"\\mathop", LayoutTree::Node::cFlavourOp),
+ make_pair(L"\\mathrel", LayoutTree::Node::cFlavourRel),
+ make_pair(L"\\mathbin", LayoutTree::Node::cFlavourBin),
+ make_pair(L"\\mathord", LayoutTree::Node::cFlavourOrd),
+ make_pair(L"\\mathopen", LayoutTree::Node::cFlavourOpen),
+ make_pair(L"\\mathclose", LayoutTree::Node::cFlavourClose),
+ make_pair(L"\\mathpunct", LayoutTree::Node::cFlavourPunct),
+ make_pair(L"\\mathinner", LayoutTree::Node::cFlavourInner)
+ };
+ static wishful_hash_map<wstring, LayoutTree::Node::Flavour>
+ flavourCommandTable(
+ flavourCommandArray,
+ END_ARRAY(flavourCommandArray)
+ );
+
+ wishful_hash_map<wstring, LayoutTree::Node::Flavour>::const_iterator
+ flavourCommand = flavourCommandTable.find(mCommand);
+ if (flavourCommand != flavourCommandTable.end())
+ {
+ auto_ptr<LayoutTree::Node> node
+ = mChild->BuildLayoutTree(state);
+ node->mFlavour = flavourCommand->second;
+ if (node->mFlavour == LayoutTree::Node::cFlavourOp) // "\mathop" also sets the limits mode
+ node->mLimits = LayoutTree::Node::cLimitsDisplayLimits;
+ return node;
+ }
+ // Commands that only select the math font family for their argument.
+ static pair<wstring, TexMathFont::Family> fontCommandArray[] =
+ {
+ make_pair(L"\\mathbf", TexMathFont::cFamilyBf),
+ make_pair(L"\\mathbb", TexMathFont::cFamilyBb),
+ make_pair(L"\\mathit", TexMathFont::cFamilyIt),
+ make_pair(L"\\mathrm", TexMathFont::cFamilyRm),
+ make_pair(L"\\mathsf", TexMathFont::cFamilySf),
+ make_pair(L"\\mathtt", TexMathFont::cFamilyTt),
+ make_pair(L"\\mathcal", TexMathFont::cFamilyCal),
+ make_pair(L"\\mathfrak", TexMathFont::cFamilyFrak)
+ };
+ static wishful_hash_map<wstring, TexMathFont::Family> fontCommandTable(
+ fontCommandArray,
+ END_ARRAY(fontCommandArray)
+ );
+
+ wishful_hash_map<wstring, TexMathFont::Family>::const_iterator
+ fontCommand = fontCommandTable.find(mCommand);
+ if (fontCommand != fontCommandTable.end())
+ {
+ TexProcessingState newState = state;
+ newState.mMathFont.mFamily = fontCommand->second;
+ return mChild->BuildLayoutTree(newState);
+ }
+ // "\boldsymbol" sets the bold flag and resets the family to the default.
+ if (mCommand == L"\\boldsymbol")
+ {
+ TexProcessingState newState = state;
+ newState.mMathFont.mIsBoldsymbol = true;
+ newState.mMathFont.mFamily = TexMathFont::cFamilyDefault;
+ return mChild->BuildLayoutTree(newState);
+ }
+
+ // Here is a list of all the accent commands we know about.
+ static pair<wstring, AccentInfo> accentCommandArray[] =
+ {
+ // FIX: there's some funny inconsistency between the definition of
+ // &Hat; among MathML versions. I was originally using plain "^" for
+ // these accents, but Roger recommended using 0x302 instead.
+ make_pair(L"\\hat", AccentInfo(L"\U00000302", false)),
+ make_pair(L"\\widehat", AccentInfo(L"\U00000302", true)),
+ make_pair(L"\\bar", AccentInfo(L"\U000000AF", false)),
+ make_pair(L"\\overline", AccentInfo(L"\U000000AF", true)),
+ make_pair(L"\\underline", AccentInfo(L"\U000000AF", true)),
+ make_pair(L"\\tilde", AccentInfo(L"\U000002DC", false)),
+ make_pair(L"\\widetilde", AccentInfo(L"\U000002DC", true)),
+ make_pair(L"\\overleftarrow", AccentInfo(L"\U00002190", true)),
+ make_pair(L"\\vec", AccentInfo(L"\U000020D7", true)),
+ make_pair(L"\\overrightarrow", AccentInfo(L"\U00002192", true)),
+ make_pair(L"\\overleftrightarrow", AccentInfo(L"\U00002194", true)),
+ make_pair(L"\\dot", AccentInfo(L"\U000000B7", false)),
+ make_pair(L"\\ddot", AccentInfo(L"\U000000B7\U000000B7", false)),
+ make_pair(L"\\check", AccentInfo(L"\U000002C7", false)),
+ make_pair(L"\\acute", AccentInfo(L"\U000000B4", false)),
+ make_pair(L"\\grave", AccentInfo(L"\U00000060", false)),
+ make_pair(L"\\breve", AccentInfo(L"\U000002D8", false)
+ )
+ };
+ static wishful_hash_map<wstring, AccentInfo> accentCommandTable(
+ accentCommandArray,
+ END_ARRAY(accentCommandArray)
+ );
+
+ wishful_hash_map<wstring, AccentInfo>::const_iterator
+ accentCommand = accentCommandTable.find(mCommand);
+ if (accentCommand != accentCommandTable.end())
+ {
+ auto_ptr<LayoutTree::Node> base
+ = mChild->BuildLayoutTree(state);
+ auto_ptr<LayoutTree::Node> lower, upper;
+
+ auto_ptr<LayoutTree::Node> accent(
+ new LayoutTree::SymbolOperator(
+ accentCommand->second.mIsStretchy,
+ L"",
+ true, // is an accent
+ accentCommand->second.mText,
+ state.mMathFont.mIsBoldsymbol
+ ? cMathmlFontBold : cMathmlFontNormal,
+ // We don't need to decrement the style here, because
+ // LayoutTree::SymbolOperator knows not to insert style
+ // changes for accent operators
+ state.mStyle,
+ LayoutTree::Node::cFlavourOrd,
+ LayoutTree::Node::cLimitsDisplayLimits,
+ state.mColour
+ )
+ );
+ // Only "\underline" places its accent below the base.
+ if (mCommand == L"\\underline")
+ lower = accent;
+ else
+ upper = accent;
+
+ return auto_ptr<LayoutTree::Node>(
+ new LayoutTree::Scripts(
+ state.mStyle,
+ LayoutTree::Node::cFlavourOrd,
+ LayoutTree::Node::cLimitsDisplayLimits,
+ state.mColour,
+ false, // not sideset
+ base,
+ upper,
+ lower
+ )
+ );
+ }
+ // No case matched: callers are expected to pass only the commands handled above.
+ throw logic_error(
+ "Unexpected command in MathCommand1Arg::BuildLayoutTree"
+ );
+}
+
+
+auto_ptr<LayoutTree::Node> MathStateChange::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // This is only expected to be reached when the state change command
+    // is the sole member of its math list. It then has nothing to act
+    // on, so an empty row stands in for it.
+    auto_ptr<LayoutTree::Node> emptyRow(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+    return emptyRow;
+}
+
+auto_ptr<LayoutTree::Node> MathColour::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // As for MathStateChange::BuildLayoutTree: a lone colour command has
+    // nothing to act on, so an empty row stands in for it.
+    auto_ptr<LayoutTree::Node> emptyRow(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+    return emptyRow;
+}
+
+auto_ptr<LayoutTree::Node> TextStateChange::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // As for MathStateChange::BuildLayoutTree: a lone state change has
+    // nothing to act on, so an empty row stands in for it.
+    auto_ptr<LayoutTree::Node> emptyRow(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+    return emptyRow;
+}
+
+auto_ptr<LayoutTree::Node> TextColour::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // As for MathStateChange::BuildLayoutTree: a lone colour command has
+    // nothing to act on, so an empty row stands in for it.
+    auto_ptr<LayoutTree::Node> emptyRow(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+    return emptyRow;
+}
+
+
+// Builds the layout tree for a math command taking two arguments.
+//
+// Handled commands:
+//   \frac, \over      fraction with a visible line
+//   \atop             fraction without a line
+//   \binom, \choose   fraction without a line, fenced by "(" and ")"
+//   \rootReserved     root; mChild2 is passed first and mChild1 second
+//                     (the latter built in scriptscript style)
+//   \cfrac            display-style fraction with text-style children
+//   \overset,         mChild2 as base, with mChild1 as its upper or lower
+//   \underset         script
+//
+// Throws logic_error for any other command.
+auto_ptr<LayoutTree::Node> MathCommand2Args::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // The last two flags are only meaningful when isFractionCommand is
+    // set, but are initialised so they never hold indeterminate values.
+    bool isFractionCommand = false;
+    bool hasParentheses = false;
+    bool isLineVisible = false;
+
+    if (mCommand == L"\\frac" || mCommand == L"\\over")
+    {
+        isFractionCommand = true;
+        isLineVisible = true;
+        hasParentheses = false;
+    }
+    else if (mCommand == L"\\atop")
+    {
+        isFractionCommand = true;
+        isLineVisible = false;
+        hasParentheses = false;
+    }
+    else if (mCommand == L"\\binom" || mCommand == L"\\choose")
+    {
+        isFractionCommand = true;
+        isLineVisible = false;
+        hasParentheses = true;
+    }
+
+    if (isFractionCommand)
+    {
+        // Work out what style the numerator/denominator should be:
+        // one step smaller than the current style. Scriptscript has no
+        // smaller style and is left as it is.
+        TexProcessingState newState = state;
+        switch (state.mStyle)
+        {
+            case LayoutTree::Node::cStyleDisplay:
+                newState.mStyle = LayoutTree::Node::cStyleText;
+                break;
+
+            case LayoutTree::Node::cStyleText:
+                newState.mStyle = LayoutTree::Node::cStyleScript;
+                break;
+
+            case LayoutTree::Node::cStyleScript:
+                newState.mStyle = LayoutTree::Node::cStyleScriptScript;
+                break;
+
+            default:
+                break;
+        }
+
+        auto_ptr<LayoutTree::Node> inside(
+            new LayoutTree::Fraction(
+                state.mStyle,
+                state.mColour,
+                mChild1->BuildLayoutTree(newState),
+                mChild2->BuildLayoutTree(newState),
+                isLineVisible
+            )
+        );
+
+        if (hasParentheses)
+            return auto_ptr<LayoutTree::Node>(
+                new LayoutTree::Fenced(
+                    state.mStyle,
+                    state.mColour,
+                    L"(", L")", inside
+                )
+            );
+        else
+            return inside;
+    }
+
+    if (mCommand == L"\\rootReserved")
+    {
+        // mChild1 is set in scriptscript style; mChild2 keeps the
+        // current style.
+        TexProcessingState newState = state;
+        newState.mStyle = LayoutTree::Node::cStyleScriptScript;
+
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::Root(
+                mChild2->BuildLayoutTree(state),
+                mChild1->BuildLayoutTree(newState),
+                state.mColour
+            )
+        );
+    }
+
+    if (mCommand == L"\\cfrac")
+    {
+        TexProcessingState newState = state;
+        newState.mStyle = LayoutTree::Node::cStyleText;
+
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::Fraction(
+                LayoutTree::Node::cStyleDisplay,
+                state.mColour,
+                mChild1->BuildLayoutTree(newState),
+                mChild2->BuildLayoutTree(newState),
+                true    // true = should be a visible fraction line
+            )
+        );
+    }
+
+    if (mCommand == L"\\overset" || mCommand == L"\\underset")
+    {
+        // Work out what style the under/overset node should be.
+        TexProcessingState newState = state;
+        switch (state.mStyle)
+        {
+            case LayoutTree::Node::cStyleDisplay:
+            case LayoutTree::Node::cStyleText:
+                newState.mStyle = LayoutTree::Node::cStyleScript;
+                break;
+
+            case LayoutTree::Node::cStyleScript:
+            case LayoutTree::Node::cStyleScriptScript:
+                newState.mStyle = LayoutTree::Node::cStyleScriptScript;
+                break;
+        }
+
+        auto_ptr<LayoutTree::Node> upper, lower;
+        if (mCommand == L"\\overset")
+            upper = mChild1->BuildLayoutTree(newState);
+        else    // else underset
+            lower = mChild1->BuildLayoutTree(newState);
+
+        auto_ptr<LayoutTree::Node> base =
+            mChild2->BuildLayoutTree(state);
+
+        // Read the flavour BEFORE the constructor call. Function
+        // arguments are evaluated in an unspecified order, so an
+        // argument that dereferences "base" could otherwise run after
+        // "base" has given up its pointer to the constructor.
+        const LayoutTree::Node::Flavour baseFlavour = base->mFlavour;
+
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::Scripts(
+                state.mStyle,
+                baseFlavour,
+                LayoutTree::Node::cLimitsNoLimits,
+                state.mColour,
+                false,      // false = NOT sideset
+                base,
+                upper,
+                lower
+            )
+        );
+    }
+
+    throw logic_error(
+        "Unexpected command in MathCommand2Args::BuildLayoutTree"
+    );
+}
+
+
+// Builds a Scripts node from the optional base, superscript and subscript.
+auto_ptr<LayoutTree::Node> MathScripts::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // Settings used when there is no base at all.
+    LayoutTree::Node::Flavour baseFlavour = LayoutTree::Node::cFlavourOrd;
+    LayoutTree::Node::Limits baseLimits =
+        LayoutTree::Node::cLimitsDisplayLimits;
+
+    // A non-empty base supplies its own flavour and limits settings.
+    auto_ptr<LayoutTree::Node> baseNode;
+    if (mBase.get())
+    {
+        baseNode = mBase->BuildLayoutTree(state);
+        baseFlavour = baseNode->mFlavour;
+        baseLimits = baseNode->mLimits;
+    }
+
+    // The scripts are set one level smaller than the base: script style
+    // from display/text, scriptscript style from script/scriptscript.
+    TexProcessingState scriptState = state;
+    if (state.mStyle == LayoutTree::Node::cStyleDisplay ||
+        state.mStyle == LayoutTree::Node::cStyleText
+    )
+        scriptState.mStyle = LayoutTree::Node::cStyleScript;
+    else if (
+        state.mStyle == LayoutTree::Node::cStyleScript ||
+        state.mStyle == LayoutTree::Node::cStyleScriptScript
+    )
+        scriptState.mStyle = LayoutTree::Node::cStyleScriptScript;
+
+    auto_ptr<LayoutTree::Node> superscript, subscript;
+    if (mUpper.get())
+        superscript = mUpper->BuildLayoutTree(scriptState);
+    if (mLower.get())
+        subscript = mLower->BuildLayoutTree(scriptState);
+
+    // Scripts go above/below the base only for an operator whose limits
+    // setting asks for it: either "limits" outright, or "displaylimits"
+    // while in display style. Everything else is set to the side.
+    const bool wantsLimits =
+        (baseLimits == LayoutTree::Node::cLimitsLimits) ||
+        (
+            baseLimits == LayoutTree::Node::cLimitsDisplayLimits &&
+            state.mStyle == LayoutTree::Node::cStyleDisplay
+        );
+    const bool isSideset =
+        !(baseFlavour == LayoutTree::Node::cFlavourOp && wantsLimits);
+
+    auto_ptr<LayoutTree::Node> scripts(
+        new LayoutTree::Scripts(
+            state.mStyle,
+            baseFlavour,
+            LayoutTree::Node::cLimitsDisplayLimits,
+            state.mColour,
+            isSideset,
+            baseNode,
+            superscript,
+            subscript
+        )
+    );
+    return scripts;
+}
+
+
+// Applies "\limits", "\nolimits" or "\displaylimits" to the child node.
+// Throws Exception("MisplacedLimits") when the child is not an operator,
+// and logic_error for any other command name.
+auto_ptr<LayoutTree::Node> MathLimits::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    auto_ptr<LayoutTree::Node> target = mChild->BuildLayoutTree(state);
+
+    // Limits commands may only follow an operator.
+    const bool isOperator =
+        (target->mFlavour == LayoutTree::Node::cFlavourOp);
+    if (!isOperator)
+        throw Exception(L"MisplacedLimits", mCommand);
+
+    if (mCommand == L"\\limits")
+    {
+        target->mLimits = LayoutTree::Node::cLimitsLimits;
+        return target;
+    }
+
+    if (mCommand == L"\\nolimits")
+    {
+        target->mLimits = LayoutTree::Node::cLimitsNoLimits;
+        return target;
+    }
+
+    if (mCommand == L"\\displaylimits")
+    {
+        target->mLimits = LayoutTree::Node::cLimitsDisplayLimits;
+        return target;
+    }
+
+    throw logic_error(
+        "Unexpected command in MathLimits::BuildLayoutTree."
+    );
+}
+
+auto_ptr<LayoutTree::Node> MathGroup::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    auto_ptr<LayoutTree::Node> group = mChild->BuildLayoutTree(state);
+
+    // TeX treats any group enclosed by curly braces as an "ordinary"
+    // atom, whatever it contains. This is why e.g. "123{,}456" looks
+    // different to "123,456".
+    group->mFlavour = LayoutTree::Node::cFlavourOrd;
+
+    return group;
+}
+
+
+// Wraps the child's layout tree in a Fenced node whose delimiters are the
+// gDelimiterTable translations of the left and right delimiter commands.
+auto_ptr<LayoutTree::Node> MathDelimited::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // Copies are taken because each lookup goes through the table's
+    // operator[].
+    const wstring leftText = gDelimiterTable[mLeftDelimiter];
+    const wstring rightText = gDelimiterTable[mRightDelimiter];
+
+    auto_ptr<LayoutTree::Node> fenced(
+        new LayoutTree::Fenced(
+            state.mStyle,
+            state.mColour,
+            leftText,
+            rightText,
+            mChild->BuildLayoutTree(state)
+        )
+    );
+    return fenced;
+}
+
+
+// Describes one of the "\big..." commands: the flavour its delimiter
+// takes and the size string used for it.
+struct BigInfo
+{
+    LayoutTree::Node::Flavour mFlavour;
+    wstring mSize;
+
+    BigInfo(LayoutTree::Node::Flavour flavour, const wstring& size)
+        : mFlavour(flavour), mSize(size)
+    { }
+};
+
+
+// Builds a stretchy operator for a "\big..." command applied to
+// mDelimiter. Throws logic_error for a command not in the table.
+auto_ptr<LayoutTree::Node> MathBig::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // Every "\big..." command, with the flavour its delimiter takes and
+    // the size it is stretched to.
+    static pair<wstring, BigInfo> bigCommandArray[] =
+    {
+        make_pair(L"\\big",   BigInfo(LayoutTree::Node::cFlavourOrd,   L"1.2em")),
+        make_pair(L"\\bigl",  BigInfo(LayoutTree::Node::cFlavourOpen,  L"1.2em")),
+        make_pair(L"\\bigr",  BigInfo(LayoutTree::Node::cFlavourClose, L"1.2em")),
+
+        make_pair(L"\\Big",   BigInfo(LayoutTree::Node::cFlavourOrd,   L"1.8em")),
+        make_pair(L"\\Bigl",  BigInfo(LayoutTree::Node::cFlavourOpen,  L"1.8em")),
+        make_pair(L"\\Bigr",  BigInfo(LayoutTree::Node::cFlavourClose, L"1.8em")),
+
+        make_pair(L"\\bigg",  BigInfo(LayoutTree::Node::cFlavourOrd,   L"2.4em")),
+        make_pair(L"\\biggl", BigInfo(LayoutTree::Node::cFlavourOpen,  L"2.4em")),
+        make_pair(L"\\biggr", BigInfo(LayoutTree::Node::cFlavourClose, L"2.4em")),
+
+        make_pair(L"\\Bigg",  BigInfo(LayoutTree::Node::cFlavourOrd,   L"3em")),
+        make_pair(L"\\Biggl", BigInfo(LayoutTree::Node::cFlavourOpen,  L"3em")),
+        make_pair(L"\\Biggr", BigInfo(LayoutTree::Node::cFlavourClose, L"3em"))
+    };
+    static wishful_hash_map<wstring, BigInfo> bigCommandTable(
+        bigCommandArray,
+        END_ARRAY(bigCommandArray)
+    );
+
+    wishful_hash_map<wstring, BigInfo>::const_iterator
+        found = bigCommandTable.find(mCommand);
+
+    if (found == bigCommandTable.end())
+        throw logic_error("Unknown command in MathBig::BuildLayoutTree");
+
+    const BigInfo& info = found->second;
+
+    // Display and text styles are kept; any other style becomes text.
+    const bool keepsStyle =
+        state.mStyle == LayoutTree::Node::cStyleDisplay ||
+        state.mStyle == LayoutTree::Node::cStyleText;
+    const LayoutTree::Node::Style operatorStyle =
+        keepsStyle ? state.mStyle : LayoutTree::Node::cStyleText;
+
+    // FIX: TeX allows "\big."; do we?
+    auto_ptr<LayoutTree::Node> bigOperator(
+        new LayoutTree::SymbolOperator(
+            true,       // indicates stretchy="true"
+            info.mSize,
+            false,      // not an accent
+            gDelimiterTable[mDelimiter],
+            cMathmlFontNormal,
+            operatorStyle,
+            info.mFlavour,
+            LayoutTree::Node::cLimitsDisplayLimits,
+            state.mColour
+        )
+    );
+    return bigOperator;
+}
+
+
+auto_ptr<LayoutTree::Node> MathTableRow::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // MathTable::BuildLayoutTree lays out the whole table, rows
+    // included, so a call arriving here is a programming error.
+    throw logic_error(
+        "Arrived unexpectedly in MathTableRow::BuildLayoutTree"
+    );
+}
+
+
+// Builds a LayoutTree::Table with one output row per parse-tree row and
+// one layout node per entry, all built with the same processing state.
+auto_ptr<LayoutTree::Node> MathTable::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    auto_ptr<LayoutTree::Table> result(
+        new LayoutTree::Table(state.mStyle, state.mColour)
+    );
+    result->mRows.reserve(mRows.size());
+
+    typedef vector<MathTableRow*>::size_type RowIndex;
+    typedef vector<MathNode*>::size_type EntryIndex;
+
+    for (RowIndex r = 0; r != mRows.size(); ++r)
+    {
+        const vector<MathNode*>& sourceEntries = mRows[r]->mEntries;
+
+        // Append the empty row first, then fill it in place.
+        result->mRows.push_back(vector<LayoutTree::Node*>());
+        vector<LayoutTree::Node*>& builtEntries = result->mRows.back();
+
+        for (EntryIndex e = 0; e != sourceEntries.size(); ++e)
+            builtEntries.push_back(
+                sourceEntries[e]->BuildLayoutTree(state).release()
+            );
+    }
+
+    return static_cast<auto_ptr<LayoutTree::Node> >(result);
+}
+
+
+// Describes one environment: the delimiters placed to the left and right
+// of its table (either may be empty).
+struct EnvironmentInfo
+{
+    wstring mLeftDelimiter, mRightDelimiter;
+
+    EnvironmentInfo(
+        const wstring& leftDelimiter, const wstring& rightDelimiter
+    )
+        : mLeftDelimiter(leftDelimiter), mRightDelimiter(rightDelimiter)
+    { }
+};
+
+
+// Builds the layout tree for a "\begin{...} ... \end{...}" environment:
+// the table, wrapped in a Fenced node when the environment has at least
+// one delimiter. Throws logic_error for an unknown environment name or
+// when the child does not build a Table.
+auto_ptr<LayoutTree::Node> MathEnvironment::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // Every known environment, with the delimiters that go on each side
+    // of its table.
+    // FIX: this is kind of stupid... almost every environment ends up
+    // with its own special-case code!
+    static pair<wstring, EnvironmentInfo> environmentArray[] =
+    {
+        make_pair(L"matrix",      EnvironmentInfo(L"",  L"")),
+        make_pair(L"pmatrix",     EnvironmentInfo(L"(", L")")),
+        make_pair(L"bmatrix",     EnvironmentInfo(L"[", L"]")),
+        make_pair(L"Bmatrix",     EnvironmentInfo(L"{", L"}")),
+        make_pair(L"vmatrix",     EnvironmentInfo(L"|", L"|")),
+        // DoubleVerticalBar:
+        make_pair(L"Vmatrix",     EnvironmentInfo(L"\U00002225", L"\U00002225")),
+        make_pair(L"cases",       EnvironmentInfo(L"{", L"")),
+        make_pair(L"aligned",     EnvironmentInfo(L"",  L"")),
+        make_pair(L"smallmatrix", EnvironmentInfo(L"",  L"")),
+        make_pair(L"substack",    EnvironmentInfo(L"",  L""))
+    };
+    static wishful_hash_map<wstring, EnvironmentInfo> environmentTable(
+        environmentArray,
+        END_ARRAY(environmentArray)
+    );
+
+    wishful_hash_map<wstring, EnvironmentInfo>::const_iterator
+        found = environmentTable.find(mName);
+
+    if (found == environmentTable.end())
+        throw logic_error(
+            "Unexpected environment name in "
+            "MathEnvironment::BuildLayoutTree"
+        );
+
+    const EnvironmentInfo& info = found->second;
+    const bool isAligned = (mName == L"aligned");
+    const bool isSubstack = (mName == L"substack");
+
+    // For reasons not investigated by the original author, the
+    // "boldsymbol" flag persists into environments but the math font
+    // does not, so the font is reset and only the flag carried over.
+    TexProcessingState innerState = state;
+    innerState.mMathFont = TexMathFont();
+    innerState.mMathFont.mIsBoldsymbol = state.mMathFont.mIsBoldsymbol;
+
+    if (isSubstack || mName == L"smallmatrix")
+        innerState.mStyle = LayoutTree::Node::cStyleScript;
+    else if (isAligned)
+        innerState.mStyle = LayoutTree::Node::cStyleDisplay;
+    else
+        innerState.mStyle = LayoutTree::Node::cStyleText;
+
+    auto_ptr<LayoutTree::Node> table = mTable->BuildLayoutTree(innerState);
+    LayoutTree::Table* tablePtr =
+        dynamic_cast<LayoutTree::Table*>(table.get());
+    if (!tablePtr)
+        throw logic_error(
+            "Unexpected node type in MathEnvironment::BuildLayoutTree"
+        );
+
+    if (isSubstack)
+        tablePtr->mRowSpacing = LayoutTree::Table::cRowSpacingTight;
+
+    if (isAligned)
+        tablePtr->mAlign = LayoutTree::Table::cAlignRightLeft;
+    else if (mName == L"cases")
+        tablePtr->mAlign = LayoutTree::Table::cAlignLeft;
+
+    // Environments without delimiters are just the bare table.
+    const bool hasNoDelimiters =
+        info.mLeftDelimiter.empty() && info.mRightDelimiter.empty();
+    if (hasNoDelimiters)
+        return table;
+
+    // Every environment that has a delimiter sets its entries in text
+    // style; the fence itself keeps display style when the surrounding
+    // style is display, and is text style otherwise.
+    const LayoutTree::Node::Style fencedStyle =
+        (state.mStyle == LayoutTree::Node::cStyleDisplay)
+            ? LayoutTree::Node::cStyleDisplay
+            : LayoutTree::Node::cStyleText;
+
+    auto_ptr<LayoutTree::Node> fenced(
+        new LayoutTree::Fenced(
+            fencedStyle,
+            state.mColour,
+            info.mLeftDelimiter,
+            info.mRightDelimiter,
+            table
+        )
+    );
+    return fenced;
+}
+
+
+// Builds the child in text mode, using the font selected by mCommand.
+// Throws logic_error when mCommand is not a known text-mode command.
+auto_ptr<LayoutTree::Node> EnterTextMode::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // Every command that switches to text mode, with the font it selects.
+    static pair<wstring, TexTextFont> textCommandArray[] =
+    {   //                                                         flags are:  bold?  italic?
+        make_pair(L"\\mbox",   TexTextFont(TexTextFont::cFamilyRm, false, false)),
+        make_pair(L"\\hbox",   TexTextFont(TexTextFont::cFamilyRm, false, false)),
+        make_pair(L"\\text",   TexTextFont(TexTextFont::cFamilyRm, false, false)),
+        make_pair(L"\\textrm", TexTextFont(TexTextFont::cFamilyRm, false, false)),
+        make_pair(L"\\textbf", TexTextFont(TexTextFont::cFamilyRm, true,  false)),
+        make_pair(L"\\emph",   TexTextFont(TexTextFont::cFamilyRm, false, true)),
+        make_pair(L"\\textit", TexTextFont(TexTextFont::cFamilyRm, false, true)),
+        make_pair(L"\\textsf", TexTextFont(TexTextFont::cFamilySf, false, false)),
+        make_pair(L"\\texttt", TexTextFont(TexTextFont::cFamilyTt, false, false)),
+        make_pair(L"\\cyr",    TexTextFont(TexTextFont::cFamilyRm, false, false)),
+        make_pair(L"\\jap",    TexTextFont(TexTextFont::cFamilyRm, false, false))
+    };
+    static wishful_hash_map<wstring, TexTextFont> textCommandTable(
+        textCommandArray,
+        END_ARRAY(textCommandArray)
+    );
+
+    wishful_hash_map<wstring, TexTextFont>::iterator
+        found = textCommandTable.find(mCommand);
+
+    if (found == textCommandTable.end())
+        throw logic_error(
+            "Unexpected command in EnterTextMode::BuildLayoutTree"
+        );
+
+    TexProcessingState textState = state;
+    textState.mTextFont = found->second;
+
+    // The two box commands additionally force text style.
+    const bool isBoxCommand =
+        (mCommand == L"\\hbox" || mCommand == L"\\mbox");
+    if (isBoxCommand)
+        textState.mStyle = LayoutTree::Node::cStyleText;
+
+    return mChild->BuildLayoutTree(textState);
+}
+
+
+// Builds one flat Row from the children of this text list. State-change
+// children update a running copy of the state; a child that builds a
+// Row contributes that Row's children rather than the Row itself.
+auto_ptr<LayoutTree::Node> TextList::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    auto_ptr<LayoutTree::Row> row(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+
+    TexProcessingState runningState = state;
+
+    for (vector<TextNode*>::const_iterator
+        it = mChildren.begin(); it != mChildren.end(); ++it
+    )
+    {
+        // State changes are applied, not laid out.
+        if (TextStateChange* change = dynamic_cast<TextStateChange*>(*it))
+        {
+            change->Apply(runningState);
+            continue;
+        }
+
+        auto_ptr<LayoutTree::Node>
+            built = (*it)->BuildLayoutTree(runningState);
+        LayoutTree::Row* builtRow =
+            dynamic_cast<LayoutTree::Row*>(built.get());
+
+        if (!builtRow)
+        {
+            // An ordinary node: hand ownership to the output row.
+            row->mChildren.push_back(built.release());
+            continue;
+        }
+
+        // A nested Row: move its children across. The emptied Row is
+        // destroyed when "built" goes out of scope.
+        row->mChildren.splice(
+            row->mChildren.end(),
+            builtRow->mChildren
+        );
+    }
+
+    return static_cast<auto_ptr<LayoutTree::Node> >(row);
+}
+
+
+// Builds a SymbolText node for a text-mode symbol. Commands listed in
+// the table are replaced by their translation; anything else is emitted
+// verbatim.
+auto_ptr<LayoutTree::Node> TextSymbol::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    static pair<wstring, wstring> textCommandArray[] =
+    {
+        make_pair(L"\\!",                  L""),
+        make_pair(L" ",                    L"\U000000A0"),     // NonBreakingSpace
+        make_pair(L"~",                    L"\U000000A0"),
+        make_pair(L"\\,",                  L"\U000000A0"),
+        make_pair(L"\\ ",                  L"\U000000A0"),
+        make_pair(L"\\;",                  L"\U000000A0"),
+        make_pair(L"\\quad",               L"\U000000A0\U000000A0"),
+        make_pair(L"\\qquad",              L"\U000000A0\U000000A0\U000000A0\U000000A0"),
+
+        make_pair(L"\\&",                  L"&"),
+        // FIX: why did I put in these next two lines again?
+        // FIX: The character "<" and ">" actually do funny things in TeX...
+        make_pair(L"<",                    L"<"),
+        make_pair(L">",                    L">"),
+        make_pair(L"\\_",                  L"_"),
+        make_pair(L"\\$",                  L"$"),
+        make_pair(L"\\#",                  L"#"),
+        make_pair(L"\\%",                  L"%"),
+        make_pair(L"\\{",                  L"{"),
+        make_pair(L"\\}",                  L"}"),
+        make_pair(L"\\textbackslash",      L"\\"),
+        // FIX: for some reason in Firefox the caret is much lower
+        // than it should be
+        make_pair(L"\\textasciicircum",    L"^"),
+        make_pair(L"\\textasciitilde",     L"~"),
+        make_pair(L"\\textvisiblespace",   L"\U000023B5"),
+        make_pair(L"\\O",                  L"\U000000D8"),
+        make_pair(L"\\S",                  L"\U000000A7")
+    };
+    static wishful_hash_map<wstring, wstring> textCommandTable(
+        textCommandArray,
+        END_ARRAY(textCommandArray)
+    );
+
+    wishful_hash_map<wstring, wstring>::iterator
+        found = textCommandTable.find(mCommand);
+
+    // Use the translation when there is one, else the command itself.
+    const wstring& symbolText =
+        (found != textCommandTable.end()) ? found->second : mCommand;
+
+    auto_ptr<LayoutTree::Node> symbol(
+        new LayoutTree::SymbolText(
+            symbolText,
+            state.mTextFont.GetMathmlApproximation(),
+            state.mStyle,
+            state.mColour
+        )
+    );
+    return symbol;
+}
+
+
+auto_ptr<LayoutTree::Node> TextGroup::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // A text group adds nothing of its own; its layout is its child's.
+    auto_ptr<LayoutTree::Node> childLayout = mChild->BuildLayoutTree(state);
+    return childLayout;
+}
+
+
+// Builds the argument of a one-argument text command, with the text
+// font adjusted as the command requires. Throws logic_error for an
+// unknown command.
+auto_ptr<LayoutTree::Node> TextCommand1Arg::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    TexProcessingState argumentState = state;
+    TexTextFont& font = argumentState.mTextFont;
+
+    // These commands leave the font exactly as it is.
+    const bool leavesFontAlone =
+        mCommand == L"\\text" ||
+        mCommand == L"\\hbox" ||
+        mCommand == L"\\mbox" ||
+        mCommand == L"\\cyr"  ||
+        mCommand == L"\\jap";
+
+    if (!leavesFontAlone)
+    {
+        if (mCommand == L"\\textrm")
+            font.mFamily = TexTextFont::cFamilyRm;
+        else if (mCommand == L"\\texttt")
+            font.mFamily = TexTextFont::cFamilyTt;
+        else if (mCommand == L"\\textsf")
+            font.mFamily = TexTextFont::cFamilySf;
+        else if (mCommand == L"\\textit")
+            font.mIsItalic = true;
+        else if (mCommand == L"\\emph")
+            // "\emph" toggles italics rather than forcing them on.
+            font.mIsItalic = !font.mIsItalic;
+        else if (mCommand == L"\\textbf")
+            font.mIsBold = true;
+        else
+            throw logic_error(
+                "Unexpected command in TextCommand1Arg::BuildLayoutTree"
+            );
+    }
+
+    return mChild->BuildLayoutTree(argumentState);
+}
+
+}
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/ParseTree2.cpp b/blahtexml/source/BlahtexCore/ParseTree2.cpp
new file mode 100644
index 0000000..41523c2
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/ParseTree2.cpp
@@ -0,0 +1,1089 @@
+// File "ParseTree2.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <stdexcept>
+#include "ParseTree.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// TeX commands for lowercase Greek letters, each paired with the wide
+// character it is translated to; lowercaseGreekTable is built from it.
+pair<wstring, wchar_t> lowercaseGreekArray[] =
+{
+    make_pair(L"\\alpha",      L'\U000003B1'),
+    make_pair(L"\\beta",       L'\U000003B2'),
+    make_pair(L"\\gamma",      L'\U000003B3'),
+    make_pair(L"\\delta",      L'\U000003B4'),
+    make_pair(L"\\epsilon",    L'\U000003F5'),    // straightepsilon
+    make_pair(L"\\varepsilon", L'\U000003B5'),    // varepsilon
+    make_pair(L"\\zeta",       L'\U000003B6'),
+    make_pair(L"\\eta",        L'\U000003B7'),
+    make_pair(L"\\theta",      L'\U000003B8'),
+    make_pair(L"\\vartheta",   L'\U000003D1'),
+    make_pair(L"\\iota",       L'\U000003B9'),
+    make_pair(L"\\kappa",      L'\U000003BA'),
+    make_pair(L"\\varkappa",   L'\U000003F0'),
+    make_pair(L"\\lambda",     L'\U000003BB'),
+    make_pair(L"\\mu",         L'\U000003BC'),
+    make_pair(L"\\nu",         L'\U000003BD'),
+    make_pair(L"\\pi",         L'\U000003C0'),
+    make_pair(L"\\varpi",      L'\U000003D6'),
+    make_pair(L"\\rho",        L'\U000003C1'),
+    make_pair(L"\\varrho",     L'\U000003F1'),
+    make_pair(L"\\sigma",      L'\U000003C3'),
+    make_pair(L"\\varsigma",   L'\U000003C2'),
+    make_pair(L"\\tau",        L'\U000003C4'),
+    make_pair(L"\\upsilon",    L'\U000003C5'),
+    make_pair(L"\\phi",        L'\U000003D5'),    // straightphi
+    make_pair(L"\\varphi",     L'\U000003C6'),
+    make_pair(L"\\chi",        L'\U000003C7'),
+    make_pair(L"\\psi",        L'\U000003C8'),
+    make_pair(L"\\omega",      L'\U000003C9'),
+    make_pair(L"\\xi",         L'\U000003BE'),
+    make_pair(L"\\digamma",    L'\U000003DD')
+};
+wishful_hash_map<wstring, wchar_t> lowercaseGreekTable(
+    lowercaseGreekArray, END_ARRAY(lowercaseGreekArray));
+
+
+// TeX commands for uppercase Greek letters, each paired with the wide
+// character it is translated to; uppercaseGreekTable is built from it.
+pair<wstring, wchar_t> uppercaseGreekArray[] =
+{
+    make_pair(L"\\Gamma",   L'\U00000393'),
+    make_pair(L"\\Delta",   L'\U00000394'),
+    make_pair(L"\\Theta",   L'\U00000398'),
+    make_pair(L"\\Lambda",  L'\U0000039B'),
+    make_pair(L"\\Pi",      L'\U000003A0'),
+    make_pair(L"\\Sigma",   L'\U000003A3'),
+    make_pair(L"\\Upsilon", L'\U000003A5'),
+    make_pair(L"\\Phi",     L'\U000003A6'),
+    make_pair(L"\\Psi",     L'\U000003A8'),
+    make_pair(L"\\Omega",   L'\U000003A9'),
+    make_pair(L"\\Xi",      L'\U0000039E')
+};
+wishful_hash_map<wstring, wchar_t> uppercaseGreekTable(
+    uppercaseGreekArray, END_ARRAY(uppercaseGreekArray));
+
+
+// TeX spacing commands paired with an integer width (negative for \!).
+// NOTE(review): values look like TeX mu units (\, = 3, \quad = 18); confirm.
+pair<wstring, int> spaceArray[] =
+{
+    make_pair(L"\\!",     -3),
+    make_pair(L"\\,",     3),
+    make_pair(L"\\>",     4),
+    make_pair(L"\\;",     5),
+    make_pair(L"\\quad",  18),
+    make_pair(L"\\qquad", 36),
+    // The last two widths are only approximations: TeX's real rules for
+    // "~" and "\ " are more involved than a single fixed value.
+    make_pair(L"~",       6),
+    make_pair(L"\\ ",     6)
+};
+wishful_hash_map<wstring, int> spaceTable(
+    spaceArray, END_ARRAY(spaceArray));
+
+
+// One row of operator data: the text an operator command is translated
+// to, the flavour it is given, and its limits setting, which defaults
+// to cLimitsDisplayLimits when the caller does not supply one.
+struct OperatorInfo
+{
+    OperatorInfo(
+        const wstring& text,
+        LayoutTree::Node::Flavour flavour,
+        LayoutTree::Node::Limits limits =
+            LayoutTree::Node::cLimitsDisplayLimits)
+        : mText(text), mFlavour(flavour), mLimits(limits)
+    { }
+
+    wstring mText;
+    LayoutTree::Node::Flavour mFlavour;
+    LayoutTree::Node::Limits mLimits;
+};
+
+// Here is a list of all commands that get translated as operators,
+// together with their MathML translation and flavour.
+pair<wstring, OperatorInfo> operatorArray[] =
+{
+ make_pair(L"(", OperatorInfo(L"(", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L")", OperatorInfo(L")", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"[", OperatorInfo(L"[", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"]", OperatorInfo(L"]", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"<", OperatorInfo(L"<", LayoutTree::Node::cFlavourRel)),
+ make_pair(L">", OperatorInfo(L">", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"+", OperatorInfo(L"+", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"-", OperatorInfo(L"-", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"=", OperatorInfo(L"=", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"|", OperatorInfo(L"|", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L";", OperatorInfo(L";", LayoutTree::Node::cFlavourPunct)),
+ make_pair(L":", OperatorInfo(L":", LayoutTree::Node::cFlavourRel)),
+ make_pair(L",", OperatorInfo(L",", LayoutTree::Node::cFlavourPunct)),
+ make_pair(L".", OperatorInfo(L".", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"/", OperatorInfo(L"/", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"?", OperatorInfo(L"?", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"!", OperatorInfo(L"!", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"@", OperatorInfo(L"@", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"*", OperatorInfo(L"*", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\_", OperatorInfo(L"_", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\&", OperatorInfo(L"&", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\$", OperatorInfo(L"$", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\#", OperatorInfo(L"#", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\%", OperatorInfo(L"%", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\{", OperatorInfo(L"{", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\}", OperatorInfo(L"}", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\ast", OperatorInfo(L"*", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\lbrace", OperatorInfo(L"{", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rbrace", OperatorInfo(L"}", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\vert", OperatorInfo(L"|", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\lvert", OperatorInfo(L"|", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rvert", OperatorInfo(L"|", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\lbrack", OperatorInfo(L"[", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rbrack", OperatorInfo(L"]", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\Vert", OperatorInfo(L"\U00002225", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\lVert", OperatorInfo(L"\U00002225", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rVert", OperatorInfo(L"\U00002225", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\lfloor", OperatorInfo(L"\U0000230A", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rfloor", OperatorInfo(L"\U0000230B", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\lceil", OperatorInfo(L"\U00002308", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rceil", OperatorInfo(L"\U00002309", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\langle", OperatorInfo(L"\U00002329", LayoutTree::Node::cFlavourOpen)),
+ make_pair(L"\\rangle", OperatorInfo(L"\U0000232A", LayoutTree::Node::cFlavourClose)),
+ make_pair(L"\\forall", OperatorInfo(L"\U00002200", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\exists", OperatorInfo(L"\U00002203", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\leftarrow", OperatorInfo(L"\U00002190", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightarrow", OperatorInfo(L"\U00002192", LayoutTree::Node::cFlavourRel)),
+
+ // FIX: The first version below has the correct MathML characters.
+ // They seem to be missing in the fonts currently shipped with
+ // Firefox, so we just map them to their short counterparts (second
+ // version) for the moment.
+ // FIX: perhaps it's possible to do this with the "stretchy" attribute
+ // instead?
+#if 0
+ make_pair(L"\\longleftarrow", OperatorInfo(L"\U000027F5", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\longrightarrow", OperatorInfo(L"\U000027F6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Longleftarrow", OperatorInfo(L"\U000027F8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Longrightarrow", OperatorInfo(L"\U000027F9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\longmapsto", OperatorInfo(L"\U000027FC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\longleftrightarrow", OperatorInfo(L"\U000027F7", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Longleftrightarrow", OperatorInfo(L"\U000027FA", LayoutTree::Node::cFlavourRel)),
+#else
+ make_pair(L"\\longleftarrow", OperatorInfo(L"\U00002190", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\longrightarrow", OperatorInfo(L"\U00002192", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Longleftarrow", OperatorInfo(L"\U000021D0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Longrightarrow", OperatorInfo(L"\U000021D2", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\longmapsto", OperatorInfo(L"\U000021A6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\longleftrightarrow", OperatorInfo(L"\U00002194", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Longleftrightarrow", OperatorInfo(L"\U000021D4", LayoutTree::Node::cFlavourRel)),
+#endif
+
+ make_pair(L"\\Leftarrow", OperatorInfo(L"\U000021D0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Rightarrow", OperatorInfo(L"\U000021D2", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\mapsto", OperatorInfo(L"\U000021A6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftrightarrow", OperatorInfo(L"\U00002194", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Leftrightarrow", OperatorInfo(L"\U000021D4", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\uparrow", OperatorInfo(L"\U00002191", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Uparrow", OperatorInfo(L"\U000021D1", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\downarrow", OperatorInfo(L"\U00002193", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Downarrow", OperatorInfo(L"\U000021D3", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\updownarrow", OperatorInfo(L"\U00002195", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Updownarrow", OperatorInfo(L"\U000021D5", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\searrow", OperatorInfo(L"\U00002198", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nearrow", OperatorInfo(L"\U00002197", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\swarrow", OperatorInfo(L"\U00002199", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nwarrow", OperatorInfo(L"\U00002196", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\hookrightarrow", OperatorInfo(L"\U000021AA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\hookleftarrow", OperatorInfo(L"\U000021A9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\upharpoonright", OperatorInfo(L"\U000021BE", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\upharpoonleft", OperatorInfo(L"\U000021BF", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\downharpoonright", OperatorInfo(L"\U000021C2", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\downharpoonleft", OperatorInfo(L"\U000021C3", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightharpoonup", OperatorInfo(L"\U000021C0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightharpoondown", OperatorInfo(L"\U000021C1", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftharpoonup", OperatorInfo(L"\U000021BC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftharpoondown", OperatorInfo(L"\U000021BD", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nleftarrow", OperatorInfo(L"\U0000219A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nrightarrow", OperatorInfo(L"\U0000219B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\supset", OperatorInfo(L"\U00002283", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\subset", OperatorInfo(L"\U00002282", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\supseteq", OperatorInfo(L"\U00002287", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\subseteq", OperatorInfo(L"\U00002286", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\sqsupset", OperatorInfo(L"\U00002290", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\sqsubset", OperatorInfo(L"\U0000228F", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\sqsupseteq", OperatorInfo(L"\U00002292", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\sqsubseteq", OperatorInfo(L"\U00002291", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\supsetneq", OperatorInfo(L"\U0000228B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\subsetneq", OperatorInfo(L"\U0000228A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\in", OperatorInfo(L"\U00002208", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ni", OperatorInfo(L"\U0000220B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\notin", OperatorInfo(L"\U00002209", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\mid", OperatorInfo(L"|", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\sim", OperatorInfo(L"\U0000223C", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\simeq", OperatorInfo(L"\U00002243", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\approx", OperatorInfo(L"\U00002248", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\propto", OperatorInfo(L"\U0000221D", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\equiv", OperatorInfo(L"\U00002261", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\cong", OperatorInfo(L"\U00002245", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\neq", OperatorInfo(L"\U00002260", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ll", OperatorInfo(L"\U0000226A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gg", OperatorInfo(L"\U0000226B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\geq", OperatorInfo(L"\U00002265", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leq", OperatorInfo(L"\U00002264", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\triangleleft", OperatorInfo(L"\U000025C3", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\triangleright", OperatorInfo(L"\U000025B9", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\models", OperatorInfo(L"\U000022A7", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\vdash", OperatorInfo(L"\U000022A2", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Vdash", OperatorInfo(L"\U000022A9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\vDash", OperatorInfo(L"\U000022A8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lesssim", OperatorInfo(L"\U00002272", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nless", OperatorInfo(L"\U0000226E", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ngeq", OperatorInfo(L"\U00002271", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nleq", OperatorInfo(L"\U00002270", LayoutTree::Node::cFlavourRel)),
+
+ // FIX: the fonts shipped with Firefox 1.5 don't know about
+ // 0x2a2f (&Cross;). So I'm mapping it to 0xd7 (&times;) for now.
+#if 0
+ make_pair(L"\\times", OperatorInfo(L"\U00002A2F", LayoutTree::Node::cFlavourBin)),
+#else
+ make_pair(L"\\times", OperatorInfo(L"\U000000D7", LayoutTree::Node::cFlavourBin)),
+#endif
+
+ make_pair(L"\\div", OperatorInfo(L"\U000000F7", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\wedge", OperatorInfo(L"\U00002227", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\vee", OperatorInfo(L"\U00002228", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\oplus", OperatorInfo(L"\U00002295", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\otimes", OperatorInfo(L"\U00002297", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\cap", OperatorInfo(L"\U00002229", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\cup", OperatorInfo(L"\U0000222A", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\sqcap", OperatorInfo(L"\U00002293", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\sqcup", OperatorInfo(L"\U00002294", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\smile", OperatorInfo(L"\U00002323", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\frown", OperatorInfo(L"\U00002322", LayoutTree::Node::cFlavourRel)),
+ // FIX: how to make these smiles/frowns smaller?
+ make_pair(L"\\smallsmile", OperatorInfo(L"\U00002323", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\smallfrown", OperatorInfo(L"\U00002322", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\setminus", OperatorInfo(L"\U00002216", LayoutTree::Node::cFlavourBin)),
+ // FIX: how to make smallsetminus smaller?
+ make_pair(L"\\smallsetminus", OperatorInfo(L"\U00002216", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\star", OperatorInfo(L"\U000022C6", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\triangle", OperatorInfo(L"\U000025B3", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\wr", OperatorInfo(L"\U00002240", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\circ", OperatorInfo(L"\U00002218", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\lnot", OperatorInfo(L"\U000000AC", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\nabla", OperatorInfo(L"\U00002207", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\prime", OperatorInfo(L"\U00002032", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\backslash", OperatorInfo(L"\U00002216", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\pm", OperatorInfo(L"\U000000B1", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\mp", OperatorInfo(L"\U00002213", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\angle", OperatorInfo(L"\U00002220", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\nmid", OperatorInfo(L"\U00002224", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\square", OperatorInfo(L"\U000025A1", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\Box", OperatorInfo(L"\U000025A1", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\checkmark", OperatorInfo(L"\U00002713", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\complement", OperatorInfo(L"\U00002201", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\flat", OperatorInfo(L"\U0000266D", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\sharp", OperatorInfo(L"\U0000266F", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\natural", OperatorInfo(L"\U0000266E", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\bullet", OperatorInfo(L"\U00002022", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\dagger", OperatorInfo(L"\U00002020", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\ddagger", OperatorInfo(L"\U00002021", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\clubsuit", OperatorInfo(L"\U00002663", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\spadesuit", OperatorInfo(L"\U00002660", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\heartsuit", OperatorInfo(L"\U00002665", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\diamondsuit", OperatorInfo(L"\U00002666", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\top", OperatorInfo(L"\U000022A4", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\bot", OperatorInfo(L"\U000022A5", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\perp", OperatorInfo(L"\U000022A5", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\cdot", OperatorInfo(L"\U000022C5", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\vdots", OperatorInfo(L"\U000022EE", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\ddots", OperatorInfo(L"\U000022F1", LayoutTree::Node::cFlavourInner)),
+ make_pair(L"\\cdots", OperatorInfo(L"\U000022EF", LayoutTree::Node::cFlavourInner)),
+ make_pair(L"\\ldots", OperatorInfo(L"\U00002026", LayoutTree::Node::cFlavourInner)),
+ // FIX: these next two aren't right. The amsmath package does tricky
+ // things so that the dots change their vertical position depending
+ // on the surrounding operators. We chicken out and just map them
+ // to the same as \cdots and \ldots respectively.
+ make_pair(L"\\dotsb", OperatorInfo(L"\U000022EF", LayoutTree::Node::cFlavourInner)),
+ make_pair(L"\\dots", OperatorInfo(L"\U00002026", LayoutTree::Node::cFlavourInner)),
+ make_pair(L"\\sum", OperatorInfo(L"\U00002211", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\prod", OperatorInfo(L"\U0000220F", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\int", OperatorInfo(L"\U0000222B", LayoutTree::Node::cFlavourOp, LayoutTree::Node::cLimitsNoLimits)),
+ make_pair(L"\\iint", OperatorInfo(L"\U0000222C", LayoutTree::Node::cFlavourOp, LayoutTree::Node::cLimitsNoLimits)),
+ make_pair(L"\\iiint", OperatorInfo(L"\U0000222D", LayoutTree::Node::cFlavourOp, LayoutTree::Node::cLimitsNoLimits)),
+ make_pair(L"\\iiiint", OperatorInfo(L"\U00002A0C", LayoutTree::Node::cFlavourOp, LayoutTree::Node::cLimitsNoLimits)),
+ make_pair(L"\\oint", OperatorInfo(L"\U0000222E", LayoutTree::Node::cFlavourOp, LayoutTree::Node::cLimitsNoLimits)),
+ make_pair(L"\\bigcap", OperatorInfo(L"\U000022C2", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigodot", OperatorInfo(L"\U00002A00", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigcup", OperatorInfo(L"\U000022C3", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigotimes", OperatorInfo(L"\U00002A02", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\coprod", OperatorInfo(L"\U00002210", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigsqcup", OperatorInfo(L"\U00002A06", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigoplus", OperatorInfo(L"\U00002A01", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigvee", OperatorInfo(L"\U000022C1", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\biguplus", OperatorInfo(L"\U00002A04", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\bigwedge", OperatorInfo(L"\U000022C0", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\ulcorner", OperatorInfo(L"\U0000231C", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\urcorner", OperatorInfo(L"\U0000231D", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\llcorner", OperatorInfo(L"\U0000231E", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\lrcorner", OperatorInfo(L"\U0000231F", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\dashrightarrow", OperatorInfo(L"\U0000290F", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\dashleftarrow", OperatorInfo(L"\U0000290E", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\backprime", OperatorInfo(L"\U00002035", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\vartriangle", OperatorInfo(L"\U000025B5", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\blacktriangle", OperatorInfo(L"\U000025B4", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\triangledown", OperatorInfo(L"\U000025BF", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\blacktriangledown", OperatorInfo(L"\U000025BE", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\blacksquare", OperatorInfo(L"\U000025FC", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\lozenge", OperatorInfo(L"\U000025CA", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\blacklozenge", OperatorInfo(L"\U000029EB", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\bigstar", OperatorInfo(L"\U00002605", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\sphericalangle", OperatorInfo(L"\U00002222", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\measuredangle", OperatorInfo(L"\U00002221", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\dotplus", OperatorInfo(L"\U00002214", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\ltimes", OperatorInfo(L"\U000022C9", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\rtimes", OperatorInfo(L"\U000022CA", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\Cap", OperatorInfo(L"\U000022D2", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\leftthreetimes", OperatorInfo(L"\U000022CB", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\rightthreetimes", OperatorInfo(L"\U000022CC", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\Cup", OperatorInfo(L"\U000022D3", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\barwedge", OperatorInfo(L"\U00002305", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\curlywedge", OperatorInfo(L"\U000022CF", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\veebar", OperatorInfo(L"\U000022BB", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\curlyvee", OperatorInfo(L"\U000022CE", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\doublebarwedge", OperatorInfo(L"\U00002306", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\boxminus", OperatorInfo(L"\U0000229F", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\circleddash", OperatorInfo(L"\U0000229D", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\boxtimes", OperatorInfo(L"\U000022A0", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\circledast", OperatorInfo(L"\U0000229B", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\boxdot", OperatorInfo(L"\U000022A1", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\circledcirc", OperatorInfo(L"\U0000229A", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\boxplus", OperatorInfo(L"\U0000229E", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\centerdot", OperatorInfo(L"\U000022C5", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\divideontimes", OperatorInfo(L"\U000022C7", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\intercal", OperatorInfo(L"\U000022BA", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\leqq", OperatorInfo(L"\U00002266", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\geqq", OperatorInfo(L"\U00002267", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leqslant", OperatorInfo(L"\U00002A7D", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\geqslant", OperatorInfo(L"\U00002A7E", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\eqslantless", OperatorInfo(L"\U00002A95", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\eqslantgtr", OperatorInfo(L"\U00002A96", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gtrsim", OperatorInfo(L"\U00002273", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lessapprox", OperatorInfo(L"\U00002A85", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gtrapprox", OperatorInfo(L"\U00002A86", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\approxeq", OperatorInfo(L"\U0000224A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\eqsim", OperatorInfo(L"\U00002242", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lessdot", OperatorInfo(L"\U000022D6", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\gtrdot", OperatorInfo(L"\U000022D7", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\lll", OperatorInfo(L"\U000022D8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ggg", OperatorInfo(L"\U000022D9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lessgtr", OperatorInfo(L"\U00002276", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gtrless", OperatorInfo(L"\U00002277", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lesseqgtr", OperatorInfo(L"\U000022DA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gtreqless", OperatorInfo(L"\U000022DB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lesseqqgtr", OperatorInfo(L"\U00002A8B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gtreqqless", OperatorInfo(L"\U00002A8C", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\doteqdot", OperatorInfo(L"\U00002251", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\eqcirc", OperatorInfo(L"\U00002256", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\risingdotseq", OperatorInfo(L"\U00002253", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\circeq", OperatorInfo(L"\U00002257", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\fallingdotseq", OperatorInfo(L"\U00002252", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\triangleq", OperatorInfo(L"\U0000225C", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\backsim", OperatorInfo(L"\U0000223D", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\thicksim", OperatorInfo(L"\U0000223C", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\backsimeq", OperatorInfo(L"\U000022CD", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\thickapprox", OperatorInfo(L"\U00002248", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\subseteqq", OperatorInfo(L"\U00002AC5", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\supseteqq", OperatorInfo(L"\U00002AC6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Subset", OperatorInfo(L"\U000022D0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Supset", OperatorInfo(L"\U000022D1", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\preccurlyeq", OperatorInfo(L"\U0000227C", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succcurlyeq", OperatorInfo(L"\U0000227D", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\curlyeqprec", OperatorInfo(L"\U000022DE", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\curlyeqsucc", OperatorInfo(L"\U000022DF", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\precsim", OperatorInfo(L"\U0000227E", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succsim", OperatorInfo(L"\U0000227F", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\precapprox", OperatorInfo(L"\U00002AB7", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succapprox", OperatorInfo(L"\U00002AB8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Vvdash", OperatorInfo(L"\U000022AA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\shortmid", OperatorInfo(L"\U00002223", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\shortparallel", OperatorInfo(L"\U00002225", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\bumpeq", OperatorInfo(L"\U0000224F", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\between", OperatorInfo(L"\U0000226C", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Bumpeq", OperatorInfo(L"\U0000224E", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\varpropto", OperatorInfo(L"\U0000221D", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\backepsilon", OperatorInfo(L"\U000003F6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\blacktriangleleft", OperatorInfo(L"\U000025C0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\blacktriangleright", OperatorInfo(L"\U000025B6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\therefore", OperatorInfo(L"\U00002234", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\because", OperatorInfo(L"\U00002235", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ngtr", OperatorInfo(L"\U0000226F", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nleqslant", OperatorInfo(L"\U00002A7D\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ngeqslant", OperatorInfo(L"\U00002A7E\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nleqq", OperatorInfo(L"\U00002266\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ngeqq", OperatorInfo(L"\U00002267\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lneqq", OperatorInfo(L"\U00002268", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gneqq", OperatorInfo(L"\U00002269", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lvertneqq", OperatorInfo(L"\U00002268\U0000FE00", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gvertneqq", OperatorInfo(L"\U00002269\U0000FE00", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lnsim", OperatorInfo(L"\U000022E6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gnsim", OperatorInfo(L"\U000022E7", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\lnapprox", OperatorInfo(L"\U00002A89", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\gnapprox", OperatorInfo(L"\U00002A8A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nprec", OperatorInfo(L"\U00002280", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsucc", OperatorInfo(L"\U00002281", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\npreceq", OperatorInfo(L"\U00002AAF\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsucceq", OperatorInfo(L"\U00002AB0\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\precneqq", OperatorInfo(L"\U00002AB5", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succneqq", OperatorInfo(L"\U00002AB6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\precnsim", OperatorInfo(L"\U000022E8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succnsim", OperatorInfo(L"\U000022E9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\precnapprox", OperatorInfo(L"\U00002AB9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succnapprox", OperatorInfo(L"\U00002ABA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsim", OperatorInfo(L"\U00002241", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ncong", OperatorInfo(L"\U00002247", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nshortmid", OperatorInfo(L"\U00002224", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nshortparallel", OperatorInfo(L"\U00002226", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nmid", OperatorInfo(L"\U00002224", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nparallel", OperatorInfo(L"\U00002226", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nvdash", OperatorInfo(L"\U000022AC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nvDash", OperatorInfo(L"\U000022AD", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nVdash", OperatorInfo(L"\U000022AE", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nVDash", OperatorInfo(L"\U000022AF", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ntriangleleft", OperatorInfo(L"\U000022EA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ntriangleright", OperatorInfo(L"\U000022EB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ntrianglelefteq", OperatorInfo(L"\U000022EC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\ntrianglerighteq", OperatorInfo(L"\U000022ED", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsubseteq", OperatorInfo(L"\U00002288", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsupseteq", OperatorInfo(L"\U00002289", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsubseteqq", OperatorInfo(L"\U00002AC5\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nsupseteqq", OperatorInfo(L"\U00002AC6\U00000338", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\subsetneq", OperatorInfo(L"\U0000228A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\supsetneq", OperatorInfo(L"\U0000228B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\varsubsetneq", OperatorInfo(L"\U0000228A\U0000FE00", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\varsupsetneq", OperatorInfo(L"\U0000228B\U0000FE00", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\subsetneqq", OperatorInfo(L"\U00002ACB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\supsetneqq", OperatorInfo(L"\U00002ACC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\varsubsetneqq", OperatorInfo(L"\U00002ACB\U0000FE00", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\varsupsetneqq", OperatorInfo(L"\U00002ACC\U0000FE00", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftleftarrows", OperatorInfo(L"\U000021C7", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightrightarrows", OperatorInfo(L"\U000021C9", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftrightarrows", OperatorInfo(L"\U000021C6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightleftarrows", OperatorInfo(L"\U000021C4", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Lleftarrow", OperatorInfo(L"\U000021DA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Rrightarrow", OperatorInfo(L"\U000021DB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\twoheadleftarrow", OperatorInfo(L"\U0000219E", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\twoheadrightarrow", OperatorInfo(L"\U000021A0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftarrowtail", OperatorInfo(L"\U000021A2", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightarrowtail", OperatorInfo(L"\U000021A3", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\looparrowleft", OperatorInfo(L"\U000021AB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\looparrowright", OperatorInfo(L"\U000021AC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftrightharpoons", OperatorInfo(L"\U000021CB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightleftharpoons", OperatorInfo(L"\U000021CC", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\curvearrowleft", OperatorInfo(L"\U000021B6", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\curvearrowright", OperatorInfo(L"\U000021B7", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\circlearrowleft", OperatorInfo(L"\U000021BA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\circlearrowright", OperatorInfo(L"\U000021BB", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Lsh", OperatorInfo(L"\U000021B0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\Rsh", OperatorInfo(L"\U000021B1", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\upuparrows", OperatorInfo(L"\U000021C8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\downdownarrows", OperatorInfo(L"\U000021CA", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\multimap", OperatorInfo(L"\U000022B8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\rightsquigarrow", OperatorInfo(L"\U0000219D", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\leftrightsquigarrow", OperatorInfo(L"\U000021AD", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nLeftarrow", OperatorInfo(L"\U000021CD", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nRightarrow", OperatorInfo(L"\U000021CF", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nleftrightarrow", OperatorInfo(L"\U000021AE", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nLeftrightarrow", OperatorInfo(L"\U000021CE", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\pitchfork", OperatorInfo(L"\U000022D4", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\nexists", OperatorInfo(L"\U00002204", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\lhd", OperatorInfo(L"\U000022B2", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\rhd", OperatorInfo(L"\U000022B3", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\unlhd", OperatorInfo(L"\U000022B4", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\unrhd", OperatorInfo(L"\U000022B5", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\leadsto", OperatorInfo(L"\U000021DD", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\uplus", OperatorInfo(L"\U0000228E", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\diamond", OperatorInfo(L"\U000022C4", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\bigtriangleup", OperatorInfo(L"\U000025B3", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\bigtriangledown", OperatorInfo(L"\U000025BD", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\ominus", OperatorInfo(L"\U00002296", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\oslash", OperatorInfo(L"\U00002298", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\odot", OperatorInfo(L"\U00002299", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\bigcirc", OperatorInfo(L"\U000025EF", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\amalg", OperatorInfo(L"\U00002A3F", LayoutTree::Node::cFlavourBin)),
+ make_pair(L"\\prec", OperatorInfo(L"\U0000227A", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succ", OperatorInfo(L"\U0000227B", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\preceq", OperatorInfo(L"\U00002AAF", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\succeq", OperatorInfo(L"\U00002AB0", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\dashv", OperatorInfo(L"\U000022A3", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\asymp", OperatorInfo(L"\U00002248", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\doteq", OperatorInfo(L"\U00002250", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\parallel", OperatorInfo(L"\U00002225", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\bowtie", OperatorInfo(L"\U000022C8", LayoutTree::Node::cFlavourRel)),
+ make_pair(L"\\surd", OperatorInfo(L"\U0000221A", LayoutTree::Node::cFlavourOrd)),
+
+ make_pair(L"\\lim", OperatorInfo(L"lim", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\sup", OperatorInfo(L"sup", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\inf", OperatorInfo(L"inf", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\min", OperatorInfo(L"min", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\max", OperatorInfo(L"max", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\gcd", OperatorInfo(L"gcd", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\det", OperatorInfo(L"det", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\Pr", OperatorInfo(L"Pr", LayoutTree::Node::cFlavourOp)),
+ // FIX: the space between the words in these operators is maybe a tiny bit too big.
+ make_pair(L"\\limsup", OperatorInfo(L"lim sup", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\liminf", OperatorInfo(L"lim inf", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\injlim", OperatorInfo(L"inj lim", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\projlim", OperatorInfo(L"proj lim", LayoutTree::Node::cFlavourOp)),
+
+ // The translation of \not is special: we record it as a SymbolOperator
+ // in the layout tree, but it gets special handling later.
+ make_pair(L"\\not", OperatorInfo(L"NOT", LayoutTree::Node::cFlavourRel))
+};
+// Lookup table keyed by TeX command name, populated from the entries of
+// operatorArray. MathSymbol::BuildLayoutTree consults it to translate
+// operator commands.
+wishful_hash_map<wstring, OperatorInfo> operatorTable(
+    operatorArray, END_ARRAY(operatorArray)
+);
+
+
+// Describes how a command that is translated as an identifier gets rendered.
+struct IdentifierInfo
+{
+    // True if the identifier defaults to an italic font, false for upright.
+    bool mIsItalicDefault;
+    // Text emitted for the identifier.
+    wstring mText;
+    // Flavour given to the layout node built for the identifier.
+    LayoutTree::Node::Flavour mFlavour;
+
+    // Stores the three attributes unchanged.
+    IdentifierInfo(
+        bool isItalicDefault,
+        const wstring& text,
+        LayoutTree::Node::Flavour flavour
+    )
+        : mIsItalicDefault(isItalicDefault), mText(text), mFlavour(flavour)
+    {
+    }
+};
+
+// Table of all commands that get translated as identifiers. Each entry is
+// (TeX command, IdentifierInfo(italic by default?, translated text, flavour));
+// MathSymbol::BuildLayoutTree looks commands up via identifierTable below.
+pair<wstring, IdentifierInfo> identifierArray[] =
+{
+ make_pair(L"\\ker", IdentifierInfo(false, L"ker", LayoutTree::Node::cFlavourOp)), // named functions: upright, flavour Op
+ make_pair(L"\\deg", IdentifierInfo(false, L"deg", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\hom", IdentifierInfo(false, L"hom", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\dim", IdentifierInfo(false, L"dim", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\arg", IdentifierInfo(false, L"arg", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\sin", IdentifierInfo(false, L"sin", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\cos", IdentifierInfo(false, L"cos", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\sec", IdentifierInfo(false, L"sec", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\csc", IdentifierInfo(false, L"csc", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\tan", IdentifierInfo(false, L"tan", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\cot", IdentifierInfo(false, L"cot", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\arcsin", IdentifierInfo(false, L"arcsin", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\arccos", IdentifierInfo(false, L"arccos", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\arctan", IdentifierInfo(false, L"arctan", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\sinh", IdentifierInfo(false, L"sinh", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\cosh", IdentifierInfo(false, L"cosh", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\tanh", IdentifierInfo(false, L"tanh", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\coth", IdentifierInfo(false, L"coth", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\log", IdentifierInfo(false, L"log", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\lg", IdentifierInfo(false, L"lg", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\ln", IdentifierInfo(false, L"ln", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\exp", IdentifierInfo(false, L"exp", LayoutTree::Node::cFlavourOp)),
+ make_pair(L"\\aleph", IdentifierInfo(false, L"\U00002135", LayoutTree::Node::cFlavourOrd)), // single-symbol identifiers: flavour Ord
+ make_pair(L"\\beth", IdentifierInfo(false, L"\U00002136", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\gimel", IdentifierInfo(false, L"\U00002137", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\daleth", IdentifierInfo(false, L"\U00002138", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\wp", IdentifierInfo(true, L"\U00002118", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\ell", IdentifierInfo(true, L"\U00002113", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\P", IdentifierInfo(true, L"\U000000B6", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\imath", IdentifierInfo(true, L"\U00000131", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\Finv", IdentifierInfo(false, L"\U00002132", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\Game", IdentifierInfo(false, L"\U00002141", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\partial", IdentifierInfo(false, L"\U00002202", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\Re", IdentifierInfo(false, L"\U0000211C", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\Im", IdentifierInfo(false, L"\U00002111", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\infty", IdentifierInfo(false, L"\U0000221E", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\hbar", IdentifierInfo(false, L"\U00000127", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\emptyset", IdentifierInfo(false, L"\U00002205", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\varnothing", IdentifierInfo(false, L"\U000000D8", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\S", IdentifierInfo(false, L"\U000000A7", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\eth", IdentifierInfo(false, L"\U000000F0", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\hslash", IdentifierInfo(false, L"\U0000210F", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\mho", IdentifierInfo(false, L"\U00002127", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\circledR", IdentifierInfo(false, L"\U000000AE", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\yen", IdentifierInfo(false, L"\U000000A5", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\maltese", IdentifierInfo(false, L"\U00002720", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\circledS", IdentifierInfo(false, L"\U000024C8", LayoutTree::Node::cFlavourOrd)),
+ // FIX: these two need special testing since they're plane-1 (code points above U+FFFF):
+ // FIX: need to update mediawiki to recognise these entities
+ make_pair(L"\\Bbbk", IdentifierInfo(false, L"\U0001D55C", LayoutTree::Node::cFlavourOrd)),
+ make_pair(L"\\jmath", IdentifierInfo(true, L"\U0001D6A5", LayoutTree::Node::cFlavourOrd))
+};
+wishful_hash_map<wstring, IdentifierInfo> identifierTable( // keyed by TeX command name
+ identifierArray,
+ END_ARRAY(identifierArray)
+);
+
+
+namespace ParseTree
+{
+
+// Returns the MathML font used for operator-like symbols. Operators are only
+// affected by the boldsymbol status, not by the font family.
+static MathmlFont GetOperatorFont(const TexProcessingState& state)
+{
+    return state.mMathFont.mIsBoldsymbol
+        ? cMathmlFontBold : cMathmlFontNormal;
+}
+
+// Builds a SymbolOperator with flavour "ord" and display limits that shows
+// "text" in the operator font, style and colour taken from "state".
+static auto_ptr<LayoutTree::Node> NewOrdOperator(
+    const TexProcessingState& state,
+    bool isStretchy,
+    bool isAccent,
+    const wstring& text
+)
+{
+    return auto_ptr<LayoutTree::Node>(
+        new LayoutTree::SymbolOperator(
+            isStretchy,
+            L"",
+            isAccent,
+            text,
+            GetOperatorFont(state),
+            state.mStyle,
+            LayoutTree::Node::cFlavourOrd,
+            LayoutTree::Node::cLimitsDisplayLimits,
+            state.mColour
+        )
+    );
+}
+
+// Builds a row holding a non-stretchy, non-accent operator showing "text",
+// with a user-requested space of width "spaceBefore" in front of it and one
+// of width "spaceAfter" behind it. The row's flavour is left untouched;
+// callers assign it where the command requires one.
+static auto_ptr<LayoutTree::Row> NewSpacedOperatorRow(
+    const TexProcessingState& state,
+    int spaceBefore,
+    const wstring& text,
+    int spaceAfter
+)
+{
+    auto_ptr<LayoutTree::Row> row(
+        new LayoutTree::Row(state.mStyle, state.mColour)
+    );
+    row->mChildren.push_back(new LayoutTree::Space(spaceBefore, true));
+    row->mChildren.push_back(
+        NewOrdOperator(state, false, false, text).release()
+    );
+    row->mChildren.push_back(new LayoutTree::Space(spaceAfter, true));
+    return row;
+}
+
+// Converts this symbol command into a layout tree node.
+//
+// The command is tried, in order, as: a single ASCII letter or digit, a
+// lowercase greek letter, an uppercase greek letter, a space command, an
+// entry of operatorTable, an entry of identifierTable, and finally one of
+// the commands that need bespoke layout (\And, \iff, \colon, \bmod, \mod
+// and the \varinjlim family).
+//
+// Throws Exception (L"UnavailableSymbolFontCombination") when the symbol is
+// rejected for the current font family, and logic_error when the command is
+// a single non-ASCII character or is not recognised at all.
+auto_ptr<LayoutTree::Node> MathSymbol::BuildLayoutTree(
+    const TexProcessingState& state
+) const
+{
+    // First check for certain easy-to-handle single character commands,
+    // like letters or numerals.
+    if (mCommand.size() == 1)
+    {
+        bool good = false;
+        bool isNumber = false;
+        // fancyFontsIllegal is set for characters which can't be
+        // displayed in cal or bb fonts (the two families rejected below).
+        bool fancyFontsIllegal = false;
+        TexMathFont::Family defaultFamily = TexMathFont::cFamilyIt;
+        TexMathFont font = state.mMathFont;
+
+        if (mCommand[0] >= L'A' && mCommand[0] <= L'Z')
+            good = true;
+
+        else if (mCommand[0] >= L'a' && mCommand[0] <= L'z')
+        {
+            fancyFontsIllegal = true;
+            good = true;
+        }
+
+        else if (mCommand[0] >= L'0' && mCommand[0] <= L'9')
+        {
+            // Numerals default to roman rather than italic.
+            fancyFontsIllegal = true;
+            defaultFamily = TexMathFont::cFamilyRm;
+            good = isNumber = true;
+        }
+
+        if (good)
+        {
+            if (font.mFamily == TexMathFont::cFamilyDefault)
+                font.mFamily = defaultFamily;
+
+            if (fancyFontsIllegal &&
+                font.mFamily == TexMathFont::cFamilyCal
+            )
+                throw Exception(
+                    L"UnavailableSymbolFontCombination", mCommand, L"cal"
+                );
+
+            if (fancyFontsIllegal &&
+                font.mFamily == TexMathFont::cFamilyBb
+            )
+                throw Exception(
+                    L"UnavailableSymbolFontCombination", mCommand, L"bb"
+                );
+
+            if (isNumber)
+                return auto_ptr<LayoutTree::Node>(
+                    new LayoutTree::SymbolNumber(
+                        mCommand,
+                        font.GetMathmlApproximation(),
+                        state.mStyle,
+                        LayoutTree::Node::cFlavourOrd,
+                        LayoutTree::Node::cLimitsDisplayLimits,
+                        state.mColour
+                    )
+                );
+            else
+                return auto_ptr<LayoutTree::Node>(
+                    new LayoutTree::SymbolIdentifier(
+                        mCommand,
+                        font.GetMathmlApproximation(),
+                        state.mStyle,
+                        LayoutTree::Node::cFlavourOrd,
+                        LayoutTree::Node::cLimitsDisplayLimits,
+                        state.mColour
+                    )
+                );
+        }
+
+        // Non-ascii characters
+        if (mCommand[0] > 0x7F)
+            throw logic_error(
+                "Unexpected non-ASCII character "
+                "in MathSymbol::BuildLayoutTree"
+            );
+    }
+
+    wishful_hash_map<wstring, wchar_t>::const_iterator
+        lowercaseGreekLookup = lowercaseGreekTable.find(mCommand);
+
+    if (lowercaseGreekLookup != lowercaseGreekTable.end())
+    {
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::SymbolIdentifier(
+                wstring(1, lowercaseGreekLookup->second),
+                // lowercase greek is only affected by the boldsymbol
+                // status, not the family.
+                state.mMathFont.mIsBoldsymbol
+                    ? cMathmlFontBoldItalic : cMathmlFontItalic,
+                state.mStyle,
+                LayoutTree::Node::cFlavourOrd,
+                LayoutTree::Node::cLimitsDisplayLimits,
+                state.mColour
+            )
+        );
+    }
+
+    wishful_hash_map<wstring, wchar_t>::const_iterator
+        uppercaseGreekLookup = uppercaseGreekTable.find(mCommand);
+
+    if (uppercaseGreekLookup != uppercaseGreekTable.end())
+    {
+        TexMathFont font = state.mMathFont;
+        if (font.mFamily == TexMathFont::cFamilyCal)
+            throw Exception(
+                L"UnavailableSymbolFontCombination", mCommand, L"cal"
+            );
+
+        if (font.mFamily == TexMathFont::cFamilyBb)
+            throw Exception(
+                L"UnavailableSymbolFontCombination", mCommand, L"bb"
+            );
+
+        if (font.mFamily == TexMathFont::cFamilyFrak)
+            throw Exception(
+                L"UnavailableSymbolFontCombination", mCommand, L"frak"
+            );
+
+        // Uppercase greek defaults to roman.
+        if (font.mFamily == TexMathFont::cFamilyDefault)
+            font.mFamily = TexMathFont::cFamilyRm;
+
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::SymbolIdentifier(
+                wstring(1, uppercaseGreekLookup->second),
+                font.GetMathmlApproximation(),
+                state.mStyle,
+                LayoutTree::Node::cFlavourOrd,
+                LayoutTree::Node::cLimitsDisplayLimits,
+                state.mColour
+            )
+        );
+    }
+
+    wishful_hash_map<wstring, int>::const_iterator
+        spaceLookup = spaceTable.find(mCommand);
+
+    if (spaceLookup != spaceTable.end())
+    {
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::Space(
+                spaceLookup->second,
+                true        // true = indicates a user-requested space
+            )
+        );
+    }
+
+    wishful_hash_map<wstring, OperatorInfo>::const_iterator
+        operatorLookup = operatorTable.find(mCommand);
+
+    if (operatorLookup != operatorTable.end())
+    {
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::SymbolOperator(
+                false, L"",     // not stretchy
+                false,          // not an accent
+                operatorLookup->second.mText,
+                GetOperatorFont(state),
+                state.mStyle,
+                operatorLookup->second.mFlavour,
+                operatorLookup->second.mLimits,
+                state.mColour
+            )
+        );
+    }
+
+    wishful_hash_map<wstring, IdentifierInfo>::const_iterator
+        identifierLookup = identifierTable.find(mCommand);
+
+    if (identifierLookup != identifierTable.end())
+    {
+        // The table entry, not the current state, decides the family;
+        // the boldsymbol status of the current font is retained.
+        TexMathFont font = state.mMathFont;
+        font.mFamily =
+            identifierLookup->second.mIsItalicDefault
+                ? TexMathFont::cFamilyIt : TexMathFont::cFamilyRm;
+
+        return auto_ptr<LayoutTree::Node>(
+            new LayoutTree::SymbolIdentifier(
+                identifierLookup->second.mText,
+                font.GetMathmlApproximation(),
+                state.mStyle,
+                identifierLookup->second.mFlavour,
+                // For all the "\sin"-like functions:
+                (
+                    identifierLookup->second.mFlavour ==
+                        LayoutTree::Node::cFlavourOp
+                )
+                    ? LayoutTree::Node::cLimitsNoLimits
+                    : LayoutTree::Node::cLimitsDisplayLimits,
+                state.mColour
+            )
+        );
+    }
+
+    if (mCommand == L"\\And")
+    {
+        auto_ptr<LayoutTree::Row> row(
+            NewSpacedOperatorRow(state, 5, L"&", 5)
+        );
+        row->mFlavour = LayoutTree::Node::cFlavourRel;
+        return static_cast<auto_ptr<LayoutTree::Node> >(row);
+    }
+
+    if (mCommand == L"\\iff")
+    {
+        // FIX: I would like to make this stretchy and set a particular
+        // size, but for some reason firefox doesn't stretch things
+        // horizontally like this. It DOES do it if the element is in a
+        // <mover> or <munder> etc, but not when it's just by itself.
+        // Very strange. This is mozilla bug 320303.
+        auto_ptr<LayoutTree::Row> row(
+            NewSpacedOperatorRow(state, 5, L"\U000021D4", 5)
+        );
+        row->mFlavour = LayoutTree::Node::cFlavourRel;
+        return static_cast<auto_ptr<LayoutTree::Node> >(row);
+    }
+
+    if (mCommand == L"\\colon")
+    {
+        // FIX: this spacing stuff isn't quite right, but it will hopefully
+        // do. The amsmath package does all kinds of interesting things with
+        // \colon's spacing.
+        auto_ptr<LayoutTree::Row> row(
+            NewSpacedOperatorRow(state, 2, L":", 6)
+        );
+        return static_cast<auto_ptr<LayoutTree::Node> >(row);
+    }
+
+    if (mCommand == L"\\bmod")
+    {
+        auto_ptr<LayoutTree::Row> row(
+            NewSpacedOperatorRow(state, 1, L"mod", 1)
+        );
+        row->mFlavour = LayoutTree::Node::cFlavourBin;
+        return static_cast<auto_ptr<LayoutTree::Node> >(row);
+    }
+
+    if (mCommand == L"\\mod")
+    {
+        auto_ptr<LayoutTree::Row> row(
+            NewSpacedOperatorRow(state, 18, L"mod", 6)
+        );
+        return static_cast<auto_ptr<LayoutTree::Node> >(row);
+    }
+
+    if (mCommand == L"\\varinjlim" || mCommand == L"\\varprojlim" ||
+        mCommand == L"\\varlimsup" || mCommand == L"\\varliminf")
+    {
+        // All four are "lim" decorated with a single script: an arrow or
+        // a stretchy bar placed below, or (for \varlimsup) a bar above.
+        auto_ptr<LayoutTree::Node> base(
+            new LayoutTree::SymbolOperator(
+                false,
+                L"",
+                false,
+                L"lim",
+                GetOperatorFont(state),
+                state.mStyle,
+                LayoutTree::Node::cFlavourOp,
+                LayoutTree::Node::cLimitsLimits,
+                state.mColour
+            )
+        );
+
+        auto_ptr<LayoutTree::Scripts> node(
+            new LayoutTree::Scripts(
+                state.mStyle,
+                LayoutTree::Node::cFlavourOp,
+                LayoutTree::Node::cLimitsDisplayLimits,
+                state.mColour,
+                false,
+                base,
+                auto_ptr<LayoutTree::Node>(),
+                auto_ptr<LayoutTree::Node>()
+            )
+        );
+
+        if (mCommand == L"\\varinjlim")
+            node->mLower = NewOrdOperator(state, false, true, L"\U00002192");
+
+        else if (mCommand == L"\\varprojlim")
+            node->mLower = NewOrdOperator(state, false, true, L"\U00002190");
+
+        else if (mCommand == L"\\varliminf")
+            node->mLower = NewOrdOperator(state, true, true, L"\U000000AF");
+
+        else if (mCommand == L"\\varlimsup")
+            node->mUpper = NewOrdOperator(state, true, true, L"\U000000AF");
+
+        return static_cast<auto_ptr<LayoutTree::Node> >(node);
+    }
+
+    throw logic_error("Unexpected command in MathSymbol::BuildLayoutTree");
+}
+
+}
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/ParseTree3.cpp b/blahtexml/source/BlahtexCore/ParseTree3.cpp
new file mode 100644
index 0000000..88812e0
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/ParseTree3.cpp
@@ -0,0 +1,1317 @@
+// File "ParseTree3.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <stdexcept>
+#include <set>
+#include <iomanip>
+#include <sstream>
+#include "ParseTree.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// List of colour names that we know about, each paired with a 0xRRGGBB value (cf. Red, Green, Blue below).
+pair<wstring, RGBColour> gColourArray[] =
+{
+ make_pair(L"GreenYellow", 0xd8ff4f),
+ make_pair(L"Yellow", 0xffff00),
+ make_pair(L"yellow", 0xffff00), // lowercase spelling maps to the same value as "Yellow"
+ make_pair(L"Goldenrod", 0xffe528),
+ make_pair(L"Dandelion", 0xffb528),
+ make_pair(L"Apricot", 0xffad7a),
+ make_pair(L"Peach", 0xff7f4c),
+ make_pair(L"Melon", 0xff897f),
+ make_pair(L"YellowOrange", 0xff9300),
+ make_pair(L"Orange", 0xff6321),
+ make_pair(L"BurntOrange", 0xff7c00),
+ make_pair(L"Bittersweet", 0xc10200),
+ make_pair(L"RedOrange", 0xff3a21),
+ make_pair(L"Mahogany", 0xa50000),
+ make_pair(L"Maroon", 0xad0000),
+ make_pair(L"BrickRed", 0xb70000),
+ make_pair(L"Red", 0xff0000),
+ make_pair(L"red", 0xff0000),
+ make_pair(L"OrangeRed", 0xff007f),
+ make_pair(L"RubineRed", 0xff00dd),
+ make_pair(L"WildStrawberry", 0xff0a9b),
+ make_pair(L"Salmon", 0xff779e),
+ make_pair(L"CarnationPink", 0xff5eff),
+ make_pair(L"Magenta", 0xff00ff),
+ make_pair(L"magenta", 0xff00ff),
+ make_pair(L"VioletRed", 0xff30ff),
+ make_pair(L"Rhodamine", 0xff2dff),
+ make_pair(L"Mulberry", 0xa314f9),
+ make_pair(L"RedViolet", 0x9600a8),
+ make_pair(L"Fuchsia", 0x7202ea),
+ make_pair(L"Lavender", 0xff84ff),
+ make_pair(L"Thistle", 0xe068ff),
+ make_pair(L"Orchid", 0xad5bff),
+ make_pair(L"DarkOrchid", 0x9933cc),
+ make_pair(L"Purple", 0x8c23ff),
+ make_pair(L"Plum", 0x7f00ff),
+ make_pair(L"Violet", 0x351eff),
+ make_pair(L"RoyalPurple", 0x3f19ff),
+ make_pair(L"BlueViolet", 0x190cf4),
+ make_pair(L"Periwinkle", 0x6d72ff),
+ make_pair(L"CadetBlue", 0x606dc4),
+ make_pair(L"CornflowerBlue", 0x59ddff),
+ make_pair(L"MidnightBlue", 0x007091),
+ make_pair(L"NavyBlue", 0x0f75ff),
+ make_pair(L"RoyalBlue", 0x007fff),
+ make_pair(L"Blue", 0x0000ff),
+ make_pair(L"blue", 0x0000ff),
+ make_pair(L"Cerulean", 0x0fe2ff),
+ make_pair(L"Cyan", 0x00ffff),
+ make_pair(L"cyan", 0x00ffff),
+ make_pair(L"ProcessBlue", 0x0affff),
+ make_pair(L"SkyBlue", 0x60ffe0),
+ make_pair(L"Turquoise", 0x26ffcc),
+ make_pair(L"TealBlue", 0x1ef9a3),
+ make_pair(L"Aquamarine", 0x2dffb2),
+ make_pair(L"BlueGreen", 0x26ffaa),
+ make_pair(L"Emerald", 0x00ff7f),
+ make_pair(L"JungleGreen", 0x02ff7a),
+ make_pair(L"SeaGreen", 0x4fff7f),
+ make_pair(L"Green", 0x00ff00),
+ make_pair(L"green", 0x00ff00),
+ make_pair(L"ForestGreen", 0x00e000),
+ make_pair(L"PineGreen", 0x00bf28),
+ make_pair(L"LimeGreen", 0x7fff00),
+ make_pair(L"YellowGreen", 0x8eff42),
+ make_pair(L"SpringGreen", 0xbcff3d),
+ make_pair(L"OliveGreen", 0x009900),
+ make_pair(L"RawSienna", 0x8c0000),
+ make_pair(L"Sepia", 0x4c0000),
+ make_pair(L"Brown", 0x660000),
+ make_pair(L"Tan", 0xdb9370),
+ make_pair(L"Gray", 0x7f7f7f),
+ make_pair(L"Black", 0x000000),
+ make_pair(L"black", 0x000000),
+ make_pair(L"White", 0xffffff),
+ make_pair(L"white", 0xffffff)
+};
+
+wishful_hash_map<wstring, RGBColour> gColourTable( // colour name -> RGBColour; used by MathColour::Apply and TextColour::Apply
+ gColourArray,
+ END_ARRAY(gColourArray)
+);
+
+
+// Maps this TeX math font (family plus boldsymbol flag) onto a MathML font.
+// The bf, bb and tt families give the same result whether or not boldsymbol
+// is set. Any family without a case below (cFamilyDefault, for instance)
+// results in a logic_error.
+MathmlFont TexMathFont::GetMathmlApproximation() const
+{
+    const bool bold = mIsBoldsymbol;
+
+    switch (mFamily)
+    {
+        case cFamilyRm:
+            return bold ? cMathmlFontBold : cMathmlFontNormal;
+        case cFamilyIt:
+            return bold ? cMathmlFontBoldItalic : cMathmlFontItalic;
+        case cFamilyBf:
+            return cMathmlFontBold;
+        case cFamilyBb:
+            return cMathmlFontDoubleStruck;
+        case cFamilySf:
+            return bold ? cMathmlFontBoldSansSerif : cMathmlFontSansSerif;
+        case cFamilyCal:
+            return bold ? cMathmlFontBoldScript : cMathmlFontScript;
+        case cFamilyTt:
+            return cMathmlFontMonospace;
+        case cFamilyFrak:
+            return bold ? cMathmlFontBoldFraktur : cMathmlFontFraktur;
+    }
+
+    throw logic_error("Unexpected TexMathFont data");
+}
+
+// Maps this TeX text font (family, bold flag, italic flag) onto a MathML
+// font. The tt family ignores the bold and italic flags. A family without
+// a case below results in a logic_error.
+MathmlFont TexTextFont::GetMathmlApproximation() const
+{
+    switch (mFamily)
+    {
+        case cFamilyRm:
+            if (mIsBold)
+                return mIsItalic ? cMathmlFontBoldItalic : cMathmlFontBold;
+            return mIsItalic ? cMathmlFontItalic : cMathmlFontNormal;
+
+        case cFamilySf:
+            if (mIsBold)
+                return mIsItalic
+                    ? cMathmlFontSansSerifBoldItalic
+                    : cMathmlFontBoldSansSerif;
+            return mIsItalic
+                ? cMathmlFontSansSerifItalic
+                : cMathmlFontSansSerif;
+
+        case cFamilyTt:
+            return cMathmlFontMonospace;
+    }
+
+    throw logic_error("Unexpected TexTextFont data");
+}
+
+
+namespace ParseTree
+{
+
+// A couple of destructors that implement ownership conventions.
+
+// Deletes every child node held in mChildren.
+MathList::~MathList()
+{
+    vector<MathNode*>::iterator child = mChildren.begin();
+    while (child != mChildren.end())
+    {
+        delete *child;
+        child++;
+    }
+}
+
+// Deletes every entry held in mEntries.
+MathTableRow::~MathTableRow()
+{
+    vector<MathNode*>::iterator entry = mEntries.begin();
+    while (entry != mEntries.end())
+    {
+        delete *entry;
+        entry++;
+    }
+}
+
+// Deletes every row held in mRows.
+MathTable::~MathTable()
+{
+    vector<MathTableRow*>::iterator row = mRows.begin();
+    while (row != mRows.end())
+    {
+        delete *row;
+        row++;
+    }
+}
+
+// Deletes every child node held in mChildren.
+TextList::~TextList()
+{
+    vector<TextNode*>::iterator child = mChildren.begin();
+    while (child != mChildren.end())
+    {
+        delete *child;
+        child++;
+    }
+}
+
+
+// =========================================================================
+// Implementations of ParseTree::MathStateChange/TextStateChange::Apply
+
+
+// Applies a math-mode state change command to "state". Style commands
+// (\displaystyle and friends) replace state.mStyle; font commands (\rm, \bf,
+// \it, \cal, \tt, \sf) replace state.mMathFont.mFamily. Any other command
+// results in a logic_error.
+void MathStateChange::Apply(
+    TexProcessingState& state
+) const
+{
+    typedef wishful_hash_map<wstring, LayoutTree::Node::Style> StyleMap;
+    typedef wishful_hash_map<wstring, TexMathFont::Family> FamilyMap;
+
+    // Commands that switch the layout style.
+    static pair<wstring, LayoutTree::Node::Style> styleEntries[] =
+    {
+        make_pair(L"\\displaystyle", LayoutTree::Node::cStyleDisplay),
+        make_pair(L"\\textstyle", LayoutTree::Node::cStyleText),
+        make_pair(L"\\scriptstyle", LayoutTree::Node::cStyleScript),
+        make_pair(L"\\scriptscriptstyle", LayoutTree::Node::cStyleScriptScript)
+    };
+    static StyleMap styleMap(styleEntries, END_ARRAY(styleEntries));
+
+    StyleMap::const_iterator styleHit = styleMap.find(mCommand);
+    if (styleHit != styleMap.end())
+    {
+        state.mStyle = styleHit->second;
+        return;
+    }
+
+    // Commands that switch the math font family.
+    static pair<wstring, TexMathFont::Family> familyEntries[] =
+    {
+        make_pair(L"\\rm", TexMathFont::cFamilyRm),
+        make_pair(L"\\bf", TexMathFont::cFamilyBf),
+        make_pair(L"\\it", TexMathFont::cFamilyIt),
+        make_pair(L"\\cal", TexMathFont::cFamilyCal),
+        make_pair(L"\\tt", TexMathFont::cFamilyTt),
+        make_pair(L"\\sf", TexMathFont::cFamilySf)
+    };
+    static FamilyMap familyMap(familyEntries, END_ARRAY(familyEntries));
+
+    FamilyMap::const_iterator familyHit = familyMap.find(mCommand);
+    if (familyHit == familyMap.end())
+        throw logic_error(
+            "Unexpected command in MathStateChange::Apply"
+        );
+
+    state.mMathFont.mFamily = familyHit->second;
+}
+
+
+void TextStateChange::Apply(
+    TexProcessingState& state   // in/out: mTextFont is replaced as a whole
+) const
+{   // Looks mCommand up among the text font commands; throws logic_error if absent.
+    static pair<wstring, TexTextFont> textCommandArray[] =
+    {   // TexTextFont arguments: family, bold?, italic?
+        make_pair(L"\\rm", TexTextFont(TexTextFont::cFamilyRm, false, false)),
+        make_pair(L"\\it", TexTextFont(TexTextFont::cFamilyRm, false, true)),
+        make_pair(L"\\bf", TexTextFont(TexTextFont::cFamilyRm, true, false)),
+        make_pair(L"\\sf", TexTextFont(TexTextFont::cFamilySf, false, false)),
+        make_pair(L"\\tt", TexTextFont(TexTextFont::cFamilyTt, false, false)),
+    };
+    static wishful_hash_map<wstring, TexTextFont> textCommandTable(
+        textCommandArray,
+        END_ARRAY(textCommandArray)
+    );
+
+    wishful_hash_map<wstring, TexTextFont>::const_iterator  // read-only lookup, as in the sibling Apply()s
+        textCommand = textCommandTable.find(mCommand);
+
+    if (textCommand == textCommandTable.end())
+        throw logic_error(
+            "Unexpected command in TextStateChange::Apply"
+        );
+
+    state.mTextFont = textCommand->second;
+}
+
+
+void MathColour::Apply(
+    TexProcessingState& state
+) const
+{
+    // Resolve the stored colour name against the global colour table.
+    wishful_hash_map<wstring, RGBColour>::const_iterator
+        entry = gColourTable.find(mColourName);
+
+    if (entry != gColourTable.end())
+    {
+        state.mColour = entry->second;
+        return;
+    }
+    // The name was checked during parsing, so a miss is an internal error.
+    throw logic_error("Cannot find colour name in MathColour::Apply");
+}
+
+
+void TextColour::Apply(
+    TexProcessingState& state
+) const
+{
+    // Resolve the stored colour name against the global colour table.
+    wishful_hash_map<wstring, RGBColour>::const_iterator
+        entry = gColourTable.find(mColourName);
+
+    if (entry != gColourTable.end())
+    {
+        state.mColour = entry->second;
+        return;
+    }
+    // The name was checked during parsing, so a miss is an internal error.
+    throw logic_error("Cannot find colour name in TextColour::Apply");
+}
+
+
+// =========================================================================
+// Implementations of ParseTree::Node::GetPurifiedTex()
+
+
+
+
+void MathSymbol::GetPurifiedTex( // Writes a space followed by this symbol's command to 'os'.
+ wostream& os,
+ LatexFeatures& features, // told about mCommand via Update()
+ FontEncoding fontEncoding // not used by this node
+) const
+{
+ features.Update(mCommand);
+ os << L" " << mCommand; // leading space, presumably so the command cannot run into preceding output
+}
+
+
+void MathCommand1Arg::GetPurifiedTex( // Writes "<command>{<child>}" to 'os'.
+ wostream& os,
+ LatexFeatures& features, // told about mCommand, then passed on to the child
+ FontEncoding fontEncoding // passed through to the child unchanged
+) const
+{
+ features.Update(mCommand);
+ os << mCommand << L"{";
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+}
+
+
+void MathStateChange::GetPurifiedTex( // Writes the state-change command followed by a space.
+ wostream& os,
+ LatexFeatures& features, // told about mCommand via Update()
+ FontEncoding fontEncoding // not used by this node
+) const
+{
+ features.Update(mCommand);
+ os << mCommand << L" "; // trailing space, presumably to terminate the control word
+}
+
+
+void MathColour::GetPurifiedTex( // Writes "\color{<name>}" and flags that colour support is needed.
+ wostream& os,
+ LatexFeatures& features, // mNeedsColor is set directly; Update() is not called here
+ FontEncoding fontEncoding // not used by this node
+) const
+{
+ features.mNeedsColor = true;
+ os << L"\\color{" << mColourName << L"}";
+}
+
+
+void MathCommand2Args::GetPurifiedTex(
+    wostream& os,
+    LatexFeatures& features,
+    FontEncoding fontEncoding
+) const
+{
+    features.Update(mCommand);
+    // Three output shapes; each wraps both children in braces.
+    if (mIsInfix)
+    {
+        // Infix form, e.g. "\over":  {first}\command{second}
+        os << L"{";
+        mChild1->GetPurifiedTex(os, features, fontEncoding);
+        os << L"}" << mCommand << L"{";
+        mChild2->GetPurifiedTex(os, features, fontEncoding);
+        os << L"}";
+    }
+    else if (mCommand == L"\\rootReserved")
+    {
+        // The reserved root command is written as:  \sqrt[{first}]{second}
+        os << L"\\sqrt[{";
+        mChild1->GetPurifiedTex(os, features, fontEncoding);
+        os << L"}]{";
+        mChild2->GetPurifiedTex(os, features, fontEncoding);
+        os << L"}";
+    }
+    else
+    {
+        // Ordinary prefix form:  \command{first}{second}
+        os << mCommand << L"{";
+        mChild1->GetPurifiedTex(os, features, fontEncoding);
+        os << L"}{";
+        mChild2->GetPurifiedTex(os, features, fontEncoding);
+        os << L"}";
+    }
+}
+
+
+void MathGroup::GetPurifiedTex( // Writes the child wrapped in a brace group: "{<child>}".
+ wostream& os,
+ LatexFeatures& features, // passed through to the child
+ FontEncoding fontEncoding // passed through to the child
+) const
+{
+ os << L"{";
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+}
+
+
+void MathList::GetPurifiedTex(
+    wostream& os,
+    LatexFeatures& features,
+    FontEncoding fontEncoding
+) const
+{
+    // A list adds no markup of its own: the children are written back to
+    // back, in order, each receiving the same feature tracker and
+    // font encoding.
+    typedef vector<MathNode*>::size_type Index;
+    for (Index i = 0; i < mChildren.size(); ++i)
+        mChildren[i]->GetPurifiedTex(os, features, fontEncoding);
+}
+
+
+void MathScripts::GetPurifiedTex( // Writes the base, then "^{upper}", then "_{lower}"; each part only if present.
+ wostream& os,
+ LatexFeatures& features, // passed through to each part
+ FontEncoding fontEncoding // passed through to each part
+) const
+{
+ if (mBase.get()) // a null pointer means the part is absent
+ mBase->GetPurifiedTex(os, features, fontEncoding);
+ if (mUpper.get())
+ {
+ os << L"^{";
+ mUpper->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+ }
+ if (mLower.get()) // the subscript is always written after the superscript
+ {
+ os << L"_{";
+ mLower->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+ }
+}
+
+
+void MathLimits::GetPurifiedTex( // Writes the child followed directly by the limits command.
+ wostream& os,
+ LatexFeatures& features, // passed to the child; mCommand itself is not reported
+ FontEncoding fontEncoding // passed through to the child
+) const
+{
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << mCommand; // no separator is written before or after the command
+}
+
+
+void MathDelimited::GetPurifiedTex( // Writes "\left<ldelim><child>\right<rdelim>".
+ wostream& os,
+ LatexFeatures& features, // told about both delimiters, then passed to the child
+ FontEncoding fontEncoding // passed through to the child
+) const
+{
+ features.Update(mLeftDelimiter); // both delimiters are reported before any output
+ features.Update(mRightDelimiter);
+
+ os << L"\\left" << mLeftDelimiter;
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << L"\\right" << mRightDelimiter;
+}
+
+
+void MathBig::GetPurifiedTex( // Writes the sizing command immediately followed by its delimiter.
+ wostream& os,
+ LatexFeatures& features, // told about both the command and the delimiter
+ FontEncoding fontEncoding // not used by this node
+) const
+{
+ features.Update(mCommand);
+ features.Update(mDelimiter);
+
+ os << mCommand << mDelimiter; // no separator between the two
+}
+
+
+void MathTableRow::GetPurifiedTex(
+    wostream& os,
+    LatexFeatures& features,
+    FontEncoding fontEncoding
+) const
+{
+    // Cells are written in order and joined with " &"; the separator
+    // goes before every cell except the first, so nothing trails the row.
+    typedef vector<MathNode*>::size_type Index;
+    for (Index cell = 0; cell < mEntries.size(); ++cell)
+    {
+        if (cell > 0)
+            os << L" &";
+
+        mEntries[cell]->GetPurifiedTex(os, features, fontEncoding);
+    }
+}
+
+
+void MathTable::GetPurifiedTex(
+    wostream& os,
+    LatexFeatures& features,
+    FontEncoding fontEncoding
+) const
+{
+    // Rows are written in order, joined by a space and a double backslash;
+    // it goes before every row except the first, so nothing trails the table.
+    typedef vector<MathTableRow*>::size_type Index;
+    for (Index row = 0; row < mRows.size(); ++row)
+    {
+        if (row > 0)
+            os << L" \\\\";
+
+        mRows[row]->GetPurifiedTex(os, features, fontEncoding);
+    }
+}
+
+
+void MathEnvironment::GetPurifiedTex(
+    wostream& os,
+    LatexFeatures& features,
+    FontEncoding fontEncoding
+) const
+{
+    wstring opener, closer;
+    if (!mIsShort)
+    {
+        opener = L"\\begin{" + mName + L"}";
+        features.Update(opener);
+        closer = L"\\end{" + mName + L"}";
+    }
+    else
+    {
+        // Short form \name{...}: report the bare command, then add the brace.
+        opener = L"\\" + mName;
+        features.Update(opener);
+        opener += L"{";
+        closer = L"}";
+    }
+    os << opener;
+    mTable->GetPurifiedTex(os, features, fontEncoding);
+    os << closer;
+}
+
+
+void TextList::GetPurifiedTex(
+    wostream& os,
+    LatexFeatures& features,
+    FontEncoding fontEncoding
+) const
+{
+    // A list adds no markup of its own: the children are written back to
+    // back, in order, each receiving the same feature tracker and
+    // font encoding.
+    typedef vector<TextNode*>::size_type Index;
+    for (Index i = 0; i < mChildren.size(); ++i)
+        mChildren[i]->GetPurifiedTex(os, features, fontEncoding);
+}
+
+
+void TextGroup::GetPurifiedTex( // Writes the child wrapped in a brace group: "{<child>}".
+ wostream& os,
+ LatexFeatures& features, // passed through to the child
+ FontEncoding fontEncoding // passed through to the child
+) const
+{
+ os << L"{";
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+}
+
+
+wstring FormatCodePoint(unsigned code) // Returns L"U+" followed by 'code' as 8 zero-padded uppercase hex digits.
+{
+ wostringstream s;
+ s << L"U+" << hex << setfill(L'0') << uppercase << setw(8) << code; // setw() affects only the next insertion, i.e. 'code'
+ return s.str();
+}
+
+wstring FontEncodingName[] = // Encoding names passed as Exception arguments; indexed by a FontEncoding value.
+{
+ L"default", // NOTE(review): order presumably mirrors the FontEncoding enumerators — confirm against the enum
+ L"cyrillic",
+ L"japanese"
+};
+
+void TextSymbol::GetPurifiedTex( // Writes one text-mode symbol, checking that it is legal under 'fontEncoding'.
+ wostream& os,
+ LatexFeatures& features, // package flags are set according to the kind of character
+ FontEncoding fontEncoding // encoding currently in force; a mismatch throws Exception
+) const
+{
+ // These are all the non-ASCII unicode characters that we will translate
+ // directly to \unichar without additional font encoding commands.
+ static wchar_t gSimpleUnicodeArray[] =
+ {
+ 161, 163, 167, 169, 172, 174, 176, 181, 182, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+ 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,
+ 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235,
+ 236, 237, 238, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
+ 251, 252, 253, 255, 256, 257, 258, 259, 262, 263, 264, 265, 266,
+ 267, 268, 269, 270, 271, 274, 275, 276, 277, 278, 279, 282, 283,
+ 284, 285, 286, 287, 288, 289, 290, 292, 293, 296, 297, 298, 299,
+ 300, 301, 304, 305, 308, 309, 310, 311, 313, 314, 315, 316, 317,
+ 318, 321, 322, 323, 324, 325, 326, 327, 328, 332, 333, 334, 335,
+ 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348,
+ 349, 350, 351, 352, 353, 354, 355, 356, 357, 360, 361, 362, 363,
+ 364, 365, 366, 367, 368, 369, 372, 373, 374, 375, 376, 377, 378,
+ 379, 380, 381, 382, 461, 462, 463, 464, 465, 466, 467, 468, 482,
+ 483, 486, 487, 488, 489, 496, 500, 501, 504, 505, 508, 509, 510,
+ 511, 536, 537, 538, 539, 542, 543, 550, 551, 552, 553, 558, 559,
+ 562, 563
+ };
+
+ static set<wchar_t> gSimpleUnicodeTable( // set built once from the array above, for membership tests
+ gSimpleUnicodeArray,
+ END_ARRAY(gSimpleUnicodeArray)
+ );
+
+ if (mCommand.size() > 1 || mCommand[0] <= 0x7F) // multi-character command, or a single character in the ASCII range
+ {
+ // Plain ASCII character, or something like \textbackslash or \{.
+ if (mCommand != L" " && fontEncoding != cFontEncodingDefault) // only a space is accepted inside a non-default encoding
+ throw Exception(
+ L"WrongFontEncoding",
+ mCommand,
+ FontEncodingName[fontEncoding]
+ );
+
+ features.Update(mCommand);
+ os << mCommand;
+ }
+ else
+ {
+ unsigned code = static_cast<unsigned>(mCommand[0]); // single non-ASCII character, handled by code point
+
+ if (gSimpleUnicodeTable.count(code))
+ {
+ if (fontEncoding != cFontEncodingDefault) // these characters are accepted only in the default encoding
+ throw Exception(
+ L"WrongFontEncoding",
+ FormatCodePoint(code),
+ FontEncodingName[fontEncoding]
+ );
+
+ features.mNeedsUcs = true;
+ os << L"\\unichar{" << code << L"}"; // 'code' is inserted as an unsigned integer
+ }
+ // Cyrillic:
+ else if (code >= 0x400 && code <= 0x45F)
+ {
+ if (fontEncoding != cFontEncodingCyrillic) // must be inside the Cyrillic encoding; the error carries "\cyr" as a hint
+ throw Exception(
+ L"WrongFontEncodingWithHint",
+ FormatCodePoint(code),
+ FontEncodingName[fontEncoding],
+ L"\\cyr"
+ );
+
+ features.mNeedsUcs = true;
+ features.mNeedsX2 = true;
+ os << L"\\unichar{" << code << L"}";
+ }
+ // Japanese:
+ // FIX: we're making a very half-hearted attempt to filter out
+ // non-Japanese characters here...
+ else if (
+ (code >= 0x3040 && code <= 0x30FF) // hiragana + katakana
+ || (code >= 0x3400 && code <= 0x9FFF) // some kanji
+ || (code >= 0xF900 && code <= 0xFAE0) // more kanji
+ )
+ {
+ if (fontEncoding != cFontEncodingJapanese) // must be inside the Japanese encoding; the error carries "\jap" as a hint
+ throw Exception(
+ L"WrongFontEncodingWithHint",
+ FormatCodePoint(code),
+ FontEncodingName[fontEncoding],
+ L"\\jap"
+ );
+
+ features.mNeedsCJK = true;
+ features.mNeedsJapaneseFont = true;
+ // FIX: find out if CJK package lets us input via code point
+ // instead of UTF-8
+ os << mCommand[0]; // the character itself is written, not a \unichar command
+ }
+ else
+ {
+ throw Exception( // any other non-ASCII character is rejected
+ L"PngIncompatibleCharacter",
+ FormatCodePoint(code)
+ );
+ }
+ }
+}
+
+
+void TextStateChange::GetPurifiedTex( // Writes the text-mode state-change command followed by "{}".
+ wostream& os,
+ LatexFeatures& features, // told about mCommand via Update()
+ FontEncoding fontEncoding // not used by this node
+) const
+{
+ features.Update(mCommand);
+ os << mCommand << L"{}"; // empty group, presumably to terminate the control word without adding a space
+}
+
+
+void TextColour::GetPurifiedTex( // Writes "\color{<name>}" and flags that colour support is needed.
+ wostream& os,
+ LatexFeatures& features, // mNeedsColor is set directly; Update() is not called here
+ FontEncoding fontEncoding // not used by this node
+) const
+{
+ features.mNeedsColor = true;
+ os << L"\\color{" << mColourName << L"}";
+}
+
+
+// If 'command' is a font encoding command ("\cyr" or "\jap"), switches
+// 'fontEncoding' to the matching encoding; any other command leaves it
+// untouched. Encodings may not nest: selecting one while another is
+// already active throws Exception(L"IllegalNestedFontEncodings").
+void HandleFontEncodingCommand(
+    const wstring& command,
+    FontEncoding& fontEncoding
+)
+{
+    FontEncoding requested;
+    if (command == L"\\cyr")
+        requested = cFontEncodingCyrillic;
+    else if (command == L"\\jap")
+        requested = cFontEncodingJapanese;
+    else
+        return;     // not an encoding command; nothing to do
+
+    // An encoding command inside another encoding's scope is rejected.
+    if (fontEncoding != cFontEncodingDefault)
+        throw Exception(L"IllegalNestedFontEncodings");
+
+    fontEncoding = requested;
+}
+
+void TextCommand1Arg::GetPurifiedTex( // Writes "<command>{<child>}", switching font encoding for the child if the command selects one.
+ wostream& os,
+ LatexFeatures& features, // told about mCommand, then passed to the child
+ FontEncoding fontEncoding // by value: a change made here reaches only the child
+) const
+{
+ features.Update(mCommand);
+ HandleFontEncodingCommand(mCommand, fontEncoding); // may throw if encodings would nest
+
+ os << mCommand << L"{";
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+}
+
+
+void EnterTextMode::GetPurifiedTex( // Writes "<command>{<child>}", switching font encoding for the child if the command selects one.
+ wostream& os,
+ LatexFeatures& features, // told about mCommand, then passed to the child
+ FontEncoding fontEncoding // by value: a change made here reaches only the child
+) const
+{
+ features.Update(mCommand);
+ HandleFontEncodingCommand(mCommand, fontEncoding); // may throw if encodings would nest
+
+ os << mCommand << L"{";
+ mChild->GetPurifiedTex(os, features, fontEncoding);
+ os << L"}";
+}
+
+// =========================================================================
+// Now all the ParseTree debugging code.
+
+// This function generates the indents used by various debugging Print()
+// functions.
+wstring indent(int depth)
+{ // Two spaces per nesting level.
+ return wstring(2 * depth, L' ');
+}
+
+void MathSymbol::Print(wostream& os, int depth) const
+{ // Debug dump: one line, indented for 'depth', with the node type and its quoted command.
+ os << indent(depth) << L"MathSymbol \"" << mCommand << L"\"" << endl;
+}
+
+void MathCommand1Arg::Print(wostream& os, int depth) const
+{ // Debug dump: header line with the quoted command, then the child one level deeper.
+ os << indent(depth) << L"MathCommand1Arg \""
+ << mCommand << L"\"" << endl;
+ mChild->Print(os, depth+1);
+}
+
+void MathCommand2Args::Print(wostream& os, int depth) const
+{ // Debug dump: header line with the quoted command, then both children one level deeper.
+ os << indent(depth) << L"MathCommand2Args \""
+ << mCommand << L"\"" << endl;
+ mChild1->Print(os, depth+1);
+ mChild2->Print(os, depth+1);
+}
+
+void MathGroup::Print(wostream& os, int depth) const
+{ // Debug dump: header line, then the child one level deeper.
+ os << indent(depth) << L"MathGroup" << endl;
+ mChild->Print(os, depth+1);
+}
+
+void MathList::Print(wostream& os, int depth) const
+{
+    // Header line for this node, then each child one level deeper.
+    os << indent(depth) << L"MathList" << endl;
+
+    for (vector<MathNode*>::size_type i = 0; i < mChildren.size(); ++i)
+        mChildren[i]->Print(os, depth + 1);
+}
+
+void MathScripts::Print(wostream& os, int depth) const
+{ // Debug dump: header line, then a labelled sub-entry for each part that is present.
+ os << indent(depth) << L"MathScripts" << endl;
+ if (mBase.get()) // a null pointer means the part is absent and is skipped
+ {
+ os << indent(depth+1) << L"base" << endl; // label one level deeper, the part itself two levels deeper
+ mBase->Print(os, depth+2);
+ }
+ if (mUpper.get())
+ {
+ os << indent(depth+1) << L"upper" << endl;
+ mUpper->Print(os, depth+2);
+ }
+ if (mLower.get())
+ {
+ os << indent(depth+1) << L"lower" << endl;
+ mLower->Print(os, depth+2);
+ }
+}
+
+void MathLimits::Print(wostream& os, int depth) const
+{ // Debug dump: header line with the quoted command, then the child one level deeper.
+ os << indent(depth) << L"MathLimits \"" << mCommand << L"\"" << endl;
+ mChild->Print(os, depth+1);
+}
+
+void MathStateChange::Print(wostream& os, int depth) const
+{ // Debug dump: one line with the node type and its quoted command.
+ os << indent(depth) << L"MathStateChange \""
+ << mCommand << L"\"" << endl;
+}
+
+void MathColour::Print(wostream& os, int depth) const
+{ // Debug dump: one line with the node type and its quoted colour name.
+ os << indent(depth) << L"MathColour \""
+ << mColourName << L"\"" << endl;
+}
+
+void MathDelimited::Print(wostream& os, int depth) const
+{ // Debug dump: header line with both quoted delimiters, then the child one level deeper.
+ os << indent(depth) << L"MathDelimited \"" << mLeftDelimiter
+ << L"\" \"" << mRightDelimiter << L"\"" << endl;
+ mChild->Print(os, depth+1);
+}
+
+void MathBig::Print(wostream& os, int depth) const
+{ // Debug dump: one line with the quoted sizing command and the quoted delimiter.
+ os << indent(depth) << L"MathBig \"" << mCommand << L"\" \""
+ << mDelimiter << L"\"" << endl;
+}
+
+void MathTableRow::Print(wostream& os, int depth) const
+{
+    // Header line for this row, then each entry one level deeper.
+    os << indent(depth) << L"MathTableRow" << endl;
+
+    for (vector<MathNode*>::size_type i = 0; i < mEntries.size(); ++i)
+        mEntries[i]->Print(os, depth + 1);
+}
+
+void MathTable::Print(wostream& os, int depth) const
+{
+    // Header line for this table, then each row one level deeper.
+    os << indent(depth) << L"MathTable" << endl;
+
+    for (vector<MathTableRow*>::size_type i = 0; i < mRows.size(); ++i)
+        mRows[i]->Print(os, depth + 1);
+}
+
+void MathEnvironment::Print(wostream& os, int depth) const
+{   // Debug dump: quoted environment name, " (short)" tag if short, then the table.
+    os << indent(depth) << L"MathEnvironment \"" << mName << L"\"";
+    if (mIsShort)
+        os << L" (short)";   // wide literal, matching every other insertion into this stream
+    os << endl;
+    mTable->Print(os, depth + 1);
+}
+
+void EnterTextMode::Print(wostream& os, int depth) const
+{ // Debug dump: header line with the quoted command, then the child one level deeper.
+ os << indent(depth) << L"EnterTextMode \"" << mCommand << L"\"" << endl;
+ mChild->Print(os, depth+1);
+}
+
+void TextList::Print(wostream& os, int depth) const
+{
+    // Header line for this node, then each child one level deeper.
+    os << indent(depth) << L"TextList" << endl;
+
+    for (vector<TextNode*>::size_type i = 0; i < mChildren.size(); ++i)
+        mChildren[i]->Print(os, depth + 1);
+}
+
+void TextSymbol::Print(wostream& os, int depth) const
+{ // Debug dump: one line with the node type and its quoted command.
+ os << indent(depth) << L"TextSymbol \"" << mCommand << L"\"" << endl;
+}
+
+void TextCommand1Arg::Print(wostream& os, int depth) const
+{ // Debug dump: header line with the quoted command, then the child one level deeper.
+ os << indent(depth) << L"TextCommand1Arg \""
+ << mCommand << L"\"" << endl;
+ mChild->Print(os, depth+1);
+}
+
+void TextStateChange::Print(wostream& os, int depth) const
+{ // Debug dump: one line with the node type and its quoted command.
+ os << indent(depth) << L"TextStateChange \""
+ << mCommand << L"\"" << endl;
+}
+
+void TextColour::Print(wostream& os, int depth) const
+{ // Debug dump: one line with the node type and its quoted colour name.
+ os << indent(depth) << L"TextColour \""
+ << mColourName << L"\"" << endl;
+}
+
+void TextGroup::Print(wostream& os, int depth) const
+{ // Debug dump: header line, then the child one level deeper.
+ os << indent(depth) << L"TextGroup" << endl;
+ mChild->Print(os, depth+1);
+}
+
+}
+
+void LatexFeatures::Update(const wstring& command)
+{ // Sets the mNeeds* flags implied by 'command'; flags are only ever set here, never cleared.
+ static wstring gNeedsAmsmathArray[] = // commands whose presence sets mNeedsAmsmath
+ {
+ L"\\text",
+ L"\\binom",
+ L"\\cfrac",
+ L"\\begin{matrix}",
+ L"\\begin{pmatrix}",
+ L"\\begin{bmatrix}",
+ L"\\begin{Bmatrix}",
+ L"\\begin{vmatrix}",
+ L"\\begin{Vmatrix}",
+ L"\\begin{cases}",
+ L"\\begin{aligned}",
+ L"\\begin{smallmatrix}",
+ L"\\overleftrightarrow",
+ L"\\boldsymbol",
+ L"\\And",
+ L"\\iint",
+ L"\\iiint",
+ L"\\iiiint",
+ L"\\varlimsup",
+ L"\\varliminf",
+ L"\\varinjlim",
+ L"\\varprojlim",
+ L"\\injlim",
+ L"\\projlim",
+ L"\\dotsb",
+ L"\\operatorname",
+ L"\\operatornamewithlimits",
+ L"\\lvert",
+ L"\\rvert",
+ L"\\lVert",
+ L"\\rVert",
+ L"\\substack",
+ L"\\overset",
+ L"\\underset",
+ L"\\mod",
+
+ // The following commands are all defined in regular latex, but
+ // amsmath redefines them to have slightly different properties:
+ //
+ // * The text commands are modified so that the font size does not
+ // change if they are used inside a formula.
+ // * The "\dots" command adjusts the height of the dots depending
+ // on the surrounding symbols.
+ // * The "\colon" command gets some spacing adjustments.
+ //
+ // Therefore for consistency we include amsmath when these commands
+ // appear.
+ //
+ // (FIX: there are probably others that need to be here that I
+ // haven't put here yet.)
+ L"\\emph",
+ L"\\textit",
+ L"\\textbf",
+ L"\\textrm",
+ L"\\texttt",
+ L"\\textsf",
+ L"\\dots",
+ L"\\dotsb", // NOTE(review): "\dotsb" is already listed earlier in this array; possibly another \dots variant was meant — confirm
+ L"\\colon"
+ };
+
+ static wishful_hash_set<wstring> gNeedsAmsmathTable( // lookup set built once from the array above
+ gNeedsAmsmathArray,
+ END_ARRAY(gNeedsAmsmathArray)
+ );
+
+
+ static wstring gNeedsAmssymbArray[] = // commands whose presence sets mNeedsAmssymb
+ {
+ L"\\varkappa",
+ L"\\digamma",
+ L"\\beth",
+ L"\\gimel",
+ L"\\daleth",
+ L"\\Finv",
+ L"\\Game",
+ L"\\upharpoonright",
+ L"\\upharpoonleft",
+ L"\\downharpoonright",
+ L"\\downharpoonleft",
+ L"\\nleftarrow",
+ L"\\nrightarrow",
+ L"\\sqsupset",
+ L"\\sqsubset",
+ L"\\supsetneq",
+ L"\\subsetneq",
+ L"\\Vdash",
+ L"\\vDash",
+ L"\\lesssim",
+ L"\\nless",
+ L"\\ngeq",
+ L"\\nleq",
+ L"\\smallsmile",
+ L"\\smallfrown",
+ L"\\smallsetminus",
+ L"\\varnothing",
+ L"\\nmid",
+ L"\\square",
+ L"\\Box",
+ L"\\checkmark",
+ L"\\complement",
+ L"\\eth",
+ L"\\hslash",
+ L"\\mho",
+ L"\\circledR",
+ L"\\yen",
+ L"\\maltese",
+ L"\\ulcorner",
+ L"\\urcorner",
+ L"\\llcorner",
+ L"\\lrcorner",
+ L"\\dashrightarrow",
+ L"\\dasharrow",
+ L"\\dashleftarrow",
+ L"\\backprime",
+ L"\\vartriangle",
+ L"\\blacktriangle",
+ L"\\triangledown",
+ L"\\blacktriangledown",
+ L"\\blacksquare",
+ L"\\lozenge",
+ L"\\blacklozenge",
+ L"\\circledS",
+ L"\\bigstar",
+ L"\\sphericalangle",
+ L"\\measuredangle",
+ L"\\diagup",
+ L"\\diagdown",
+ L"\\Bbbk",
+ L"\\dotplus",
+ L"\\ltimes",
+ L"\\rtimes",
+ L"\\Cap",
+ L"\\leftthreetimes",
+ L"\\rightthreetimes",
+ L"\\Cup",
+ L"\\barwedge",
+ L"\\curlywedge",
+ L"\\veebar",
+ L"\\curlyvee",
+ L"\\doublebarwedge",
+ L"\\boxminus",
+ L"\\circleddash",
+ L"\\boxtimes",
+ L"\\circledast",
+ L"\\boxdot",
+ L"\\circledcirc",
+ L"\\boxplus",
+ L"\\centerdot",
+ L"\\divideontimes",
+ L"\\intercal",
+ L"\\leqq",
+ L"\\geqq",
+ L"\\leqslant",
+ L"\\geqslant",
+ L"\\eqslantless",
+ L"\\eqslantgtr",
+ L"\\gtrsim",
+ L"\\lessapprox",
+ L"\\gtrapprox",
+ L"\\approxeq",
+ L"\\eqsim",
+ L"\\lessdot",
+ L"\\gtrdot",
+ L"\\lll",
+ L"\\ggg",
+ L"\\lessgtr",
+ L"\\gtrless",
+ L"\\lesseqgtr",
+ L"\\gtreqless",
+ L"\\lesseqqgtr",
+ L"\\gtreqqless",
+ L"\\doteqdot",
+ L"\\eqcirc",
+ L"\\risingdotseq",
+ L"\\circeq",
+ L"\\fallingdotseq",
+ L"\\triangleq",
+ L"\\backsim",
+ L"\\thicksim",
+ L"\\backsimeq",
+ L"\\thickapprox",
+ L"\\subseteqq",
+ L"\\supseteqq",
+ L"\\Subset",
+ L"\\Supset",
+ L"\\preccurlyeq",
+ L"\\succcurlyeq",
+ L"\\curlyeqprec",
+ L"\\curlyeqsucc",
+ L"\\precsim",
+ L"\\succsim",
+ L"\\precapprox",
+ L"\\succapprox",
+ L"\\vartriangleleft",
+ L"\\vartriangleright",
+ L"\\Vvdash",
+ L"\\shortmid",
+ L"\\shortparallel",
+ L"\\bumpeq",
+ L"\\between",
+ L"\\Bumpeq",
+ L"\\varpropto",
+ L"\\backepsilon",
+ L"\\blacktriangleleft",
+ L"\\blacktriangleright",
+ L"\\therefore",
+ L"\\because",
+ L"\\ngtr",
+ L"\\nleqslant",
+ L"\\ngeqslant",
+ L"\\nleqq",
+ L"\\ngeqq",
+ L"\\lneqq",
+ L"\\gneqq",
+ L"\\lvertneqq",
+ L"\\gvertneqq",
+ L"\\lnsim",
+ L"\\gnsim",
+ L"\\lnapprox",
+ L"\\gnapprox",
+ L"\\nprec",
+ L"\\nsucc",
+ L"\\npreceq",
+ L"\\nsucceq",
+ L"\\precneqq",
+ L"\\succneqq",
+ L"\\precnsim",
+ L"\\succnsim",
+ L"\\precnapprox",
+ L"\\succnapprox",
+ L"\\nsim",
+ L"\\ncong",
+ L"\\nshortmid",
+ L"\\nshortparallel",
+ L"\\nmid", // NOTE(review): second occurrence of "\nmid" in this array
+ L"\\nparallel",
+ L"\\nvdash",
+ L"\\nvDash",
+ L"\\nVdash",
+ L"\\nVDash",
+ L"\\ntriangleleft",
+ L"\\ntriangleright",
+ L"\\ntrianglelefteq",
+ L"\\ntrianglerighteq",
+ L"\\nsubseteq",
+ L"\\nsupseteq",
+ L"\\nsubseteqq",
+ L"\\nsupseteqq",
+ L"\\subsetneq", // NOTE(review): second occurrence of "\subsetneq" in this array
+ L"\\supsetneq", // NOTE(review): second occurrence of "\supsetneq" in this array
+ L"\\varsubsetneq",
+ L"\\varsupsetneq",
+ L"\\subsetneqq",
+ L"\\supsetneqq",
+ L"\\varsubsetneqq",
+ L"\\varsupsetneqq",
+ L"\\leftleftarrows",
+ L"\\rightrightarrows",
+ L"\\leftrightarrows",
+ L"\\rightleftarrows",
+ L"\\Lleftarrow",
+ L"\\Rrightarrow",
+ L"\\twoheadleftarrow",
+ L"\\twoheadrightarrow",
+ L"\\leftarrowtail",
+ L"\\rightarrowtail",
+ L"\\looparrowleft",
+ L"\\looparrowright",
+ L"\\leftrightharpoons",
+ L"\\rightleftharpoons",
+ L"\\curvearrowleft",
+ L"\\curvearrowright",
+ L"\\circlearrowleft",
+ L"\\circlearrowright",
+ L"\\Lsh",
+ L"\\Rsh",
+ L"\\upuparrows",
+ L"\\downdownarrows",
+ L"\\multimap",
+ L"\\rightsquigarrow",
+ L"\\leftrightsquigarrow",
+ L"\\nLeftarrow",
+ L"\\nRightarrow",
+ L"\\nleftrightarrow",
+ L"\\nLeftrightarrow",
+ L"\\pitchfork",
+ L"\\nexists",
+ L"\\lhd",
+ L"\\rhd",
+ L"\\unlhd",
+ L"\\unrhd",
+ L"\\Join",
+ L"\\leadsto"
+ };
+
+ static wishful_hash_set<wstring> gNeedsAmssymbTable( // lookup set built once from the array above
+ gNeedsAmssymbArray,
+ END_ARRAY(gNeedsAmssymbArray)
+ );
+
+ // Note: there might be other commands which imply loading packages
+ // which are handled elsewhere (e.g. \color)
+
+ if (command[0] == L'\\') // only strings starting with a backslash are examined; anything else changes no flag
+ {
+ if (command == L"\\cyr")
+ mNeedsX2 = mNeedsUcs = true;
+ if (command == L"\\jap")
+ mNeedsCJK = mNeedsJapaneseFont = true;
+
+ if (!mNeedsAmsfonts && // once a flag is set, its test is skipped
+ (command == L"\\mathbb" || command == L"\\mathfrak")
+ )
+ mNeedsAmsfonts = true;
+
+ if (!mNeedsAmsmath && gNeedsAmsmathTable.count(command))
+ mNeedsAmsmath = true;
+
+ if (!mNeedsAmssymb && gNeedsAmssymbTable.count(command))
+ mNeedsAmssymb = true;
+ }
+}
+
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/Parser.cpp b/blahtexml/source/BlahtexCore/Parser.cpp
new file mode 100644
index 0000000..0458235
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Parser.cpp
@@ -0,0 +1,1643 @@
+// File "Parser.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <stdexcept>
+#include "InputSymbolTranslation.h"
+#include "Parser.h"
+
+using namespace std;
+
+namespace blahtex {
+
+// Imported from ParseTree1.cpp:
+extern wishful_hash_map<wstring, wstring> gDelimiterTable;
+extern wishful_hash_map<wstring, RGBColour> gColourTable;
+
+// These tables contain all the commands that blahtex recognises in math
+// mode (respectively text mode). They provide the token codes for the
+// parser to do its job.
+
+pair<wstring, Parser::TokenCode> gMathTokenArray[] =
+{
+ make_pair(L"", Parser::cEndOfInput),
+ make_pair(L" ", Parser::cWhitespace),
+ make_pair(L"\\newcommand", Parser::cNewcommand),
+
+ make_pair(L"$", Parser::cIllegal),
+ make_pair(L"%", Parser::cIllegal),
+ make_pair(L"#", Parser::cIllegal),
+ make_pair(L"`", Parser::cIllegal),
+ make_pair(L"\"", Parser::cIllegal),
+
+ make_pair(L"{", Parser::cBeginGroup),
+ make_pair(L"}", Parser::cEndGroup),
+
+ make_pair(L"&", Parser::cNextCell),
+ make_pair(L"\\\\", Parser::cNextRow),
+
+ make_pair(L"^", Parser::cSuperscript),
+ make_pair(L"_", Parser::cSubscript),
+ make_pair(L"'", Parser::cPrime),
+
+ make_pair(L"\\hbox", Parser::cEnterTextMode),
+ make_pair(L"\\mbox", Parser::cEnterTextMode),
+ make_pair(L"\\emph", Parser::cEnterTextMode),
+ make_pair(L"\\text", Parser::cEnterTextMode),
+ make_pair(L"\\textit", Parser::cEnterTextMode),
+ make_pair(L"\\textbf", Parser::cEnterTextMode),
+ make_pair(L"\\textrm", Parser::cEnterTextMode),
+ make_pair(L"\\texttt", Parser::cEnterTextMode),
+ make_pair(L"\\textsf", Parser::cEnterTextMode),
+
+ make_pair(L"\\cyr", Parser::cEnterTextMode),
+ make_pair(L"\\jap", Parser::cEnterTextMode),
+
+ make_pair(L"\\sqrt", Parser::cCommand1Arg),
+ make_pair(L"\\pmod", Parser::cCommand1Arg),
+ make_pair(L"\\operatorname", Parser::cCommand1Arg),
+ make_pair(L"\\operatornamewithlimits", Parser::cCommand1Arg),
+
+ // "\rootReserved" is the *only* "Reserved" command the parser
+ // has to worry about.
+ make_pair(L"\\rootReserved", Parser::cCommand2Args),
+
+ make_pair(L"\\binom", Parser::cCommand2Args),
+ make_pair(L"\\frac", Parser::cCommand2Args),
+ make_pair(L"\\cfrac", Parser::cCommand2Args),
+ make_pair(L"\\overset", Parser::cCommand2Args),
+ make_pair(L"\\underset", Parser::cCommand2Args),
+
+ make_pair(L"\\over", Parser::cCommandInfix),
+ make_pair(L"\\choose", Parser::cCommandInfix),
+ make_pair(L"\\atop", Parser::cCommandInfix),
+
+ make_pair(L"\\left", Parser::cLeft),
+ make_pair(L"\\right", Parser::cRight),
+
+ make_pair(L"\\big", Parser::cBig),
+ make_pair(L"\\bigl", Parser::cBig),
+ make_pair(L"\\bigr", Parser::cBig),
+ make_pair(L"\\Big", Parser::cBig),
+ make_pair(L"\\Bigl", Parser::cBig),
+ make_pair(L"\\Bigr", Parser::cBig),
+ make_pair(L"\\bigg", Parser::cBig),
+ make_pair(L"\\biggl", Parser::cBig),
+ make_pair(L"\\biggr", Parser::cBig),
+ make_pair(L"\\Bigg", Parser::cBig),
+ make_pair(L"\\Biggl", Parser::cBig),
+ make_pair(L"\\Biggr", Parser::cBig),
+
+ make_pair(L"\\mathop", Parser::cCommand1Arg),
+ make_pair(L"\\mathrel", Parser::cCommand1Arg),
+ make_pair(L"\\mathord", Parser::cCommand1Arg),
+ make_pair(L"\\mathbin", Parser::cCommand1Arg),
+ make_pair(L"\\mathopen", Parser::cCommand1Arg),
+ make_pair(L"\\mathclose", Parser::cCommand1Arg),
+ make_pair(L"\\mathpunct", Parser::cCommand1Arg),
+ make_pair(L"\\mathinner", Parser::cCommand1Arg),
+
+ make_pair(L"\\limits", Parser::cLimits),
+ make_pair(L"\\nolimits", Parser::cLimits),
+ make_pair(L"\\displaylimits", Parser::cLimits),
+
+ make_pair(L"\\_", Parser::cSymbol),
+ make_pair(L"\\&", Parser::cSymbol),
+ make_pair(L"\\$", Parser::cSymbol),
+ make_pair(L"\\#", Parser::cSymbol),
+ make_pair(L"\\%", Parser::cSymbol),
+ make_pair(L"\\{", Parser::cSymbol),
+ make_pair(L"\\}", Parser::cSymbol),
+
+ make_pair(L"\\mod", Parser::cSymbolUnsafe),
+ make_pair(L"\\bmod", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\substack", Parser::cShortEnvironment),
+
+ make_pair(L"\\begin{matrix}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{pmatrix}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{bmatrix}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{Bmatrix}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{vmatrix}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{Vmatrix}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{cases}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{aligned}", Parser::cBeginEnvironment),
+ make_pair(L"\\begin{smallmatrix}", Parser::cBeginEnvironment),
+
+ make_pair(L"\\end{matrix}", Parser::cEndEnvironment),
+ make_pair(L"\\end{pmatrix}", Parser::cEndEnvironment),
+ make_pair(L"\\end{bmatrix}", Parser::cEndEnvironment),
+ make_pair(L"\\end{Bmatrix}", Parser::cEndEnvironment),
+ make_pair(L"\\end{vmatrix}", Parser::cEndEnvironment),
+ make_pair(L"\\end{Vmatrix}", Parser::cEndEnvironment),
+ make_pair(L"\\end{cases}", Parser::cEndEnvironment),
+ make_pair(L"\\end{aligned}", Parser::cEndEnvironment),
+ make_pair(L"\\end{smallmatrix}", Parser::cEndEnvironment),
+
+ make_pair(L"~", Parser::cSymbolUnsafe),
+ make_pair(L"\\,", Parser::cSymbolUnsafe),
+ make_pair(L"\\!", Parser::cSymbolUnsafe),
+ make_pair(L"\\ ", Parser::cSymbolUnsafe),
+ make_pair(L"\\;", Parser::cSymbolUnsafe),
+ make_pair(L"\\>", Parser::cSymbolUnsafe),
+ make_pair(L"\\quad", Parser::cSymbolUnsafe),
+ make_pair(L"\\qquad", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\not", Parser::cSymbol),
+
+ make_pair(L"(", Parser::cSymbol),
+ make_pair(L")", Parser::cSymbol),
+ make_pair(L"[", Parser::cSymbol),
+ make_pair(L"]", Parser::cSymbol),
+ make_pair(L"<", Parser::cSymbol),
+ make_pair(L">", Parser::cSymbol),
+ make_pair(L"+", Parser::cSymbol),
+ make_pair(L"-", Parser::cSymbol),
+ make_pair(L"=", Parser::cSymbol),
+ make_pair(L"|", Parser::cSymbol),
+ make_pair(L";", Parser::cSymbol),
+ make_pair(L":", Parser::cSymbol),
+ make_pair(L",", Parser::cSymbol),
+ make_pair(L".", Parser::cSymbol),
+ make_pair(L"/", Parser::cSymbol),
+ make_pair(L"?", Parser::cSymbol),
+ make_pair(L"!", Parser::cSymbol),
+ make_pair(L"@", Parser::cSymbol),
+ make_pair(L"*", Parser::cSymbol),
+
+ make_pair(L"\\vert", Parser::cSymbol),
+ make_pair(L"\\lvert", Parser::cSymbol),
+ make_pair(L"\\rvert", Parser::cSymbol),
+ make_pair(L"\\Vert", Parser::cSymbol),
+ make_pair(L"\\lVert", Parser::cSymbol),
+ make_pair(L"\\rVert", Parser::cSymbol),
+ make_pair(L"\\lfloor", Parser::cSymbol),
+ make_pair(L"\\rfloor", Parser::cSymbol),
+ make_pair(L"\\lceil", Parser::cSymbol),
+ make_pair(L"\\rceil", Parser::cSymbol),
+ make_pair(L"\\lbrace", Parser::cSymbol),
+ make_pair(L"\\rbrace", Parser::cSymbol),
+ make_pair(L"\\langle", Parser::cSymbol),
+ make_pair(L"\\rangle", Parser::cSymbol),
+ make_pair(L"\\lbrack", Parser::cSymbol),
+ make_pair(L"\\rbrack", Parser::cSymbol),
+
+ make_pair(L"\\hat", Parser::cCommand1Arg),
+ make_pair(L"\\widehat", Parser::cCommand1Arg),
+ make_pair(L"\\dot", Parser::cCommand1Arg),
+ make_pair(L"\\ddot", Parser::cCommand1Arg),
+ make_pair(L"\\bar", Parser::cCommand1Arg),
+ make_pair(L"\\overline", Parser::cCommand1Arg),
+ make_pair(L"\\underline", Parser::cCommand1Arg),
+ make_pair(L"\\overbrace", Parser::cCommand1Arg),
+ make_pair(L"\\underbrace", Parser::cCommand1Arg),
+ make_pair(L"\\overleftarrow", Parser::cCommand1Arg),
+ make_pair(L"\\overrightarrow", Parser::cCommand1Arg),
+ make_pair(L"\\overleftrightarrow", Parser::cCommand1Arg),
+ make_pair(L"\\check", Parser::cCommand1Arg),
+ make_pair(L"\\acute", Parser::cCommand1Arg),
+ make_pair(L"\\grave", Parser::cCommand1Arg),
+ make_pair(L"\\vec", Parser::cCommand1Arg),
+ make_pair(L"\\breve", Parser::cCommand1Arg),
+ make_pair(L"\\tilde", Parser::cCommand1Arg),
+ make_pair(L"\\widetilde", Parser::cCommand1Arg),
+
+ make_pair(L"\\mathbf", Parser::cCommand1Arg),
+ make_pair(L"\\mathbb", Parser::cCommand1Arg),
+ make_pair(L"\\mathrm", Parser::cCommand1Arg),
+ make_pair(L"\\mathit", Parser::cCommand1Arg),
+ make_pair(L"\\mathcal", Parser::cCommand1Arg),
+ make_pair(L"\\mathfrak", Parser::cCommand1Arg),
+ make_pair(L"\\mathsf", Parser::cCommand1Arg),
+ make_pair(L"\\mathtt", Parser::cCommand1Arg),
+ make_pair(L"\\boldsymbol", Parser::cCommand1Arg),
+
+ make_pair(L"\\rm", Parser::cStateChange),
+ make_pair(L"\\bf", Parser::cStateChange),
+ make_pair(L"\\it", Parser::cStateChange),
+ make_pair(L"\\cal", Parser::cStateChange),
+ make_pair(L"\\tt", Parser::cStateChange),
+ make_pair(L"\\sf", Parser::cStateChange),
+
+ make_pair(L"\\displaystyle", Parser::cStateChange),
+ make_pair(L"\\textstyle", Parser::cStateChange),
+ make_pair(L"\\scriptstyle", Parser::cStateChange),
+ make_pair(L"\\scriptscriptstyle", Parser::cStateChange),
+
+ make_pair(L"\\color", Parser::cStateChange),
+
+ make_pair(L"\\varlimsup", Parser::cSymbolUnsafe),
+ make_pair(L"\\varliminf", Parser::cSymbolUnsafe),
+ make_pair(L"\\lim", Parser::cSymbolUnsafe),
+ make_pair(L"\\sup", Parser::cSymbolUnsafe),
+ make_pair(L"\\inf", Parser::cSymbolUnsafe),
+ make_pair(L"\\limsup", Parser::cSymbolUnsafe),
+ make_pair(L"\\liminf", Parser::cSymbolUnsafe),
+ make_pair(L"\\injlim", Parser::cSymbolUnsafe),
+ make_pair(L"\\projlim", Parser::cSymbolUnsafe),
+ make_pair(L"\\min", Parser::cSymbolUnsafe),
+ make_pair(L"\\max", Parser::cSymbolUnsafe),
+ make_pair(L"\\gcd", Parser::cSymbolUnsafe),
+ make_pair(L"\\det", Parser::cSymbolUnsafe),
+ make_pair(L"\\Pr", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\ker", Parser::cSymbolUnsafe),
+ make_pair(L"\\hom", Parser::cSymbolUnsafe),
+ make_pair(L"\\dim", Parser::cSymbolUnsafe),
+ make_pair(L"\\arg", Parser::cSymbolUnsafe),
+ make_pair(L"\\sin", Parser::cSymbolUnsafe),
+ make_pair(L"\\cos", Parser::cSymbolUnsafe),
+ make_pair(L"\\sec", Parser::cSymbolUnsafe),
+ make_pair(L"\\csc", Parser::cSymbolUnsafe),
+ make_pair(L"\\tan", Parser::cSymbolUnsafe),
+ make_pair(L"\\cot", Parser::cSymbolUnsafe),
+ make_pair(L"\\arcsin", Parser::cSymbolUnsafe),
+ make_pair(L"\\arccos", Parser::cSymbolUnsafe),
+ make_pair(L"\\arctan", Parser::cSymbolUnsafe),
+ make_pair(L"\\sinh", Parser::cSymbolUnsafe),
+ make_pair(L"\\cosh", Parser::cSymbolUnsafe),
+ make_pair(L"\\tanh", Parser::cSymbolUnsafe),
+ make_pair(L"\\coth", Parser::cSymbolUnsafe),
+ make_pair(L"\\log", Parser::cSymbolUnsafe),
+ make_pair(L"\\lg", Parser::cSymbolUnsafe),
+ make_pair(L"\\ln", Parser::cSymbolUnsafe),
+ make_pair(L"\\exp", Parser::cSymbolUnsafe),
+ make_pair(L"\\deg", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\alpha", Parser::cSymbol),
+ make_pair(L"\\beta", Parser::cSymbol),
+ make_pair(L"\\gamma", Parser::cSymbol),
+ make_pair(L"\\delta", Parser::cSymbol),
+ make_pair(L"\\epsilon", Parser::cSymbol),
+ make_pair(L"\\varepsilon", Parser::cSymbol),
+ make_pair(L"\\zeta", Parser::cSymbol),
+ make_pair(L"\\eta", Parser::cSymbol),
+ make_pair(L"\\theta", Parser::cSymbol),
+ make_pair(L"\\vartheta", Parser::cSymbol),
+ make_pair(L"\\iota", Parser::cSymbol),
+ make_pair(L"\\kappa", Parser::cSymbol),
+ make_pair(L"\\varkappa", Parser::cSymbol),
+ make_pair(L"\\lambda", Parser::cSymbol),
+ make_pair(L"\\mu", Parser::cSymbol),
+ make_pair(L"\\nu", Parser::cSymbol),
+ make_pair(L"\\pi", Parser::cSymbol),
+ make_pair(L"\\varpi", Parser::cSymbol),
+ make_pair(L"\\rho", Parser::cSymbol),
+ make_pair(L"\\varrho", Parser::cSymbol),
+ make_pair(L"\\sigma", Parser::cSymbol),
+ make_pair(L"\\varsigma", Parser::cSymbol),
+ make_pair(L"\\tau", Parser::cSymbol),
+ make_pair(L"\\upsilon", Parser::cSymbol),
+ make_pair(L"\\phi", Parser::cSymbol),
+ make_pair(L"\\varphi", Parser::cSymbol),
+ make_pair(L"\\chi", Parser::cSymbol),
+ make_pair(L"\\psi", Parser::cSymbol),
+ make_pair(L"\\omega", Parser::cSymbol),
+ make_pair(L"\\xi", Parser::cSymbol),
+ make_pair(L"\\digamma", Parser::cSymbol),
+
+ make_pair(L"\\Gamma", Parser::cSymbol),
+ make_pair(L"\\Delta", Parser::cSymbol),
+ make_pair(L"\\Theta", Parser::cSymbol),
+ make_pair(L"\\Lambda", Parser::cSymbol),
+ make_pair(L"\\Pi", Parser::cSymbol),
+ make_pair(L"\\Sigma", Parser::cSymbol),
+ make_pair(L"\\Upsilon", Parser::cSymbol),
+ make_pair(L"\\Phi", Parser::cSymbol),
+ make_pair(L"\\Psi", Parser::cSymbol),
+ make_pair(L"\\Omega", Parser::cSymbol),
+ make_pair(L"\\Xi", Parser::cSymbol),
+
+ make_pair(L"\\aleph", Parser::cSymbol),
+ make_pair(L"\\beth", Parser::cSymbol),
+ make_pair(L"\\gimel", Parser::cSymbol),
+ make_pair(L"\\daleth", Parser::cSymbol),
+
+ make_pair(L"\\wp", Parser::cSymbol),
+ make_pair(L"\\ell", Parser::cSymbol),
+ make_pair(L"\\P", Parser::cSymbol),
+ make_pair(L"\\imath", Parser::cSymbol),
+ make_pair(L"\\forall", Parser::cSymbol),
+ make_pair(L"\\exists", Parser::cSymbol),
+ make_pair(L"\\Finv", Parser::cSymbol),
+ make_pair(L"\\Game", Parser::cSymbol),
+ make_pair(L"\\partial", Parser::cSymbol),
+ make_pair(L"\\Re", Parser::cSymbol),
+ make_pair(L"\\Im", Parser::cSymbol),
+
+ make_pair(L"\\leftarrow", Parser::cSymbol),
+ make_pair(L"\\rightarrow", Parser::cSymbol),
+ make_pair(L"\\longleftarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\longrightarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\Leftarrow", Parser::cSymbol),
+ make_pair(L"\\Rightarrow", Parser::cSymbol),
+ make_pair(L"\\Longleftarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\Longrightarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\mapsto", Parser::cSymbolUnsafe),
+ make_pair(L"\\longmapsto", Parser::cSymbolUnsafe),
+ make_pair(L"\\leftrightarrow", Parser::cSymbol),
+ make_pair(L"\\Leftrightarrow", Parser::cSymbol),
+ make_pair(L"\\longleftrightarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\Longleftrightarrow", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\uparrow", Parser::cSymbol),
+ make_pair(L"\\Uparrow", Parser::cSymbol),
+ make_pair(L"\\downarrow", Parser::cSymbol),
+ make_pair(L"\\Downarrow", Parser::cSymbol),
+ make_pair(L"\\updownarrow", Parser::cSymbol),
+ make_pair(L"\\Updownarrow", Parser::cSymbol),
+
+ make_pair(L"\\searrow", Parser::cSymbol),
+ make_pair(L"\\nearrow", Parser::cSymbol),
+ make_pair(L"\\swarrow", Parser::cSymbol),
+ make_pair(L"\\nwarrow", Parser::cSymbol),
+
+ make_pair(L"\\hookrightarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\hookleftarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\upharpoonright", Parser::cSymbol),
+ make_pair(L"\\upharpoonleft", Parser::cSymbol),
+ make_pair(L"\\downharpoonright", Parser::cSymbol),
+ make_pair(L"\\downharpoonleft", Parser::cSymbol),
+ make_pair(L"\\rightharpoonup", Parser::cSymbol),
+ make_pair(L"\\rightharpoondown", Parser::cSymbol),
+ make_pair(L"\\leftharpoonup", Parser::cSymbol),
+ make_pair(L"\\leftharpoondown", Parser::cSymbol),
+
+ make_pair(L"\\nleftarrow", Parser::cSymbol),
+ make_pair(L"\\nrightarrow", Parser::cSymbol),
+
+ make_pair(L"\\supset", Parser::cSymbol),
+ make_pair(L"\\subset", Parser::cSymbol),
+ make_pair(L"\\supseteq", Parser::cSymbol),
+ make_pair(L"\\subseteq", Parser::cSymbol),
+ make_pair(L"\\sqsupset", Parser::cSymbol),
+ make_pair(L"\\sqsubset", Parser::cSymbol),
+ make_pair(L"\\sqsupseteq", Parser::cSymbol),
+ make_pair(L"\\sqsubseteq", Parser::cSymbol),
+ make_pair(L"\\supsetneq", Parser::cSymbol),
+ make_pair(L"\\subsetneq", Parser::cSymbol),
+
+ make_pair(L"\\in", Parser::cSymbol),
+ make_pair(L"\\ni", Parser::cSymbol),
+ make_pair(L"\\notin", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\iff", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\mid", Parser::cSymbol),
+ make_pair(L"\\sim", Parser::cSymbol),
+ make_pair(L"\\simeq", Parser::cSymbol),
+ make_pair(L"\\approx", Parser::cSymbol),
+ make_pair(L"\\propto", Parser::cSymbol),
+ make_pair(L"\\equiv", Parser::cSymbol),
+ make_pair(L"\\cong", Parser::cSymbolUnsafe),
+ make_pair(L"\\neq", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\ll", Parser::cSymbol),
+ make_pair(L"\\gg", Parser::cSymbol),
+ make_pair(L"\\geq", Parser::cSymbol),
+ make_pair(L"\\leq", Parser::cSymbol),
+ make_pair(L"\\triangleleft", Parser::cSymbol),
+ make_pair(L"\\triangleright", Parser::cSymbol),
+
+ make_pair(L"\\models", Parser::cSymbolUnsafe),
+ make_pair(L"\\vdash", Parser::cSymbol),
+ make_pair(L"\\Vdash", Parser::cSymbol),
+ make_pair(L"\\vDash", Parser::cSymbol),
+
+ make_pair(L"\\lesssim", Parser::cSymbol),
+ make_pair(L"\\nless", Parser::cSymbol),
+ make_pair(L"\\ngeq", Parser::cSymbol),
+ make_pair(L"\\nleq", Parser::cSymbol),
+
+ make_pair(L"\\ast", Parser::cSymbol),
+ make_pair(L"\\times", Parser::cSymbol),
+ make_pair(L"\\div", Parser::cSymbol),
+ make_pair(L"\\wedge", Parser::cSymbol),
+ make_pair(L"\\vee", Parser::cSymbol),
+ make_pair(L"\\oplus", Parser::cSymbol),
+ make_pair(L"\\otimes", Parser::cSymbol),
+ make_pair(L"\\cap", Parser::cSymbol),
+ make_pair(L"\\cup", Parser::cSymbol),
+ make_pair(L"\\sqcap", Parser::cSymbol),
+ make_pair(L"\\sqcup", Parser::cSymbol),
+ make_pair(L"\\smile", Parser::cSymbol),
+ make_pair(L"\\frown", Parser::cSymbol),
+ make_pair(L"\\smallsmile", Parser::cSymbol),
+ make_pair(L"\\smallfrown", Parser::cSymbol),
+
+ make_pair(L"\\setminus", Parser::cSymbol),
+ make_pair(L"\\smallsetminus", Parser::cSymbol),
+
+ make_pair(L"\\And", Parser::cSymbolUnsafe),
+
+ // This next group of "large operators" are all "safe" in ordinary
+ // TeX, but NOT when AMS-LaTeX is loaded, so we'll call them unsafe
+ // (just to be safe :-)
+
+ // FIX: we still don't size large operators correctly with style
+ // changes. TeX only uses TWO sizes for its "large operators".
+ // textstyle and below should all be the same size.
+ // Although, amsmath does things differently: investigate that.
+
+ make_pair(L"\\sum", Parser::cSymbolUnsafe),
+ make_pair(L"\\prod", Parser::cSymbolUnsafe),
+ make_pair(L"\\int", Parser::cSymbolUnsafe),
+ make_pair(L"\\iint", Parser::cSymbolUnsafe),
+ make_pair(L"\\iiint", Parser::cSymbolUnsafe),
+ make_pair(L"\\iiiint", Parser::cSymbolUnsafe),
+ make_pair(L"\\oint", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigcap", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigodot", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigcup", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigotimes", Parser::cSymbolUnsafe),
+ make_pair(L"\\coprod", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigsqcup", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigoplus", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigvee", Parser::cSymbolUnsafe),
+ make_pair(L"\\biguplus", Parser::cSymbolUnsafe),
+ make_pair(L"\\bigwedge", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\star", Parser::cSymbol),
+ make_pair(L"\\triangle", Parser::cSymbol),
+ make_pair(L"\\wr", Parser::cSymbol),
+ make_pair(L"\\infty", Parser::cSymbol),
+ make_pair(L"\\circ", Parser::cSymbol),
+ make_pair(L"\\hbar", Parser::cSymbol),
+ make_pair(L"\\lnot", Parser::cSymbol),
+ make_pair(L"\\nabla", Parser::cSymbol),
+ make_pair(L"\\prime", Parser::cSymbol),
+ make_pair(L"\\backslash", Parser::cSymbol),
+ make_pair(L"\\pm", Parser::cSymbol),
+ make_pair(L"\\mp", Parser::cSymbol),
+ make_pair(L"\\emptyset", Parser::cSymbol),
+ make_pair(L"\\varnothing", Parser::cSymbol),
+ make_pair(L"\\S", Parser::cSymbol),
+ make_pair(L"\\angle", Parser::cSymbol),
+ make_pair(L"\\colon", Parser::cSymbolUnsafe),
+ make_pair(L"\\nmid", Parser::cSymbol),
+ make_pair(L"\\square", Parser::cSymbol),
+ make_pair(L"\\Box", Parser::cSymbol),
+ make_pair(L"\\checkmark", Parser::cSymbol),
+ make_pair(L"\\complement", Parser::cSymbol),
+ make_pair(L"\\eth", Parser::cSymbol),
+ make_pair(L"\\hslash", Parser::cSymbol),
+ make_pair(L"\\mho", Parser::cSymbol),
+
+ make_pair(L"\\flat", Parser::cSymbol),
+ make_pair(L"\\sharp", Parser::cSymbol),
+ make_pair(L"\\natural", Parser::cSymbol),
+ make_pair(L"\\bullet", Parser::cSymbol),
+ make_pair(L"\\dagger", Parser::cSymbol),
+ make_pair(L"\\ddagger", Parser::cSymbol),
+
+ make_pair(L"\\clubsuit", Parser::cSymbol),
+ make_pair(L"\\spadesuit", Parser::cSymbol),
+ make_pair(L"\\heartsuit", Parser::cSymbol),
+ make_pair(L"\\diamondsuit", Parser::cSymbol),
+
+ make_pair(L"\\top", Parser::cSymbol),
+ make_pair(L"\\bot", Parser::cSymbol),
+ make_pair(L"\\perp", Parser::cSymbol),
+
+ make_pair(L"\\ldots", Parser::cSymbolUnsafe),
+ make_pair(L"\\cdot", Parser::cSymbol),
+ make_pair(L"\\cdots", Parser::cSymbolUnsafe),
+ make_pair(L"\\vdots", Parser::cSymbolUnsafe),
+ make_pair(L"\\ddots", Parser::cSymbolUnsafe),
+
+ // AMSLaTeX is pretty clever with "\dots" and "\dotsb"; it adjusts the
+ // height of the dots based on the surrounding symbols. Blahtex
+ // currently doesn't do this.
+ make_pair(L"\\dots", Parser::cSymbolUnsafe),
+ make_pair(L"\\dotsb", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\varinjlim", Parser::cSymbolUnsafe),
+ make_pair(L"\\varprojlim", Parser::cSymbolUnsafe),
+
+ make_pair(L"\\circledR", Parser::cSymbol),
+ make_pair(L"\\yen", Parser::cSymbol),
+ make_pair(L"\\maltese", Parser::cSymbol),
+ make_pair(L"\\ulcorner", Parser::cSymbol),
+ make_pair(L"\\urcorner", Parser::cSymbol),
+ make_pair(L"\\llcorner", Parser::cSymbol),
+ make_pair(L"\\lrcorner", Parser::cSymbol),
+ make_pair(L"\\dashrightarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\dashleftarrow", Parser::cSymbolUnsafe),
+ make_pair(L"\\backprime", Parser::cSymbol),
+ make_pair(L"\\vartriangle", Parser::cSymbol),
+ make_pair(L"\\blacktriangle", Parser::cSymbol),
+ make_pair(L"\\triangledown", Parser::cSymbol),
+ make_pair(L"\\blacktriangledown", Parser::cSymbol),
+ make_pair(L"\\blacksquare", Parser::cSymbol),
+ make_pair(L"\\lozenge", Parser::cSymbol),
+ make_pair(L"\\blacklozenge", Parser::cSymbol),
+ make_pair(L"\\circledS", Parser::cSymbol),
+ make_pair(L"\\bigstar", Parser::cSymbol),
+ make_pair(L"\\sphericalangle", Parser::cSymbol),
+ make_pair(L"\\measuredangle", Parser::cSymbol),
+ make_pair(L"\\Bbbk", Parser::cSymbol),
+ make_pair(L"\\dotplus", Parser::cSymbol),
+ make_pair(L"\\ltimes", Parser::cSymbol),
+ make_pair(L"\\rtimes", Parser::cSymbol),
+ make_pair(L"\\Cap", Parser::cSymbol),
+ make_pair(L"\\leftthreetimes", Parser::cSymbol),
+ make_pair(L"\\rightthreetimes", Parser::cSymbol),
+ make_pair(L"\\Cup", Parser::cSymbol),
+ make_pair(L"\\barwedge", Parser::cSymbol),
+ make_pair(L"\\curlywedge", Parser::cSymbol),
+ make_pair(L"\\veebar", Parser::cSymbol),
+ make_pair(L"\\curlyvee", Parser::cSymbol),
+ make_pair(L"\\doublebarwedge", Parser::cSymbol),
+ make_pair(L"\\boxminus", Parser::cSymbol),
+ make_pair(L"\\circleddash", Parser::cSymbol),
+ make_pair(L"\\boxtimes", Parser::cSymbol),
+ make_pair(L"\\circledast", Parser::cSymbol),
+ make_pair(L"\\boxdot", Parser::cSymbol),
+ make_pair(L"\\circledcirc", Parser::cSymbol),
+ make_pair(L"\\boxplus", Parser::cSymbol),
+ make_pair(L"\\centerdot", Parser::cSymbol),
+ make_pair(L"\\divideontimes", Parser::cSymbol),
+ make_pair(L"\\intercal", Parser::cSymbol),
+ make_pair(L"\\leqq", Parser::cSymbol),
+ make_pair(L"\\geqq", Parser::cSymbol),
+ make_pair(L"\\leqslant", Parser::cSymbol),
+ make_pair(L"\\geqslant", Parser::cSymbol),
+ make_pair(L"\\eqslantless", Parser::cSymbol),
+ make_pair(L"\\eqslantgtr", Parser::cSymbol),
+ make_pair(L"\\gtrsim", Parser::cSymbol),
+ make_pair(L"\\lessapprox", Parser::cSymbol),
+ make_pair(L"\\gtrapprox", Parser::cSymbol),
+ make_pair(L"\\approxeq", Parser::cSymbol),
+ make_pair(L"\\eqsim", Parser::cSymbol),
+ make_pair(L"\\lessdot", Parser::cSymbol),
+ make_pair(L"\\gtrdot", Parser::cSymbol),
+ make_pair(L"\\lll", Parser::cSymbol),
+ make_pair(L"\\ggg", Parser::cSymbol),
+ make_pair(L"\\lessgtr", Parser::cSymbol),
+ make_pair(L"\\gtrless", Parser::cSymbol),
+ make_pair(L"\\lesseqgtr", Parser::cSymbol),
+ make_pair(L"\\gtreqless", Parser::cSymbol),
+ make_pair(L"\\lesseqqgtr", Parser::cSymbol),
+ make_pair(L"\\gtreqqless", Parser::cSymbol),
+ make_pair(L"\\doteqdot", Parser::cSymbol),
+ make_pair(L"\\eqcirc", Parser::cSymbol),
+ make_pair(L"\\risingdotseq", Parser::cSymbol),
+ make_pair(L"\\circeq", Parser::cSymbol),
+ make_pair(L"\\fallingdotseq", Parser::cSymbol),
+ make_pair(L"\\triangleq", Parser::cSymbol),
+ make_pair(L"\\backsim", Parser::cSymbol),
+ make_pair(L"\\thicksim", Parser::cSymbol),
+ make_pair(L"\\backsimeq", Parser::cSymbol),
+ make_pair(L"\\thickapprox", Parser::cSymbol),
+ make_pair(L"\\subseteqq", Parser::cSymbol),
+ make_pair(L"\\supseteqq", Parser::cSymbol),
+ make_pair(L"\\Subset", Parser::cSymbol),
+ make_pair(L"\\Supset", Parser::cSymbol),
+ make_pair(L"\\preccurlyeq", Parser::cSymbol),
+ make_pair(L"\\succcurlyeq", Parser::cSymbol),
+ make_pair(L"\\curlyeqprec", Parser::cSymbol),
+ make_pair(L"\\curlyeqsucc", Parser::cSymbol),
+ make_pair(L"\\precsim", Parser::cSymbol),
+ make_pair(L"\\succsim", Parser::cSymbol),
+ make_pair(L"\\precapprox", Parser::cSymbol),
+ make_pair(L"\\succapprox", Parser::cSymbol),
+ make_pair(L"\\Vvdash", Parser::cSymbol),
+ make_pair(L"\\shortmid", Parser::cSymbol),
+ make_pair(L"\\shortparallel", Parser::cSymbol),
+ make_pair(L"\\bumpeq", Parser::cSymbol),
+ make_pair(L"\\between", Parser::cSymbol),
+ make_pair(L"\\Bumpeq", Parser::cSymbol),
+ make_pair(L"\\varpropto", Parser::cSymbol),
+ make_pair(L"\\backepsilon", Parser::cSymbol),
+ make_pair(L"\\blacktriangleleft", Parser::cSymbol),
+ make_pair(L"\\blacktriangleright", Parser::cSymbol),
+ make_pair(L"\\therefore", Parser::cSymbol),
+ make_pair(L"\\because", Parser::cSymbol),
+ make_pair(L"\\ngtr", Parser::cSymbol),
+ make_pair(L"\\nleqslant", Parser::cSymbol),
+ make_pair(L"\\ngeqslant", Parser::cSymbol),
+ make_pair(L"\\nleqq", Parser::cSymbol),
+ make_pair(L"\\ngeqq", Parser::cSymbol),
+ make_pair(L"\\lneqq", Parser::cSymbol),
+ make_pair(L"\\gneqq", Parser::cSymbol),
+ make_pair(L"\\lvertneqq", Parser::cSymbol),
+ make_pair(L"\\gvertneqq", Parser::cSymbol),
+ make_pair(L"\\lnsim", Parser::cSymbol),
+ make_pair(L"\\gnsim", Parser::cSymbol),
+ make_pair(L"\\lnapprox", Parser::cSymbol),
+ make_pair(L"\\gnapprox", Parser::cSymbol),
+ make_pair(L"\\nprec", Parser::cSymbol),
+ make_pair(L"\\nsucc", Parser::cSymbol),
+ make_pair(L"\\npreceq", Parser::cSymbol),
+ make_pair(L"\\nsucceq", Parser::cSymbol),
+ make_pair(L"\\precneqq", Parser::cSymbol),
+ make_pair(L"\\succneqq", Parser::cSymbol),
+ make_pair(L"\\precnsim", Parser::cSymbol),
+ make_pair(L"\\succnsim", Parser::cSymbol),
+ make_pair(L"\\precnapprox", Parser::cSymbol),
+ make_pair(L"\\succnapprox", Parser::cSymbol),
+ make_pair(L"\\nsim", Parser::cSymbol),
+ make_pair(L"\\ncong", Parser::cSymbol),
+ make_pair(L"\\nshortmid", Parser::cSymbol),
+ make_pair(L"\\nshortparallel", Parser::cSymbol),
+ make_pair(L"\\nmid", Parser::cSymbol),
+ make_pair(L"\\nparallel", Parser::cSymbol),
+ make_pair(L"\\nvdash", Parser::cSymbol),
+ make_pair(L"\\nvDash", Parser::cSymbol),
+ make_pair(L"\\nVdash", Parser::cSymbol),
+ make_pair(L"\\nVDash", Parser::cSymbol),
+ make_pair(L"\\ntriangleleft", Parser::cSymbol),
+ make_pair(L"\\ntriangleright", Parser::cSymbol),
+ make_pair(L"\\ntrianglelefteq", Parser::cSymbol),
+ make_pair(L"\\ntrianglerighteq", Parser::cSymbol),
+ make_pair(L"\\nsubseteq", Parser::cSymbol),
+ make_pair(L"\\nsupseteq", Parser::cSymbol),
+ make_pair(L"\\nsubseteqq", Parser::cSymbol),
+ make_pair(L"\\nsupseteqq", Parser::cSymbol),
+ make_pair(L"\\subsetneq", Parser::cSymbol),
+ make_pair(L"\\supsetneq", Parser::cSymbol),
+ make_pair(L"\\varsubsetneq", Parser::cSymbol),
+ make_pair(L"\\varsupsetneq", Parser::cSymbol),
+ make_pair(L"\\subsetneqq", Parser::cSymbol),
+ make_pair(L"\\supsetneqq", Parser::cSymbol),
+ make_pair(L"\\varsubsetneqq", Parser::cSymbol),
+ make_pair(L"\\varsupsetneqq", Parser::cSymbol),
+ make_pair(L"\\leftleftarrows", Parser::cSymbol),
+ make_pair(L"\\rightrightarrows", Parser::cSymbol),
+ make_pair(L"\\leftrightarrows", Parser::cSymbol),
+ make_pair(L"\\rightleftarrows", Parser::cSymbol),
+ make_pair(L"\\Lleftarrow", Parser::cSymbol),
+ make_pair(L"\\Rrightarrow", Parser::cSymbol),
+ make_pair(L"\\twoheadleftarrow", Parser::cSymbol),
+ make_pair(L"\\twoheadrightarrow", Parser::cSymbol),
+ make_pair(L"\\leftarrowtail", Parser::cSymbol),
+ make_pair(L"\\rightarrowtail", Parser::cSymbol),
+ make_pair(L"\\looparrowleft", Parser::cSymbol),
+ make_pair(L"\\looparrowright", Parser::cSymbol),
+ make_pair(L"\\leftrightharpoons", Parser::cSymbol),
+ make_pair(L"\\rightleftharpoons", Parser::cSymbol),
+ make_pair(L"\\curvearrowleft", Parser::cSymbol),
+ make_pair(L"\\curvearrowright", Parser::cSymbol),
+ make_pair(L"\\circlearrowleft", Parser::cSymbol),
+ make_pair(L"\\circlearrowright", Parser::cSymbol),
+ make_pair(L"\\Lsh", Parser::cSymbol),
+ make_pair(L"\\Rsh", Parser::cSymbol),
+ make_pair(L"\\upuparrows", Parser::cSymbol),
+ make_pair(L"\\downdownarrows", Parser::cSymbol),
+ make_pair(L"\\multimap", Parser::cSymbol),
+ make_pair(L"\\rightsquigarrow", Parser::cSymbol),
+ make_pair(L"\\leftrightsquigarrow", Parser::cSymbol),
+ make_pair(L"\\nLeftarrow", Parser::cSymbol),
+ make_pair(L"\\nRightarrow", Parser::cSymbol),
+ make_pair(L"\\nleftrightarrow", Parser::cSymbol),
+ make_pair(L"\\nLeftrightarrow", Parser::cSymbol),
+ make_pair(L"\\pitchfork", Parser::cSymbol),
+ make_pair(L"\\nexists", Parser::cSymbol),
+ make_pair(L"\\lhd", Parser::cSymbol),
+ make_pair(L"\\rhd", Parser::cSymbol),
+ make_pair(L"\\unlhd", Parser::cSymbol),
+ make_pair(L"\\unrhd", Parser::cSymbol),
+ make_pair(L"\\leadsto", Parser::cSymbol),
+ make_pair(L"\\uplus", Parser::cSymbol),
+ make_pair(L"\\diamond", Parser::cSymbol),
+ make_pair(L"\\bigtriangleup", Parser::cSymbol),
+ make_pair(L"\\bigtriangledown", Parser::cSymbol),
+ make_pair(L"\\ominus", Parser::cSymbol),
+ make_pair(L"\\oslash", Parser::cSymbol),
+ make_pair(L"\\odot", Parser::cSymbol),
+ make_pair(L"\\bigcirc", Parser::cSymbol),
+ make_pair(L"\\amalg", Parser::cSymbol),
+ make_pair(L"\\prec", Parser::cSymbol),
+ make_pair(L"\\succ", Parser::cSymbol),
+ make_pair(L"\\preceq", Parser::cSymbol),
+ make_pair(L"\\succeq", Parser::cSymbol),
+ make_pair(L"\\dashv", Parser::cSymbol),
+ make_pair(L"\\asymp", Parser::cSymbol),
+ make_pair(L"\\doteq", Parser::cSymbolUnsafe),
+ make_pair(L"\\parallel", Parser::cSymbol),
+ make_pair(L"\\bowtie", Parser::cSymbolUnsafe),
+ make_pair(L"\\jmath", Parser::cSymbol),
+ make_pair(L"\\surd", Parser::cSymbol)
+};
+
+// Lookup table from a math-mode token to its Parser::TokenCode. It is
+// constructed from gMathTokenArray and END_ARRAY(gMathTokenArray)
+// (presumably a begin/end pair covering the whole array -- confirm against
+// the END_ARRAY definition).
+// NOTE(review): gMathTokenArray lists "\nmid", "\subsetneq" and "\supsetneq"
+// twice, each time with Parser::cSymbol. Presumably the container keeps a
+// single entry per key so the repeats are harmless -- confirm for
+// wishful_hash_map.
+wishful_hash_map<wstring, Parser::TokenCode> gMathTokenTable(
+ gMathTokenArray,
+ END_ARRAY(gMathTokenArray)
+);
+
+// Tokens that have an entry in text mode, each paired with the
+// Parser::TokenCode it maps to. This array is the initialiser data for
+// gTextTokenTable.
+pair<wstring, Parser::TokenCode> gTextTokenArray[] =
+{
+ // End of input (empty token), whitespace and "\newcommand".
+ make_pair(L"", Parser::cEndOfInput),
+ make_pair(L" ", Parser::cWhitespace),
+ make_pair(L"\\newcommand", Parser::cNewcommand),
+
+ // Grouping braces.
+ make_pair(L"{", Parser::cBeginGroup),
+ make_pair(L"}", Parser::cEndGroup),
+
+ // Tokens marked illegal in text mode. Parser::GetTextTokenCode never
+ // returns cIllegal; it throws an Exception for these instead.
+ make_pair(L"$", Parser::cIllegal),
+ make_pair(L"%", Parser::cIllegal),
+ make_pair(L"#", Parser::cIllegal),
+ make_pair(L"&", Parser::cIllegal),
+ make_pair(L"\\\\", Parser::cIllegal),
+ make_pair(L"^", Parser::cIllegal),
+ make_pair(L"_", Parser::cIllegal),
+
+ // Escaped special characters and named text symbols.
+ make_pair(L"\\&", Parser::cSymbol),
+ make_pair(L"\\_", Parser::cSymbol),
+ make_pair(L"\\$", Parser::cSymbol),
+ make_pair(L"\\#", Parser::cSymbol),
+ make_pair(L"\\%", Parser::cSymbol),
+ make_pair(L"\\{", Parser::cSymbol),
+ make_pair(L"\\}", Parser::cSymbol),
+ make_pair(L"\\textbackslash", Parser::cSymbol),
+ make_pair(L"\\textvisiblespace", Parser::cSymbol),
+ make_pair(L"\\textasciicircum", Parser::cSymbol),
+ make_pair(L"\\textasciitilde", Parser::cSymbol),
+ make_pair(L"\\O", Parser::cSymbol),
+ make_pair(L"\\S", Parser::cSymbol),
+
+ // Plain punctuation characters.
+ make_pair(L"!", Parser::cSymbol),
+ make_pair(L"@", Parser::cSymbol),
+ make_pair(L"*", Parser::cSymbol),
+ make_pair(L"(", Parser::cSymbol),
+ make_pair(L")", Parser::cSymbol),
+ make_pair(L"-", Parser::cSymbol),
+ make_pair(L"=", Parser::cSymbol),
+ make_pair(L"+", Parser::cSymbol),
+ make_pair(L"[", Parser::cSymbol),
+ make_pair(L"]", Parser::cSymbol),
+ make_pair(L"|", Parser::cSymbol),
+ make_pair(L";", Parser::cSymbol),
+ make_pair(L":", Parser::cSymbol),
+ make_pair(L"<", Parser::cSymbol),
+ make_pair(L">", Parser::cSymbol),
+ make_pair(L",", Parser::cSymbol),
+ make_pair(L".", Parser::cSymbol),
+ make_pair(L"/", Parser::cSymbol),
+ make_pair(L"?", Parser::cSymbol),
+ make_pair(L"\"", Parser::cSymbol),
+ make_pair(L"\'", Parser::cSymbol),
+
+ // The tie "~" and the TeX spacing commands, all tagged cSymbolUnsafe.
+ make_pair(L"~", Parser::cSymbolUnsafe),
+ make_pair(L"\\,", Parser::cSymbolUnsafe),
+ make_pair(L"\\!", Parser::cSymbolUnsafe),
+ make_pair(L"\\ ", Parser::cSymbolUnsafe),
+ make_pair(L"\\;", Parser::cSymbolUnsafe),
+ make_pair(L"\\quad", Parser::cSymbolUnsafe),
+ make_pair(L"\\qquad", Parser::cSymbolUnsafe),
+
+ // Commands tagged as taking one argument.
+ make_pair(L"\\hbox", Parser::cCommand1Arg),
+ make_pair(L"\\mbox", Parser::cCommand1Arg),
+ make_pair(L"\\emph", Parser::cCommand1Arg),
+ make_pair(L"\\text", Parser::cCommand1Arg),
+ make_pair(L"\\textit", Parser::cCommand1Arg),
+ make_pair(L"\\textbf", Parser::cCommand1Arg),
+ make_pair(L"\\textrm", Parser::cCommand1Arg),
+ make_pair(L"\\texttt", Parser::cCommand1Arg),
+ make_pair(L"\\textsf", Parser::cCommand1Arg),
+
+ // NOTE(review): presumably blahtex-specific commands for Cyrillic and
+ // Japanese text -- confirm against the manual.
+ make_pair(L"\\cyr", Parser::cCommand1Arg),
+ make_pair(L"\\jap", Parser::cCommand1Arg),
+
+ // Font commands tagged as state changes.
+ make_pair(L"\\rm", Parser::cStateChange),
+ make_pair(L"\\it", Parser::cStateChange),
+ make_pair(L"\\bf", Parser::cStateChange),
+ make_pair(L"\\tt", Parser::cStateChange),
+ make_pair(L"\\sf", Parser::cStateChange),
+
+ // "\color" is also tagged as a state change.
+ make_pair(L"\\color", Parser::cStateChange)
+};
+
+// Lookup table from a text-mode token to its Parser::TokenCode. It is
+// constructed from gTextTokenArray and END_ARRAY(gTextTokenArray)
+// (presumably a begin/end pair covering the whole array -- confirm against
+// the END_ARRAY definition).
+wishful_hash_map<wstring, Parser::TokenCode> gTextTokenTable(
+ gTextTokenArray,
+ END_ARRAY(gTextTokenArray)
+);
+
+// Returns true when the supplied token has an entry in gMathTokenTable or
+// in gTextTokenTable, and false when it appears in neither.
+bool IsInTokenTables(const wstring& token)
+{
+    // The math table is consulted first; the text table is only queried
+    // when the math table has no entry.
+    if (gMathTokenTable.count(token) > 0)
+        return true;
+
+    return gTextTokenTable.count(token) > 0;
+}
+
+// Classifies a token encountered in math mode.
+//
+// The token is first passed through translateToken(); the translated form
+// is what gets looked up in gMathTokenTable and inspected below, while the
+// untranslated token is what gets reported in error messages.
+//
+// Returns the code stored in gMathTokenTable for a listed, legal token, or
+// cSymbol for an unlisted token whose first character is an ASCII letter or
+// digit.
+//
+// Throws Exception for tokens the table marks illegal, for commands that
+// are unknown or only listed in gTextTokenTable, for a first character
+// above 0x7F, and for any other unlisted token. Throws logic_error if the
+// table marks a token illegal for which no message is provided here.
+Parser::TokenCode Parser::GetMathTokenCode(const wstring& token) const
+{
+    wstring key = translateToken(token);
+    wishful_hash_map<wstring, TokenCode>::const_iterator
+        entry = gMathTokenTable.find(key);
+
+    if (entry == gMathTokenTable.end())
+    {
+        // Not listed for math mode: decide from the leading character.
+        if (key[0] == L'\\')
+        {
+            // A command. Distinguish "valid, but only in text mode" from
+            // "not known at all".
+            if (gTextTokenTable.count(key))
+                throw Exception(L"IllegalCommandInMathMode", token);
+
+            throw Exception(L"UnrecognisedCommand", token);
+        }
+
+        if (key[0] > 0x7F)
+            throw Exception(L"NonAsciiInMathMode");
+
+        const bool isLower = (key[0] >= L'a' && key[0] <= L'z');
+        const bool isUpper = (key[0] >= L'A' && key[0] <= L'Z');
+        const bool isDigit = (key[0] >= L'0' && key[0] <= L'9');
+
+        if (isLower || isUpper || isDigit)
+            return cSymbol;
+
+        throw Exception(L"UnrecognisedCommand", token);
+    }
+
+    if (entry->second != cIllegal)
+        return entry->second;
+
+    // The token is listed but illegal. Give the user some helpful hints for
+    // certain characters (e.g. "% is illegal, try \% instead").
+    if (key == L"%" || key == L"#" || key == L"$")
+        throw Exception(
+            L"IllegalCommandInMathModeWithHint",
+            token, L"\\" + key
+        );
+
+    if (key == L"`" || key == L"\"")
+        throw Exception(L"IllegalCommandInMathMode", token);
+
+    throw logic_error(
+        "Unexpected illegal character in Parser::GetMathTokenCode"
+    );
+}
+
+// Classifies a token encountered in text mode.
+//
+// Returns the code stored in gTextTokenTable for a listed, legal token, or
+// cSymbol for an unlisted token whose first character is an ASCII letter,
+// an ASCII digit, or any character above 0x7F.
+//
+// Throws Exception for tokens the table marks illegal (with a suggested
+// replacement where one is known), for commands that are unknown or only
+// listed in gMathTokenTable, and for any other unlisted token.
+Parser::TokenCode Parser::GetTextTokenCode(const wstring& token) const
+{
+    wishful_hash_map<wstring, TokenCode>::const_iterator
+        entry = gTextTokenTable.find(token);
+
+    if (entry == gTextTokenTable.end())
+    {
+        // Not listed for text mode: decide from the leading character.
+        if (token[0] == L'\\')
+        {
+            // A command. Distinguish "valid, but only in math mode" from
+            // "not known at all".
+            if (gMathTokenTable.count(token))
+                throw Exception(L"IllegalCommandInTextMode", token);
+
+            throw Exception(L"UnrecognisedCommand", token);
+        }
+
+        const bool isLower = (token[0] >= L'a' && token[0] <= L'z');
+        const bool isUpper = (token[0] >= L'A' && token[0] <= L'Z');
+        const bool isDigit = (token[0] >= L'0' && token[0] <= L'9');
+        const bool isNonAscii = (token[0] > 0x7F);
+
+        if (isLower || isUpper || isDigit || isNonAscii)
+            return cSymbol;
+
+        throw Exception(L"UnrecognisedCommand", token);
+    }
+
+    if (entry->second != cIllegal)
+        return entry->second;
+
+    // The token is listed but illegal. Give the user some helpful hints
+    // where a replacement is known.
+    if (token == L"&" || token == L"_" || token == L"%"
+        || token == L"#" || token == L"$")
+    {
+        // The escaped form of the same character is the suggestion.
+        throw Exception(
+            L"IllegalCommandInTextModeWithHint",
+            token, L"\\" + token
+        );
+    }
+
+    if (token == L"\\\\")
+        throw Exception(
+            L"IllegalCommandInTextModeWithHint",
+            L"\\\\", L"\\textbackslash"
+        );
+
+    if (token == L"^")
+        throw Exception(
+            L"IllegalCommandInTextModeWithHint",
+            L"^", L"\\textasciicircum"
+        );
+
+    throw Exception(L"IllegalCommandInTextMode", token);
+}
+
+// Parses the given token sequence as a math list and returns the resulting
+// parse tree.
+//
+// A new MacroProcessor over "input" replaces the current token source.
+// Throws Exception if parsing stops at anything other than the end of the
+// input (an unmatched "}", "\right", cell/row separator or environment
+// end), and logic_error if it stops at a token code not handled here.
+auto_ptr<ParseTree::MathNode> Parser::DoParse(const vector<wstring>& input)
+{
+    mTokenSource.reset(new MacroProcessor(input));
+
+    // Parse until we hit a closing token of some kind...
+    auto_ptr<ParseTree::MathNode> tree = ParseMathList();
+
+    // ... and check that the closing token is actually the end of input.
+    const TokenCode terminator = GetMathTokenCode(mTokenSource->Peek());
+
+    if (terminator == cEndOfInput)
+        return tree;
+
+    if (terminator == cEndGroup)
+        throw Exception(L"UnmatchedCloseBrace");
+
+    if (terminator == cRight)
+        throw Exception(L"UnmatchedRight");
+
+    if (terminator == cNextCell)
+        throw Exception(L"UnexpectedNextCell");
+
+    if (terminator == cNextRow)
+        throw Exception(L"UnexpectedNextRow");
+
+    if (terminator == cEndEnvironment)
+        throw Exception(L"UnmatchedEnd");
+
+    throw logic_error("Unexpected token code in Parser::DoParse");
+}
+
+// Parses a single math "field": either one symbol token, or a complete
+// brace-delimited group.
+//
+// Leading whitespace is skipped and the next token is read and passed
+// through translateToken(). Throws Exception if that token is neither a
+// symbol nor an opening brace, or if the group is not closed by "}".
+auto_ptr<ParseTree::MathNode> Parser::ParseMathField()
+{
+    mTokenSource->SkipWhitespace();
+    wstring command = translateToken(mTokenSource->Get());
+
+    const TokenCode code = GetMathTokenCode(command);
+
+    if (code == cSymbol)
+    {
+        return auto_ptr<ParseTree::MathNode>(
+            new ParseTree::MathSymbol(command)
+        );
+    }
+
+    if (code == cBeginGroup)
+    {
+        // Grab the argument surrounded by braces
+        auto_ptr<ParseTree::MathNode> group = ParseMathList();
+
+        // Gobble closing brace
+        if (mTokenSource->Get() != L"}")
+            throw Exception(L"UnmatchedOpenBrace");
+
+        return group;
+    }
+
+    if (code == cEndOfInput)
+        throw Exception(L"MissingOpenBraceAtEnd");
+
+    throw Exception(L"MissingOpenBraceBefore", command);
+}
+
+// Parses a sequence of table cells and rows from the token source.
+//
+// Each cell is read with ParseMathList(). A cNextCell token ends the
+// current cell; a cNextRow token ends both the current cell and the current
+// row. Both kinds of separator are consumed. Any of cEndGroup, cRight,
+// cEndOfInput or cEndEnvironment ends the table; that token is only peeked
+// at, so it is still pending when this function returns.
+//
+// Throws logic_error if ParseMathList() stops at any other token code.
+auto_ptr<ParseTree::MathTable> Parser::ParseMathTable()
+{
+    auto_ptr<ParseTree::MathTable> table(new ParseTree::MathTable);
+    // "row" holds the current, incomplete row being parsed
+    auto_ptr<ParseTree::MathTableRow> row(new ParseTree::MathTableRow);
+
+    while (true)
+    {
+        auto_ptr<ParseTree::MathNode> entry = ParseMathList();
+
+        // Ownership note: nodes are handed to the containers with
+        // "push_back(p.get()); p.release();" rather than
+        // "push_back(p.release())". If push_back throws, the auto_ptr
+        // still owns the node and frees it, instead of the node leaking.
+        switch (GetMathTokenCode(mTokenSource->Peek()))
+        {
+            case cNextCell:
+            {
+                mTokenSource->Advance();
+                row->mEntries.push_back(entry.get());
+                entry.release();
+                break;
+            }
+
+            case cNextRow:
+            {
+                mTokenSource->Advance();
+                row->mEntries.push_back(entry.get());
+                entry.release();
+                table->mRows.push_back(row.get());
+                row.release();
+                // Start collecting the next row.
+                row.reset(new ParseTree::MathTableRow);
+                break;
+            }
+
+            case cEndGroup:
+            case cRight:
+            case cEndOfInput:
+            case cEndEnvironment:
+            {
+                // We only include the last row if it isn't blank,
+                // e.g. "\begin{matrix} a \\ \end{matrix}" should only
+                // result in a single row.
+
+                ParseTree::MathList* check =
+                    dynamic_cast<ParseTree::MathList*>(entry.get());
+
+                if (!check ||
+                    !check->mChildren.empty() || !row->mEntries.empty()
+                )
+                {
+                    row->mEntries.push_back(entry.get());
+                    entry.release();
+                    table->mRows.push_back(row.get());
+                    row.release();
+                }
+
+                return table;
+            }
+
+            default:
+                throw logic_error(
+                    "Unexpected token code in Parser::ParseMathTable"
+                );
+        }
+    }
+
+    // Hmmm... gcc seems to think the control flow can reach here...
+    throw logic_error("Unexpected control flow in Parser::ParseMathTable");
+}
+
+// Ensures that the last child of "output" is a MathScripts node and
+// returns a pointer to it. The returned node remains in output->mChildren;
+// the caller is not handed a separate object to free.
+//
+//  - If "output" has no children, a new MathScripts node with an empty
+//    base is appended.
+//  - If the last child is already a MathScripts node, it is returned as is.
+//  - Otherwise the last child is moved into the base of a new MathScripts
+//    node, which takes its place in the list.
+ParseTree::MathScripts* Parser::PrepareScripts(ParseTree::MathList* output)
+{
+    if (output->mChildren.empty())
+    {
+        // If there are no nodes yet, make a new scripts node with an
+        // empty base. Keep it in an auto_ptr until push_back has
+        // succeeded, so that it is freed rather than leaked if push_back
+        // throws.
+        auto_ptr<ParseTree::MathScripts> fresh(new ParseTree::MathScripts);
+        output->mChildren.push_back(fresh.get());
+        return fresh.release();
+    }
+
+    ParseTree::MathScripts* target =
+        dynamic_cast<ParseTree::MathScripts*>(output->mChildren.back());
+
+    if (!target)
+    {
+        // If the last node exists but is not a scripts node,
+        // shove it into the base of a new scripts node. Nothing between
+        // the allocation and the final assignment inserts into the list,
+        // so the node always ends up with exactly one owner.
+        target = new ParseTree::MathScripts;
+        target->mBase.reset(output->mChildren.back());
+        output->mChildren.back() = target;
+    }
+
+    return target;
+}
+
+
+// Reads a brace-delimited colour name (the argument of "\color") from the
+// token source and returns it.
+//
+// Leading whitespace before the opening brace is skipped. Every token
+// between the braces must be a single ASCII letter, and the assembled name
+// must have an entry in gColourTable.
+//
+// Throws Exception if the opening brace is missing, if the input ends
+// before the closing brace, if a token is not a single ASCII letter, or if
+// the name is not in gColourTable.
+wstring Parser::ParseColourName()
+{
+    mTokenSource->SkipWhitespace();
+    if (mTokenSource->Get() != L"{")
+        throw Exception(L"MissingOpenBraceAfter", L"\\color");
+
+    wstring name;
+    for (;;)
+    {
+        wstring piece = mTokenSource->Get();
+
+        if (piece == L"}")
+            break;
+
+        // An empty token means the input ran out before the brace closed.
+        if (piece == L"")
+            throw Exception(L"UnmatchedOpenBrace");
+
+        // The offending token is appended before the check so that it
+        // shows up in the error message.
+        name += piece;
+
+        const bool isSingleLetter =
+            piece.size() == 1 &&
+            (
+                (piece[0] >= 'A' && piece[0] <= 'Z') ||
+                (piece[0] >= 'a' && piece[0] <= 'z')
+            );
+
+        if (!isSingleLetter)
+            throw Exception(
+                L"InvalidColour",
+                name + L"..."
+            );
+    }
+
+    // check colour name is valid
+    if (gColourTable.find(name) == gColourTable.end())
+        throw Exception(L"InvalidColour", name);
+
+    return name;
+}
+
+
+// Parses a math list from mTokenSource, stopping at (but not consuming) the
+// first token whose code ends a list: cEndGroup, cRight, cNextCell,
+// cNextRow, cEndEnvironment or cEndOfInput.
+//
+// Returns:
+//   - a MathCommand2Args node (flagged as infix) if an infix command such
+//     as "\over" was seen; its two arguments are the lists collected before
+//     and after the infix command;
+//   - otherwise, the single child node if the list has exactly one child;
+//   - otherwise, the MathList itself.
+//
+// Throws Exception for malformed input (unmatched braces, bad delimiters,
+// double scripts, etc.) and logic_error if GetMathTokenCode returns a code
+// that this switch does not handle.
+auto_ptr<ParseTree::MathNode> Parser::ParseMathList()
+{
+ auto_ptr<ParseTree::MathList> output(new ParseTree::MathList);
+
+ // infixNumerator temporarily holds the numerator of an infix command
+ // (like "\over"), while we are waiting for the denominator to be
+ // fully built up...
+ auto_ptr<ParseTree::MathList> infixNumerator;
+ // and the infix command itself is stored here:
+ wstring infixCommand;
+
+ while (true)
+ {
+ switch (GetMathTokenCode(mTokenSource->Peek()))
+ {
+ // Terminators: the token is only peeked at, so the caller still
+ // sees it and can decide how to consume it.
+ case cEndGroup:
+ case cRight:
+ case cNextCell:
+ case cNextRow:
+ case cEndEnvironment:
+ case cEndOfInput:
+ {
+ // It's a little strange that the following static_casts
+ // should be needed, but gcc 3.3 seems to require them.
+ // Don't know about later versions.
+ if (!infixCommand.empty())
+ return auto_ptr<ParseTree::MathNode>(
+ new ParseTree::MathCommand2Args(
+ infixCommand,
+ static_cast<auto_ptr<ParseTree::MathNode> >
+ (infixNumerator),
+ static_cast<auto_ptr<ParseTree::MathNode> >
+ (output),
+ true // true = this is an infix command rather
+ // than a two-argument command
+ )
+ );
+ else
+ {
+ if (output->mChildren.size() == 1)
+ {
+ // If there's only one node in the list, return just
+ // that single node. (We need to actually remove
+ // it from output->mChildren to respect ownership
+ // rules; otherwise output's destructor will delete
+ // it *again*).
+ auto_ptr<ParseTree::MathNode> temp(
+ output->mChildren.back()
+ );
+ output->mChildren.clear();
+ return temp;
+ }
+ else
+ return static_cast<auto_ptr<ParseTree::MathNode> >
+ (output);
+ }
+ }
+
+ case cNewcommand:
+ {
+ // Pass back the macro definition to be handled by the
+ // attached MacroProcessor.
+ mTokenSource->HandleNewcommand();
+ break;
+ }
+
+ case cWhitespace:
+ {
+ // Skip whitespace.
+ mTokenSource->Advance();
+ break;
+ }
+
+ case cSymbol:
+ case cSymbolUnsafe:
+ {
+ // In a list context both kinds of symbol are handled alike:
+ // the (translated) token becomes a MathSymbol leaf.
+ output->mChildren.push_back(
+ new ParseTree::MathSymbol(translateToken(mTokenSource->Get()))
+ );
+ break;
+ }
+
+ case cBeginGroup:
+ {
+ mTokenSource->Advance();
+
+ // Grab stuff inside braces:
+ output->mChildren.push_back(
+ new ParseTree::MathGroup(ParseMathList())
+ );
+
+ // Gobble closing brace.
+ if (mTokenSource->Get() != L"}")
+ throw Exception(L"UnmatchedOpenBrace");
+ break;
+ }
+
+ case cBeginEnvironment:
+ {
+ // extract e.g. "matrix" from "\begin{matrix}"
+ // (7 is the length of "\begin{"; the trailing "}" is dropped)
+ wstring beginCommand = mTokenSource->Get();
+ wstring name
+ = beginCommand.substr(7, beginCommand.size() - 8);
+
+ auto_ptr<ParseTree::MathTable> table = ParseMathTable();
+
+ // The table must be followed by an "\end{...}" token...
+ wstring endCommand = mTokenSource->Get();
+ if (GetMathTokenCode(endCommand) != cEndEnvironment)
+ throw Exception(L"UnmatchedBegin", beginCommand);
+
+ // ...naming the same environment (5 is the length of "\end{").
+ if (name != endCommand.substr(5, endCommand.size() - 6))
+ throw Exception(
+ L"MismatchedBeginAndEnd", beginCommand, endCommand
+ );
+
+ if (name == L"cases")
+ {
+ // check none of the rows have more than two entries
+ for (vector<ParseTree::MathTableRow*>::iterator
+ row = table->mRows.begin();
+ row != table->mRows.end();
+ row++
+ )
+ {
+ if ((*row)->mEntries.size() > 2)
+ throw Exception(L"CasesRowTooBig");
+ }
+ }
+
+ // false = not a "short" environment (cf. cShortEnvironment below)
+ output->mChildren.push_back(
+ new ParseTree::MathEnvironment(name, table, false)
+ );
+ break;
+ }
+
+ case cShortEnvironment:
+ {
+ wstring command = translateToken(mTokenSource->Get());
+
+ // Strip initial backslash (e.g. "\substack" => "substack")
+ wstring name = command.substr(1, command.size() - 1);
+
+ // Gobble opening "{"
+ mTokenSource->SkipWhitespace();
+ if (mTokenSource->Get() != L"{")
+ throw Exception(L"MissingOpenBraceAfter", command);
+
+ auto_ptr<ParseTree::MathTable> table = ParseMathTable();
+
+ if (name == L"substack")
+ {
+ // check none of the rows have more than one entry
+ for (vector<ParseTree::MathTableRow*>::iterator
+ row = table->mRows.begin();
+ row != table->mRows.end();
+ row++
+ )
+ {
+ if ((*row)->mEntries.size() > 1)
+ throw Exception(L"SubstackRowTooBig");
+ }
+ }
+
+ // Gobble closing "}"
+ if (mTokenSource->Get() != L"}")
+ throw Exception(L"UnmatchedOpenBrace");
+
+ // true = this is a "short" environment
+ output->mChildren.push_back(
+ new ParseTree::MathEnvironment(name, table, true)
+ );
+ break;
+ }
+
+ case cEnterTextMode:
+ {
+ wstring command = mTokenSource->Get();
+
+ // The "{" is only peeked at here; ParseTextField consumes it.
+ mTokenSource->SkipWhitespace();
+ if (mTokenSource->Peek() != L"{")
+ throw Exception(L"MissingOpenBraceAfter", command);
+
+ output->mChildren.push_back(
+ // Here is the only place in this function that we
+ // switch to text mode parsing:
+ new ParseTree::EnterTextMode(command, ParseTextField())
+ );
+ break;
+ }
+
+ case cLeft:
+ {
+ // "\left" <delimiter> <math list> "\right" <delimiter>
+ mTokenSource->Advance();
+ mTokenSource->SkipWhitespace();
+ wstring left = translateToken(mTokenSource->Get());
+ if (left.empty())
+ throw Exception(L"MissingDelimiter", L"\\left");
+ else if (!gDelimiterTable.count(left))
+ throw Exception(L"IllegalDelimiter", L"\\left");
+
+ auto_ptr<ParseTree::MathNode> child = ParseMathList();
+
+ if (mTokenSource->Peek() != L"\\right")
+ throw Exception(L"UnmatchedLeft");
+
+ mTokenSource->Advance();
+ mTokenSource->SkipWhitespace();
+ wstring right = translateToken(mTokenSource->Get());
+ if (right.empty())
+ throw Exception(L"MissingDelimiter", L"\\right");
+ else if (!gDelimiterTable.count(right))
+ throw Exception(L"IllegalDelimiter", L"\\right");
+
+ output->mChildren.push_back(
+ new ParseTree::MathDelimited(child, left, right)
+ );
+ break;
+ }
+
+ case cBig:
+ {
+ // A "\big"-style command must be followed by a delimiter
+ // listed in gDelimiterTable.
+ wstring command = translateToken(mTokenSource->Get());
+ mTokenSource->SkipWhitespace();
+ wstring delimiter = translateToken(mTokenSource->Get());
+ if (delimiter.empty())
+ throw Exception(L"MissingDelimiter", command);
+ else if (!gDelimiterTable.count(delimiter))
+ throw Exception(L"IllegalDelimiter", command);
+
+ output->mChildren.push_back(
+ new ParseTree::MathBig(command, delimiter)
+ );
+ break;
+ }
+
+ case cSuperscript:
+ {
+ // Attach the following math field as the upper script of the
+ // last node (wrapping it in a MathScripts node if needed).
+ mTokenSource->Advance();
+ ParseTree::MathScripts* target
+ = PrepareScripts(output.get());
+ if (target->mUpper.get())
+ throw Exception(L"DoubleSuperscript");
+ target->mUpper = ParseMathField();
+ break;
+ }
+
+ case cSubscript:
+ {
+ // Same as cSuperscript, but for the lower script.
+ mTokenSource->Advance();
+ ParseTree::MathScripts* target
+ = PrepareScripts(output.get());
+ if (target->mLower.get())
+ throw Exception(L"DoubleSubscript");
+ target->mLower = ParseMathField();
+ break;
+ }
+
+ case cPrime:
+ {
+ // The idea here is to fill in "superscript" with
+ // an appropriate number of "\prime" commands and
+ // possibly a regular superscript indicated by "^".
+ //
+ // It (hopefully) has the same effect as the macro
+ // that TeX uses for the prime symbol.
+
+ auto_ptr<ParseTree::MathList> superscript(
+ new ParseTree::MathList
+ );
+
+ // One "\prime" symbol per consecutive "'" token.
+ while (mTokenSource->Peek() == L"'")
+ {
+ superscript->mChildren.push_back(
+ new ParseTree::MathSymbol(L"\\prime")
+ );
+ mTokenSource->Advance();
+ }
+
+ ParseTree::MathScripts* target
+ = PrepareScripts(output.get());
+ if (target->mUpper.get())
+ throw Exception(L"DoubleSuperscript");
+
+ // A "^" directly after the primes is merged into the same
+ // superscript list.
+ if (mTokenSource->Peek() == L"^")
+ {
+ mTokenSource->Advance();
+ superscript->mChildren.push_back(
+ ParseMathField().release()
+ );
+ }
+
+ target->mUpper.reset(
+ new ParseTree::MathGroup(
+ static_cast<auto_ptr<ParseTree::MathNode> >
+ (superscript)
+ )
+ );
+ break;
+ }
+
+ case cLimits:
+ {
+ wstring command = mTokenSource->Get();
+ if (output->mChildren.empty())
+ throw Exception(L"MisplacedLimits", command);
+
+ // We need to arrange things so that the child of the
+ // new MathLimits node is the base of a (possibly new)
+ // MathScripts node.
+
+ ParseTree::MathScripts* scripts =
+ dynamic_cast<ParseTree::MathScripts*>(
+ output->mChildren.back()
+ );
+
+ // If the last node already carries scripts, wrap its base;
+ // otherwise wrap the last node itself, in place.
+ if (scripts)
+ scripts->mBase.reset(
+ new ParseTree::MathLimits(command, scripts->mBase)
+ );
+ else
+ output->mChildren.back() = new ParseTree::MathLimits(
+ command,
+ auto_ptr<ParseTree::MathNode>(
+ output->mChildren.back()
+ )
+ );
+
+ break;
+ }
+
+ case cStateChange:
+ {
+ // "\color" takes a "{name}" argument; every other state
+ // change is recorded by its command name alone.
+ wstring command = mTokenSource->Get();
+ if (command == L"\\color")
+ output->mChildren.push_back(
+ new ParseTree::MathColour(ParseColourName())
+ );
+ else
+ output->mChildren.push_back(
+ new ParseTree::MathStateChange(command)
+ );
+ break;
+ }
+
+ case cCommand1Arg:
+ {
+ wstring command = translateToken(mTokenSource->Get());
+ output->mChildren.push_back(
+ new ParseTree::MathCommand1Arg(
+ command, ParseMathField()
+ )
+ );
+ break;
+ }
+
+ case cCommand2Args:
+ {
+ // The two fields are parsed into named locals so that they
+ // are read from the token stream in a definite order.
+ wstring command = translateToken(mTokenSource->Get());
+ auto_ptr<ParseTree::MathNode> child1 = ParseMathField();
+ auto_ptr<ParseTree::MathNode> child2 = ParseMathField();
+ output->mChildren.push_back(
+ new ParseTree::MathCommand2Args(
+ command, child1, child2, false
+ )
+ );
+ break;
+ }
+
+ case cCommandInfix:
+ {
+ // Only one infix command is allowed per math list.
+ if (!infixCommand.empty())
+ throw Exception(
+ L"AmbiguousInfix", mTokenSource->Peek()
+ );
+
+ // When we see an infix command (e.g. "\over"), we do the
+ // same thing TeX does: clear out the entire math list
+ // being processed and dump it temporarily in
+ // "infixNumerator", and start processing the "denominator".
+
+ infixNumerator = output;
+ infixCommand = translateToken(mTokenSource->Get());
+ output.reset(new ParseTree::MathList);
+ break;
+ }
+
+ default:
+ throw logic_error(
+ "Unexpected token code in Parser::ParseMathList"
+ );
+ }
+ }
+
+ // Hmmm... gcc seems to think the control flow can reach here...
+ throw logic_error("Unexpected control flow in Parser::ParseMathList");
+}
+
+// Parses one text-mode field: after skipping whitespace, either a single
+// symbol token or a brace-delimited text list.
+//
+// Returns a TextSymbol for a symbol token, or a TextGroup wrapping the
+// result of ParseTextList() for a braced group (the closing "}" is
+// consumed).
+//
+// Throws Exception:
+//   "MissingOpenBraceAtEnd"   if the input ends where a field is expected;
+//   "UnmatchedOpenBrace"      if a braced group is not closed by "}";
+//   "MissingOpenBraceBefore"  for any other kind of token.
+auto_ptr<ParseTree::TextNode> Parser::ParseTextField()
+{
+    mTokenSource->SkipWhitespace();
+    const wstring token = mTokenSource->Get();
+    const TokenCode code = GetTextTokenCode(token);
+
+    if (code == cEndOfInput)
+        throw Exception(L"MissingOpenBraceAtEnd");
+
+    if (code == cSymbol)
+        return auto_ptr<ParseTree::TextNode>(
+            new ParseTree::TextSymbol(token)
+        );
+
+    // Anything that is neither a symbol nor "{" cannot start a field.
+    if (code != cBeginGroup)
+        throw Exception(L"MissingOpenBraceBefore", token);
+
+    // The "{" has already been consumed by Get() above; parse the
+    // contents, then insist on (and consume) the matching "}".
+    auto_ptr<ParseTree::TextNode> group(
+        new ParseTree::TextGroup(ParseTextList())
+    );
+    if (mTokenSource->Peek() != L"}")
+        throw Exception(L"UnmatchedOpenBrace");
+    mTokenSource->Advance();
+    return group;
+}
+
+// Parses a text-mode list from mTokenSource, stopping at (but not
+// consuming) the first "}" or the end of input.
+//
+// Returns the single child node if the list has exactly one child,
+// otherwise the TextList itself.
+//
+// Throws Exception("UnmatchedOpenBrace") if a nested "{" group is not
+// closed by "}", and logic_error if GetTextTokenCode returns a code that
+// this switch does not handle.
+auto_ptr<ParseTree::TextNode> Parser::ParseTextList()
+{
+    auto_ptr<ParseTree::TextList> output(new ParseTree::TextList);
+
+    while (true)
+    {
+        switch (GetTextTokenCode(mTokenSource->Peek()))
+        {
+            // Terminators: only peeked at, so the caller still sees them.
+            case cEndGroup:
+            case cEndOfInput:
+            {
+                if (output->mChildren.size() == 1)
+                {
+                    // If there's only one node in the list, return just
+                    // that single node. It is removed from the list first
+                    // so that it ends up with a single owner.
+                    ParseTree::TextNode* temp = output->mChildren.back();
+                    output->mChildren.pop_back();
+                    return auto_ptr<ParseTree::TextNode>(temp);
+                }
+                else
+                    return static_cast<auto_ptr<ParseTree::TextNode> >(
+                        output
+                    );
+            }
+
+            case cNewcommand:
+            {
+                // Hand the macro definition over to the MacroProcessor.
+                mTokenSource->HandleNewcommand();
+                break;
+            }
+
+            case cBeginGroup:
+            {
+                // Nested "{ ... }": parse the contents recursively, then
+                // require and consume the closing brace.
+                mTokenSource->Advance();
+                output->mChildren.push_back(
+                    new ParseTree::TextGroup(ParseTextList())
+                );
+                if (mTokenSource->Peek() != L"}")
+                    throw Exception(L"UnmatchedOpenBrace");
+                mTokenSource->Advance();
+                break;
+            }
+
+            // In text mode whitespace is kept as a symbol rather than
+            // skipped.
+            case cWhitespace:
+            case cSymbol:
+            case cSymbolUnsafe:
+            {
+                output->mChildren.push_back(
+                    new ParseTree::TextSymbol(mTokenSource->Get())
+                );
+                break;
+            }
+
+            case cCommand1Arg:
+            {
+                wstring command = mTokenSource->Get();
+                output->mChildren.push_back(
+                    new ParseTree::TextCommand1Arg(
+                        command, ParseTextField()
+                    )
+                );
+                break;
+            }
+
+            case cStateChange:
+            {
+                // "\color" takes a "{name}" argument; every other state
+                // change is recorded by its command name alone.
+                wstring command = mTokenSource->Get();
+                if (command == L"\\color")
+                    output->mChildren.push_back(
+                        new ParseTree::TextColour(ParseColourName())
+                    );
+                else
+                    output->mChildren.push_back(
+                        new ParseTree::TextStateChange(command)
+                    );
+                break;
+            }
+
+            default:
+                throw logic_error(
+                    "Unexpected token code in Parser::ParseTextList"
+                );
+        }
+    }
+
+    // Hmmm... gcc seems to think the control flow can reach here...
+    // (The message previously named ParseTextField by mistake.)
+    throw logic_error("Unexpected control flow in Parser::ParseTextList");
+}
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/Parser.h b/blahtexml/source/BlahtexCore/Parser.h
new file mode 100644
index 0000000..2e69b4f
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/Parser.h
@@ -0,0 +1,142 @@
+// File "Parser.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_PARSER_H
+#define BLAHTEX_PARSER_H
+
+#include "MacroProcessor.h"
+#include "ParseTree.h"
+
+namespace blahtex
+{
+
+// The Parser class actually does the parsing work. It runs the supplied
+// input tokens through a MacroProcessor, and builds a parse tree from the
+// resulting expanded token stream.
+class Parser
+{
+
+public:
+ // Main function that the caller should use to do a parsing job.
+ // Input is a list of TeX tokens, output is the root of a parse tree.
+ std::auto_ptr<ParseTree::MathNode> DoParse(
+ const std::vector<std::wstring>& input
+ );
+
+ // The parser uses GetMathTokenCode (in math mode) or GetTextTokenCode
+ // (in text mode) to translate each incoming token into one of the
+ // following values:
+ enum TokenCode
+ {
+ cEndOfInput,
+ cWhitespace,
+ cNewcommand, // The "\newcommand" command.
+ cIllegal, // Single character commands that are illegal in
+ // the current mode (like "$", "%").
+ cBeginGroup, // Opening and closing braces ("{" and "}").
+ cEndGroup,
+ cNextCell, // The commands "&" and "\\".
+ cNextRow,
+ cSuperscript, // The commands "^" and "_".
+ cSubscript,
+ cPrime, // The prime symbol "'".
+ cCommand1Arg, // TeX commands accepting one argument.
+ cCommand2Args, // TeX commands accepting two arguments.
+ cCommandInfix, // Infix commands like "\over".
+ cLeft, // The "\left" and "\right" commands.
+ cRight,
+ cBig, // "\big" style commands.
+ cLimits, // "\limits", "\nolimits", or "\displaylimits".
+ cBeginEnvironment, // Like "\begin{matrix}", "\end{matrix}"
+ cEndEnvironment,
+ cShortEnvironment, // Like "\substack{...}". Inside the braces it
+ // behaves like an environment, but it doesn't
+ // use "\begin" or "\end".
+ cEnterTextMode, // Commands that switch from math mode to text
+ // mode (e.g. "\text").
+ cStateChange, // State changes, e.g. "\rm", "\scriptstyle",
+ // "\color".
+ cSymbol, // Pretty much every other command: e.g.
+ // "a", "1", "\alpha", "+", "\rightarrow", etc.
+
+ // cSymbolUnsafe covers some commands that one might expect to get
+ // translated as cSymbol.
+ //
+ // The issue is that TeX/LaTeX/AMS-LaTeX expands certain commands as
+ // macros, and they subsequently become unsafe for use as a single
+ // symbol. For example, "x^\cong" is illegal in TeX, because "\cong"
+ // gets expanded as a macro, so we assign the code cSymbolUnsafe to
+ // "\cong". This is a bit of a nasty hack, but the only real
+ // alternative is to simulate a much larger portion of
+ // TeX/LaTeX/AMS-LaTeX, which at this stage is unpalatable :-)
+ cSymbolUnsafe
+ };
+
+private:
+ // Tokens are first filtered through this MacroProcessor object, so that
+ // the parser doesn't have to be aware of macros at all.
+ std::auto_ptr<MacroProcessor> mTokenSource;
+
+ // ParseMathList starts parsing a math list, until it reaches a command
+ // indicating the end of the list, like "}" or "\right" or "\end{...}".
+ std::auto_ptr<ParseTree::MathNode> ParseMathList();
+
+ // ParseMathField parses a TeX "math field", which is either a single
+ // symbol or an expression grouped with braces.
+ std::auto_ptr<ParseTree::MathNode> ParseMathField();
+
+ // Handle a table enclosed in something like "\begin{matrix} ...
+ // \end{matrix}"; i.e. it breaks input up into entries and rows based on
+ // "\\" and "&" commands.
+ std::auto_ptr<ParseTree::MathTable> ParseMathTable();
+
+ // PrepareScripts is called when we encounter "^" or "_". It ensures
+ // that the last element of output->mChildren is a MathScripts node
+ // whose base is the base of the "^" or "_" command.
+ //
+ // (The caller does not get ownership of the MathScripts node;
+ // PrepareScripts assigns this ownership to *output if necessary).
+ ParseTree::MathScripts* PrepareScripts(ParseTree::MathList* output);
+
+ // ParseTextList starts parsing a text list, until it reaches "}" or
+ // end of input.
+ std::auto_ptr<ParseTree::TextNode> ParseTextList();
+
+ // ParseTextField parses an argument to a command in text mode, which
+ // is either a single symbol or an expression grouped with braces.
+ std::auto_ptr<ParseTree::TextNode> ParseTextField();
+
+ // These functions determine the appropriate token code for the supplied
+ // token. Things like "1", "a", "+" are handled appropriately, as are
+ // backslash-prefixed commands listed in gMathTokenTable or
+ // gTextTokenTable.
+ TokenCode GetMathTokenCode(const std::wstring& token) const;
+ TokenCode GetTextTokenCode(const std::wstring& token) const;
+
+ // Parses stuff that occurs after "\color", e.g. " {red}", and checks
+ // that the colour is legal. Returns the colour name, e.g. "red".
+ std::wstring ParseColourName();
+};
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/XmlEncode.cpp b/blahtexml/source/BlahtexCore/XmlEncode.cpp
new file mode 100644
index 0000000..5a2beff
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/XmlEncode.cpp
@@ -0,0 +1,694 @@
+// File "XmlEncode.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <iomanip>
+#include <sstream>
+#include <map>
+#include "XmlEncode.h"
+
+using namespace std;
+
+namespace blahtex
+{
+
+// Pair of MathML names (short and long) associated with one character.
+// Either name is left empty when it is not supplied.
+struct UnicodeNameInfo
+{
+    wstring mShortName;
+    wstring mLongName;
+
+    // Neither name supplied: both stay empty.
+    UnicodeNameInfo() :
+        mShortName(),
+        mLongName()
+    { }
+
+    // Short name only; the long name stays empty.
+    UnicodeNameInfo(const wstring& shortName) :
+        mShortName(shortName),
+        mLongName()
+    { }
+
+    // Both a short and a long name.
+    UnicodeNameInfo(const wstring& shortName, const wstring& longName) :
+        mShortName(shortName),
+        mLongName(longName)
+    { }
+};
+
+// This table lists all the non-ASCII characters that blahtex can give
+// names to. For each one it possibly lists a short and long MathML name.
+pair<wchar_t, UnicodeNameInfo> gUnicodeNameArray[] =
+{
+ make_pair(L'\U00000060', UnicodeNameInfo(L"grave", L"DiacriticalGrave")),
+ make_pair(L'\U000000A0', UnicodeNameInfo(L"nbsp", L"NonBreakingSpace")),
+ make_pair(L'\U000000A5', UnicodeNameInfo(L"yen")),
+ make_pair(L'\U000000A7', UnicodeNameInfo(L"sect")),
+ make_pair(L'\U000000AC', UnicodeNameInfo(L"not")),
+ make_pair(L'\U000000AE', UnicodeNameInfo(L"reg", L"circledR")),
+ make_pair(L'\U000000AF', UnicodeNameInfo(L"macr", L"OverBar")),
+ make_pair(L'\U000000B1', UnicodeNameInfo(L"pm", L"PlusMinus")),
+ make_pair(L'\U000000B4', UnicodeNameInfo(L"acute", L"DiacriticalAcute")),
+ make_pair(L'\U000000B6', UnicodeNameInfo(L"para")),
+ make_pair(L'\U000000B7', UnicodeNameInfo(L"middot", L"CenterDot")),
+ make_pair(L'\U000000D7', UnicodeNameInfo(L"times")),
+ make_pair(L'\U000000D8', UnicodeNameInfo(L"Oslash")),
+ make_pair(L'\U000000F0', UnicodeNameInfo(L"eth")),
+ make_pair(L'\U000000F7', UnicodeNameInfo(L"div", L"divide")),
+ make_pair(L'\U00000127', UnicodeNameInfo(L"hstrok")),
+ make_pair(L'\U00000131', UnicodeNameInfo(L"imath")),
+ make_pair(L'\U000002C7', UnicodeNameInfo(L"caron", L"Hacek")),
+ make_pair(L'\U000002D8', UnicodeNameInfo(L"breve", L"Breve")),
+ make_pair(L'\U000002DC', UnicodeNameInfo(L"tilde", L"DiacriticalTilde")),
+ make_pair(L'\U00000338', UnicodeNameInfo()), // FIX: combining character that needs some thought
+ make_pair(L'\U00000393', UnicodeNameInfo(L"Gamma")),
+ make_pair(L'\U00000394', UnicodeNameInfo(L"Delta")),
+ make_pair(L'\U00000398', UnicodeNameInfo(L"Theta")),
+ make_pair(L'\U0000039B', UnicodeNameInfo(L"Lambda")),
+ make_pair(L'\U0000039E', UnicodeNameInfo(L"Xi")),
+ make_pair(L'\U000003A0', UnicodeNameInfo(L"Pi")),
+ make_pair(L'\U000003A3', UnicodeNameInfo(L"Sigma")),
+ make_pair(L'\U000003A5', UnicodeNameInfo(L"Upsilon")),
+ make_pair(L'\U000003A6', UnicodeNameInfo(L"Phi")),
+ make_pair(L'\U000003A8', UnicodeNameInfo(L"Psi")),
+ make_pair(L'\U000003A9', UnicodeNameInfo(L"Omega")),
+ make_pair(L'\U000003B1', UnicodeNameInfo(L"alpha")),
+ make_pair(L'\U000003B2', UnicodeNameInfo(L"beta")),
+ make_pair(L'\U000003B3', UnicodeNameInfo(L"gamma")),
+ make_pair(L'\U000003B4', UnicodeNameInfo(L"delta")),
+ make_pair(L'\U000003B5', UnicodeNameInfo(L"epsiv", L"varepsilon")),
+ make_pair(L'\U000003B6', UnicodeNameInfo(L"zeta")),
+ make_pair(L'\U000003B7', UnicodeNameInfo(L"eta")),
+ make_pair(L'\U000003B8', UnicodeNameInfo(L"theta")),
+ make_pair(L'\U000003B9', UnicodeNameInfo(L"iota")),
+ make_pair(L'\U000003BA', UnicodeNameInfo(L"kappa")),
+ make_pair(L'\U000003BB', UnicodeNameInfo(L"lambda")),
+ make_pair(L'\U000003BC', UnicodeNameInfo(L"mu")),
+ make_pair(L'\U000003BD', UnicodeNameInfo(L"nu")),
+ make_pair(L'\U000003BE', UnicodeNameInfo(L"xi")),
+ make_pair(L'\U000003C0', UnicodeNameInfo(L"pi")),
+ make_pair(L'\U000003C1', UnicodeNameInfo(L"rho")),
+ make_pair(L'\U000003C2', UnicodeNameInfo(L"sigmav", L"varsigma")),
+ make_pair(L'\U000003C3', UnicodeNameInfo(L"sigma")),
+ make_pair(L'\U000003C4', UnicodeNameInfo(L"tau")),
+ make_pair(L'\U000003C5', UnicodeNameInfo(L"upsi", L"upsilon")),
+#if 0
+ // FIX: note Firefox 1.5 has &phi; and &varphi; around the wrong
+ // way, so better to stick with numeric codes for 0x3C6 and 0x3D5.
+ // See mozilla bug 321438.
+ make_pair(L'\U000003C6', UnicodeNameInfo(L"phiv", L"varphi")),
+ make_pair(L'\U000003D5', UnicodeNameInfo(L"phi", L"straightphi")),
+#endif
+ make_pair(L'\U000003C7', UnicodeNameInfo(L"chi")),
+ make_pair(L'\U000003C8', UnicodeNameInfo(L"psi")),
+ make_pair(L'\U000003C9', UnicodeNameInfo(L"omega")),
+ make_pair(L'\U000003D1', UnicodeNameInfo(L"thetav", L"vartheta")),
+ make_pair(L'\U000003D6', UnicodeNameInfo(L"piv", L"varpi")),
+ make_pair(L'\U000003DD', UnicodeNameInfo(L"gammad", L"digamma")),
+ make_pair(L'\U000003F0', UnicodeNameInfo(L"kappav", L"varkappa")),
+ make_pair(L'\U000003F1', UnicodeNameInfo(L"rhov", L"varrho")),
+ make_pair(L'\U000003F5', UnicodeNameInfo(L"epsi", L"straightepsilon")),
+ make_pair(L'\U000003F6', UnicodeNameInfo(L"bepsi", L"backepsilon")),
+ make_pair(L'\U00002020', UnicodeNameInfo(L"dagger")),
+ make_pair(L'\U00002021', UnicodeNameInfo(L"Dagger", L"ddagger")),
+ make_pair(L'\U00002022', UnicodeNameInfo(L"bull", L"bullet")),
+ make_pair(L'\U00002026', UnicodeNameInfo(L"hellip")),
+ make_pair(L'\U00002032', UnicodeNameInfo(L"prime")),
+ make_pair(L'\U00002035', UnicodeNameInfo(L"bprime", L"backprime")),
+ make_pair(L'\U00002102', UnicodeNameInfo(L"Copf", L"complexes")),
+ make_pair(L'\U0000210B', UnicodeNameInfo(L"Hscr", L"HilbertSpace")),
+ make_pair(L'\U0000210C', UnicodeNameInfo(L"Hfr", L"Poincareplane")),
+ make_pair(L'\U0000210D', UnicodeNameInfo(L"Hopf", L"quaternions")),
+ make_pair(L'\U0000210F', UnicodeNameInfo(L"hbar", L"planck")),
+ make_pair(L'\U00002110', UnicodeNameInfo(L"Iscr", L"imagline")),
+ make_pair(L'\U00002111', UnicodeNameInfo(L"Im", L"imagpart")),
+ make_pair(L'\U00002112', UnicodeNameInfo(L"Lscr", L"Laplacetrf")),
+ make_pair(L'\U00002113', UnicodeNameInfo(L"ell")),
+ make_pair(L'\U00002118', UnicodeNameInfo(L"wp", L"weierp")),
+ make_pair(L'\U00002119', UnicodeNameInfo(L"Popf", L"primes")),
+ make_pair(L'\U0000211A', UnicodeNameInfo(L"Qopf", L"rationals")),
+ make_pair(L'\U0000211B', UnicodeNameInfo(L"Rscr", L"realine")),
+ make_pair(L'\U0000211C', UnicodeNameInfo(L"Re", L"realpart")),
+ make_pair(L'\U0000211D', UnicodeNameInfo(L"Ropf", L"reals")),
+ make_pair(L'\U00002124', UnicodeNameInfo(L"Zopf", L"integers")),
+ make_pair(L'\U00002127', UnicodeNameInfo(L"mho")),
+ make_pair(L'\U00002128', UnicodeNameInfo(L"Zfr", L"zeetrf")),
+ make_pair(L'\U0000212C', UnicodeNameInfo(L"Bscr", L"Bernoullis")),
+ make_pair(L'\U0000212D', UnicodeNameInfo(L"Cfr", L"Cayleys")),
+ make_pair(L'\U00002130', UnicodeNameInfo(L"Escr", L"expectation")),
+ make_pair(L'\U00002131', UnicodeNameInfo(L"Fscr", L"Fouriertrf")),
+ make_pair(L'\U00002133', UnicodeNameInfo(L"Mscr", L"Mellintrf")),
+ make_pair(L'\U00002135', UnicodeNameInfo(L"aleph")),
+ make_pair(L'\U00002136', UnicodeNameInfo(L"beth")),
+ make_pair(L'\U00002137', UnicodeNameInfo(L"gimel")),
+ make_pair(L'\U00002138', UnicodeNameInfo(L"daleth")),
+ make_pair(L'\U00002190', UnicodeNameInfo(L"larr", L"LeftArrow")),
+ make_pair(L'\U00002191', UnicodeNameInfo(L"uarr", L"UpArrow")),
+ make_pair(L'\U00002192', UnicodeNameInfo(L"rarr", L"RightArrow")),
+ make_pair(L'\U00002193', UnicodeNameInfo(L"darr", L"DownArrow")),
+ make_pair(L'\U00002194', UnicodeNameInfo(L"harr", L"LeftRightArrow")),
+ make_pair(L'\U00002195', UnicodeNameInfo(L"varr", L"UpDownArrow")),
+ make_pair(L'\U00002196', UnicodeNameInfo(L"nwarr", L"UpperLeftArrow")),
+ make_pair(L'\U00002197', UnicodeNameInfo(L"nearr", L"UpperRightArrow")),
+ make_pair(L'\U00002198', UnicodeNameInfo(L"searr", L"LowerRightArrow")),
+ make_pair(L'\U00002199', UnicodeNameInfo(L"swarr", L"LowerLeftArrow")),
+ make_pair(L'\U0000219A', UnicodeNameInfo(L"nlarr", L"nleftarrow")),
+ make_pair(L'\U0000219B', UnicodeNameInfo(L"nrarr", L"nrightarrow")),
+ make_pair(L'\U0000219D', UnicodeNameInfo(L"rarrw", L"rightsquigarrow")),
+ make_pair(L'\U0000219E', UnicodeNameInfo(L"Larr", L"twoheadleftarrow")),
+ make_pair(L'\U000021A0', UnicodeNameInfo(L"Rarr", L"twoheadrightarrow")),
+ make_pair(L'\U000021A2', UnicodeNameInfo(L"larrtl", L"leftarrowtail")),
+ make_pair(L'\U000021A3', UnicodeNameInfo(L"rarrtl", L"rightarrowtail")),
+ make_pair(L'\U000021A6', UnicodeNameInfo(L"map", L"RightTeeArrow")),
+ make_pair(L'\U000021A9', UnicodeNameInfo(L"larrhk", L"hookleftarrow")),
+ make_pair(L'\U000021AA', UnicodeNameInfo(L"rarrhk", L"hookrightarrow")),
+ make_pair(L'\U000021AB', UnicodeNameInfo(L"larrlp", L"looparrowleft")),
+ make_pair(L'\U000021AC', UnicodeNameInfo(L"rarrlp", L"looparrowright")),
+ make_pair(L'\U000021AD', UnicodeNameInfo(L"harrw", L"leftrightsquigarrow")),
+ make_pair(L'\U000021AE', UnicodeNameInfo(L"nharr", L"nleftrightarrow")),
+ make_pair(L'\U000021B0', UnicodeNameInfo(L"lsh", L"Lsh")),
+ make_pair(L'\U000021B1', UnicodeNameInfo(L"rsh", L"Rsh")),
+ make_pair(L'\U000021B6', UnicodeNameInfo(L"cularr", L"curvearrowleft")),
+ make_pair(L'\U000021B7', UnicodeNameInfo(L"curarr", L"curvearrowright")),
+ make_pair(L'\U000021BA', UnicodeNameInfo(L"olarr", L"circlearrowleft")),
+ make_pair(L'\U000021BB', UnicodeNameInfo(L"orarr", L"circlearrowright")),
+ make_pair(L'\U000021BC', UnicodeNameInfo(L"lharu", L"leftharpoonup")),
+ make_pair(L'\U000021BD', UnicodeNameInfo(L"lhard", L"leftharpoondown")),
+ make_pair(L'\U000021BE', UnicodeNameInfo(L"uharr", L"upharpoonright")),
+ make_pair(L'\U000021BF', UnicodeNameInfo(L"uharl", L"upharpoonleft")),
+ make_pair(L'\U000021C0', UnicodeNameInfo(L"rharu", L"rightharpoonup")),
+ make_pair(L'\U000021C1', UnicodeNameInfo(L"rhard", L"rightharpoondown")),
+ make_pair(L'\U000021C2', UnicodeNameInfo(L"dharr", L"downharpoonright")),
+ make_pair(L'\U000021C3', UnicodeNameInfo(L"dharl", L"downharpoonleft")),
+ make_pair(L'\U000021C4', UnicodeNameInfo(L"rlarr", L"RightArrowLeftArrow")),
+ make_pair(L'\U000021C6', UnicodeNameInfo(L"lrarr", L"LeftArrowRightArrow")),
+ make_pair(L'\U000021C7', UnicodeNameInfo(L"llarr", L"leftleftarrows")),
+ make_pair(L'\U000021C8', UnicodeNameInfo(L"uuarr", L"upuparrows")),
+ make_pair(L'\U000021C9', UnicodeNameInfo(L"rrarr", L"rightrightarrows")),
+ make_pair(L'\U000021CA', UnicodeNameInfo(L"ddarr", L"downdownarrows")),
+ make_pair(L'\U000021CB', UnicodeNameInfo(L"lrhar", L"ReverseEquilibrium")),
+ make_pair(L'\U000021CC', UnicodeNameInfo(L"rlhar", L"Equilibrium")),
+ make_pair(L'\U000021CD', UnicodeNameInfo(L"nlArr", L"nLeftarrow")),
+ make_pair(L'\U000021CE', UnicodeNameInfo(L"nhArr", L"nLeftrightarrow")),
+ make_pair(L'\U000021CF', UnicodeNameInfo(L"nrArr", L"nRightarrow")),
+ make_pair(L'\U000021D0', UnicodeNameInfo(L"lArr", L"DoubleLeftArrow")),
+ make_pair(L'\U000021D1', UnicodeNameInfo(L"uArr", L"DoubleUpArrow")),
+ make_pair(L'\U000021D2', UnicodeNameInfo(L"rArr", L"DoubleRightArrow")),
+ make_pair(L'\U000021D3', UnicodeNameInfo(L"dArr", L"DoubleDownArrow")),
+ make_pair(L'\U000021D4', UnicodeNameInfo(L"hArr", L"DoubleLeftRightArrow")),
+ make_pair(L'\U000021D5', UnicodeNameInfo(L"vArr", L"DoubleUpDownArrow")),
+ make_pair(L'\U000021DA', UnicodeNameInfo(L"lAarr", L"Lleftarrow")),
+ make_pair(L'\U000021DB', UnicodeNameInfo(L"rAarr", L"Rrightarrow")),
+ make_pair(L'\U000021DD', UnicodeNameInfo(L"zigrarr")),
+ make_pair(L'\U00002200', UnicodeNameInfo(L"forall", L"ForAll")),
+ make_pair(L'\U00002201', UnicodeNameInfo(L"comp", L"complement")),
+ make_pair(L'\U00002202', UnicodeNameInfo(L"part", L"PartialD")),
+ make_pair(L'\U00002203', UnicodeNameInfo(L"exist", L"Exists")),
+ make_pair(L'\U00002204', UnicodeNameInfo(L"nexist", L"NotExists")),
+ make_pair(L'\U00002205', UnicodeNameInfo(L"empty", L"emptyset")),
+ make_pair(L'\U00002207', UnicodeNameInfo(L"nabla", L"Del")),
+ make_pair(L'\U00002208', UnicodeNameInfo(L"in", L"Element")),
+ make_pair(L'\U00002209', UnicodeNameInfo(L"notin", L"NotElement")),
+ make_pair(L'\U0000220B', UnicodeNameInfo(L"ni", L"ReverseElement")),
+ make_pair(L'\U0000220C', UnicodeNameInfo(L"notni", L"NotReverseElement")),
+ make_pair(L'\U0000220F', UnicodeNameInfo(L"prod", L"Product")),
+ make_pair(L'\U00002210', UnicodeNameInfo(L"coprod", L"Coproduct")),
+ make_pair(L'\U00002211', UnicodeNameInfo(L"sum", L"Sum")),
+ make_pair(L'\U00002213', UnicodeNameInfo(L"mp", L"MinusPlus")),
+ make_pair(L'\U00002214', UnicodeNameInfo(L"dotplus")),
+ make_pair(L'\U00002216', UnicodeNameInfo(L"setmn", L"Backslash")),
+ make_pair(L'\U00002218', UnicodeNameInfo(L"compfn", L"SmallCircle")),
+ make_pair(L'\U0000221A', UnicodeNameInfo(L"radic", L"Sqrt")),
+ make_pair(L'\U0000221D', UnicodeNameInfo(L"prop", L"Proportional")),
+ make_pair(L'\U0000221E', UnicodeNameInfo(L"infin")),
+ make_pair(L'\U00002220', UnicodeNameInfo(L"ang", L"angle")),
+ make_pair(L'\U00002221', UnicodeNameInfo(L"angmsd", L"measuredangle")),
+ make_pair(L'\U00002222', UnicodeNameInfo(L"angsph")),
+ make_pair(L'\U00002223', UnicodeNameInfo(L"mid", L"VerticalBar")),
+ make_pair(L'\U00002224', UnicodeNameInfo(L"nmid", L"NotVerticalBar")),
+ make_pair(L'\U00002225', UnicodeNameInfo(L"par", L"DoubleVerticalBar")),
+ make_pair(L'\U00002226', UnicodeNameInfo(L"npar", L"NotDoubleVerticalBar")),
+ make_pair(L'\U00002227', UnicodeNameInfo(L"and", L"wedge")),
+ make_pair(L'\U00002228', UnicodeNameInfo(L"or", L"vee")),
+ make_pair(L'\U00002229', UnicodeNameInfo(L"cap")),
+ make_pair(L'\U0000222A', UnicodeNameInfo(L"cup")),
+ make_pair(L'\U0000222B', UnicodeNameInfo(L"int", L"Integral")),
+ make_pair(L'\U0000222C', UnicodeNameInfo(L"Int")),
+ make_pair(L'\U0000222D', UnicodeNameInfo(L"tint", L"iiint")),
+ make_pair(L'\U0000222E', UnicodeNameInfo(L"conint", L"ContourIntegral")),
+ make_pair(L'\U00002234', UnicodeNameInfo(L"there4", L"Therefore")),
+ make_pair(L'\U00002235', UnicodeNameInfo(L"becaus", L"Because")),
+ make_pair(L'\U0000223C', UnicodeNameInfo(L"sim", L"Tilde")),
+ make_pair(L'\U0000223D', UnicodeNameInfo(L"bsim", L"backsim")),
+ make_pair(L'\U00002240', UnicodeNameInfo(L"wr", L"VerticalTilde")),
+ make_pair(L'\U00002241', UnicodeNameInfo(L"nsim", L"NotTilde")),
+ make_pair(L'\U00002242', UnicodeNameInfo(L"esim", L"EqualTilde")),
+ make_pair(L'\U00002243', UnicodeNameInfo(L"sime", L"TildeEqual")),
+ make_pair(L'\U00002244', UnicodeNameInfo(L"nsime", L"NotTildeEqual")),
+ make_pair(L'\U00002245', UnicodeNameInfo(L"cong", L"TildeFullEqual")),
+ make_pair(L'\U00002247', UnicodeNameInfo(L"ncong", L"NotTildeFullEqual")),
+ make_pair(L'\U00002248', UnicodeNameInfo(L"ap", L"TildeTilde")),
+ make_pair(L'\U00002249', UnicodeNameInfo(L"nap", L"NotTildeTilde")),
+ make_pair(L'\U0000224A', UnicodeNameInfo(L"ape", L"approxeq")),
+ make_pair(L'\U0000224E', UnicodeNameInfo(L"bump", L"HumpDownHump")),
+ make_pair(L'\U0000224F', UnicodeNameInfo(L"nbump", L"NotHumpDownHump")),
+ make_pair(L'\U00002250', UnicodeNameInfo(L"esdot", L"DotEqual")),
+ make_pair(L'\U00002251', UnicodeNameInfo(L"eDot", L"doteqdot")),
+ make_pair(L'\U00002252', UnicodeNameInfo(L"efDot", L"fallingdotseq")),
+ make_pair(L'\U00002253', UnicodeNameInfo(L"erDot", L"risingdotseq")),
+ make_pair(L'\U00002256', UnicodeNameInfo(L"ecir", L"eqcirc")),
+ make_pair(L'\U00002257', UnicodeNameInfo(L"cire", L"circeq")),
+ make_pair(L'\U0000225C', UnicodeNameInfo(L"trie", L"triangleq")),
+ make_pair(L'\U00002260', UnicodeNameInfo(L"ne", L"NotEqual")),
+ make_pair(L'\U00002261', UnicodeNameInfo(L"equiv", L"Congruent")),
+ make_pair(L'\U00002262', UnicodeNameInfo(L"nequiv", L"NotCongruent")),
+ make_pair(L'\U00002264', UnicodeNameInfo(L"le", L"leq")),
+ make_pair(L'\U00002265', UnicodeNameInfo(L"ge", L"GreaterEqual")),
+ make_pair(L'\U00002266', UnicodeNameInfo(L"lE", L"LessFullEqual")),
+ make_pair(L'\U00002267', UnicodeNameInfo(L"gE", L"GreaterFullEqual")),
+ make_pair(L'\U00002268', UnicodeNameInfo(L"lnE", L"lneqq")),
+ make_pair(L'\U00002269', UnicodeNameInfo(L"gnE", L"gneqq")),
+ make_pair(L'\U0000226A', UnicodeNameInfo(L"Lt", L"NestedLessLess")),
+ make_pair(L'\U0000226B', UnicodeNameInfo(L"Gt", L"NestedGreaterGreater")),
+ make_pair(L'\U0000226C', UnicodeNameInfo(L"twixt", L"between")),
+ make_pair(L'\U0000226E', UnicodeNameInfo(L"nlt", L"NotLess")),
+ make_pair(L'\U0000226F', UnicodeNameInfo(L"ngt", L"NotGreater")),
+ make_pair(L'\U00002270', UnicodeNameInfo(L"nle", L"NotLessEqual")),
+ make_pair(L'\U00002271', UnicodeNameInfo(L"nge", L"NotGreaterEqual")),
+ make_pair(L'\U00002272', UnicodeNameInfo(L"lsim", L"LessTilde")),
+ make_pair(L'\U00002273', UnicodeNameInfo(L"gsim", L"GreaterTilde")),
+ make_pair(L'\U00002276', UnicodeNameInfo(L"lg", L"LessGreater")),
+ make_pair(L'\U00002277', UnicodeNameInfo(L"gl", L"GreaterLess")),
+ make_pair(L'\U0000227A', UnicodeNameInfo(L"pr", L"Precedes")),
+ make_pair(L'\U0000227B', UnicodeNameInfo(L"sc", L"Succeeds")),
+ make_pair(L'\U0000227C', UnicodeNameInfo(L"prcue", L"PrecedesSlantEqual")),
+ make_pair(L'\U0000227D', UnicodeNameInfo(L"sccue", L"SucceedsSlantEqual")),
+ make_pair(L'\U0000227E', UnicodeNameInfo(L"prsim", L"PrecedesTilde")),
+ make_pair(L'\U0000227F', UnicodeNameInfo(L"scsim", L"SucceedsTilde")),
+ make_pair(L'\U00002280', UnicodeNameInfo(L"npr", L"NotPrecedes")),
+ make_pair(L'\U00002281', UnicodeNameInfo(L"nsc", L"NotSucceeds")),
+ make_pair(L'\U00002282', UnicodeNameInfo(L"sub", L"subset")),
+ make_pair(L'\U00002283', UnicodeNameInfo(L"sup", L"supset")),
+ make_pair(L'\U00002284', UnicodeNameInfo(L"nsub")),
+ make_pair(L'\U00002285', UnicodeNameInfo(L"nsup")),
+ make_pair(L'\U00002286', UnicodeNameInfo(L"sube", L"SubsetEqual")),
+ make_pair(L'\U00002287', UnicodeNameInfo(L"supe", L"SupersetEqual")),
+ make_pair(L'\U00002288', UnicodeNameInfo(L"nsube", L"NotSubsetEqual")),
+ make_pair(L'\U00002289', UnicodeNameInfo(L"nsupe", L"NotSupersetEqual")),
+ make_pair(L'\U0000228A', UnicodeNameInfo(L"subne", L"subsetneq")),
+ make_pair(L'\U0000228B', UnicodeNameInfo(L"supne", L"supsetneq")),
+ make_pair(L'\U0000228E', UnicodeNameInfo(L"uplus", L"UnionPlus")),
+ make_pair(L'\U0000228F', UnicodeNameInfo(L"sqsub", L"SquareSubset")),
+ make_pair(L'\U00002290', UnicodeNameInfo(L"sqsup", L"SquareSuperset")),
+ make_pair(L'\U00002291', UnicodeNameInfo(L"sqsube", L"SquareSubsetEqual")),
+ make_pair(L'\U00002292', UnicodeNameInfo(L"sqsupe", L"SquareSupersetEqual")),
+ make_pair(L'\U00002293', UnicodeNameInfo(L"sqcap", L"SquareIntersection")),
+ make_pair(L'\U00002294', UnicodeNameInfo(L"sqcup", L"SquareUnion")),
+ make_pair(L'\U00002295', UnicodeNameInfo(L"oplus", L"CirclePlus")),
+ make_pair(L'\U00002296', UnicodeNameInfo(L"ominus", L"CircleMinus")),
+ make_pair(L'\U00002297', UnicodeNameInfo(L"otimes", L"CircleTimes")),
+ make_pair(L'\U00002298', UnicodeNameInfo(L"osol")),
+ make_pair(L'\U00002299', UnicodeNameInfo(L"odot", L"CircleDot")),
+ make_pair(L'\U0000229A', UnicodeNameInfo(L"ocir", L"circledcirc")),
+ make_pair(L'\U0000229B', UnicodeNameInfo(L"oast", L"circledast")),
+ make_pair(L'\U0000229D', UnicodeNameInfo(L"odash", L"circleddash")),
+ make_pair(L'\U0000229E', UnicodeNameInfo(L"plusb", L"boxplus")),
+ make_pair(L'\U0000229F', UnicodeNameInfo(L"minusb", L"boxminus")),
+ make_pair(L'\U000022A0', UnicodeNameInfo(L"timesb", L"boxtimes")),
+ make_pair(L'\U000022A1', UnicodeNameInfo(L"sdotb", L"dotsquare")),
+ make_pair(L'\U000022A2', UnicodeNameInfo(L"vdash", L"RightTee")),
+ make_pair(L'\U000022A3', UnicodeNameInfo(L"dashv", L"LeftTee")),
+ make_pair(L'\U000022A4', UnicodeNameInfo(L"top", L"DownTee")),
+ make_pair(L'\U000022A5', UnicodeNameInfo(L"bot", L"UpTee")),
+ make_pair(L'\U000022A7', UnicodeNameInfo(L"models")),
+ make_pair(L'\U000022A8', UnicodeNameInfo(L"vDash", L"DoubleRightTee")),
+ make_pair(L'\U000022A9', UnicodeNameInfo(L"Vdash")),
+ make_pair(L'\U000022AA', UnicodeNameInfo(L"Vvdash")),
+ make_pair(L'\U000022AC', UnicodeNameInfo(L"nvdash")),
+ make_pair(L'\U000022AD', UnicodeNameInfo(L"nvDash")),
+ make_pair(L'\U000022AE', UnicodeNameInfo(L"nVdash")),
+ make_pair(L'\U000022AF', UnicodeNameInfo(L"nVDash")),
+ make_pair(L'\U000022B2', UnicodeNameInfo(L"vltri", L"LeftTriangle")),
+ make_pair(L'\U000022B3', UnicodeNameInfo(L"vrtri", L"RightTriangle")),
+ make_pair(L'\U000022B4', UnicodeNameInfo(L"ltrie", L"LeftTriangleEqual")),
+ make_pair(L'\U000022B5', UnicodeNameInfo(L"rtrie", L"RightTriangleEqual")),
+ make_pair(L'\U000022B8', UnicodeNameInfo(L"mumap", L"multimap")),
+ make_pair(L'\U000022BA', UnicodeNameInfo(L"intcal", L"intercal")),
+ make_pair(L'\U000022BB', UnicodeNameInfo(L"veebar")),
+ make_pair(L'\U000022C0', UnicodeNameInfo(L"xwedge", L"Wedge")),
+ make_pair(L'\U000022C1', UnicodeNameInfo(L"xvee", L"Vee")),
+ make_pair(L'\U000022C2', UnicodeNameInfo(L"xcap", L"Intersection")),
+ make_pair(L'\U000022C3', UnicodeNameInfo(L"xcup", L"Union")),
+ make_pair(L'\U000022C4', UnicodeNameInfo(L"diam", L"Diamond")),
+ make_pair(L'\U000022C5', UnicodeNameInfo(L"sdot")),
+ make_pair(L'\U000022C6', UnicodeNameInfo(L"Star")),
+ make_pair(L'\U000022C7', UnicodeNameInfo(L"divonx", L"divideontimes")),
+ make_pair(L'\U000022C8', UnicodeNameInfo(L"bowtie")),
+ make_pair(L'\U000022C9', UnicodeNameInfo(L"ltimes")),
+ make_pair(L'\U000022CA', UnicodeNameInfo(L"rtimes")),
+ make_pair(L'\U000022CB', UnicodeNameInfo(L"lthree", L"leftthreetimes")),
+ make_pair(L'\U000022CC', UnicodeNameInfo(L"rthree", L"rightthreetimes")),
+ make_pair(L'\U000022CD', UnicodeNameInfo(L"bsime", L"backsimeq")),
+ make_pair(L'\U000022CE', UnicodeNameInfo(L"cuvee", L"curlyvee")),
+ make_pair(L'\U000022CF', UnicodeNameInfo(L"cuwed", L"curlywedge")),
+ make_pair(L'\U000022D0', UnicodeNameInfo(L"Sub", L"Subset")),
+ make_pair(L'\U000022D1', UnicodeNameInfo(L"Sup", L"Supset")),
+ make_pair(L'\U000022D2', UnicodeNameInfo(L"Cap")),
+ make_pair(L'\U000022D3', UnicodeNameInfo(L"Cup")),
+ make_pair(L'\U000022D4', UnicodeNameInfo(L"fork", L"pitchfork")),
+ make_pair(L'\U000022D6', UnicodeNameInfo(L"ltdot", L"lessdot")),
+ make_pair(L'\U000022D7', UnicodeNameInfo(L"gtdot", L"gtrdot")),
+ make_pair(L'\U000022D8', UnicodeNameInfo(L"Ll")),
+ make_pair(L'\U000022D9', UnicodeNameInfo(L"Gg")),
+ make_pair(L'\U000022DA', UnicodeNameInfo(L"leg", L"LessEqualGreater")),
+ make_pair(L'\U000022DB', UnicodeNameInfo(L"gel", L"GreaterEqualLess")),
+ make_pair(L'\U000022DE', UnicodeNameInfo(L"cuepr", L"curlyeqprec")),
+ make_pair(L'\U000022DF', UnicodeNameInfo(L"cuesc", L"curlyeqsucc")),
+ make_pair(L'\U000022E2', UnicodeNameInfo(L"nsqsube", L"NotSquareSubsetEqual")),
+ make_pair(L'\U000022E3', UnicodeNameInfo(L"nsqsupe", L"NotSquareSupersetEqual")),
+ make_pair(L'\U000022E6', UnicodeNameInfo(L"lnsim")),
+ make_pair(L'\U000022E7', UnicodeNameInfo(L"gnsim")),
+ make_pair(L'\U000022E8', UnicodeNameInfo(L"prnsim", L"precnsim")),
+ make_pair(L'\U000022E9', UnicodeNameInfo(L"scnsim", L"succnsim")),
+ make_pair(L'\U000022EA', UnicodeNameInfo(L"nltri", L"NotLeftTriangle")),
+ make_pair(L'\U000022EB', UnicodeNameInfo(L"nrtri", L"NotRightTriangle")),
+ make_pair(L'\U000022EC', UnicodeNameInfo(L"nltrie", L"NotLeftTriangleEqual")),
+ make_pair(L'\U000022ED', UnicodeNameInfo(L"nrtrie", L"NotRightTriangleEqual")),
+ make_pair(L'\U000022EE', UnicodeNameInfo(L"vellip")),
+ make_pair(L'\U000022EF', UnicodeNameInfo(L"ctdot")),
+ make_pair(L'\U000022F1', UnicodeNameInfo(L"dtdot")),
+ make_pair(L'\U00002305', UnicodeNameInfo(L"barwed", L"barwedge")),
+ make_pair(L'\U00002306', UnicodeNameInfo(L"Barwed", L"doublebarwedge")),
+ make_pair(L'\U00002308', UnicodeNameInfo(L"lceil", L"LeftCeiling")),
+ make_pair(L'\U00002309', UnicodeNameInfo(L"rceil", L"RightCeiling")),
+ make_pair(L'\U0000230A', UnicodeNameInfo(L"lfloor", L"LeftFloor")),
+ make_pair(L'\U0000230B', UnicodeNameInfo(L"rfloor", L"RightFloor")),
+ make_pair(L'\U0000231C', UnicodeNameInfo(L"ulcorn", L"ulcorner")),
+ make_pair(L'\U0000231D', UnicodeNameInfo(L"urcorn", L"urcorner")),
+ make_pair(L'\U0000231E', UnicodeNameInfo(L"dlcorn", L"llcorner")),
+ make_pair(L'\U0000231F', UnicodeNameInfo(L"drcorn", L"lrcorner")),
+ make_pair(L'\U00002322', UnicodeNameInfo(L"frown", L"sfrown")),
+ make_pair(L'\U00002323', UnicodeNameInfo(L"smile", L"ssmile")),
+ make_pair(L'\U00002329', UnicodeNameInfo(L"lang", L"LeftAngleBracket")),
+ make_pair(L'\U0000232A', UnicodeNameInfo(L"rang", L"RightAngleBracket")),
+ make_pair(L'\U000023B5', UnicodeNameInfo(L"bbrk", L"UnderBracket")),
+ make_pair(L'\U000024C8', UnicodeNameInfo(L"oS", L"circledS")),
+ make_pair(L'\U000025A1', UnicodeNameInfo(L"squ", L"Square")),
+ make_pair(L'\U000025B3', UnicodeNameInfo(L"xutri", L"bigtriangleup")),
+ make_pair(L'\U000025B4', UnicodeNameInfo(L"utrif", L"blacktriangle")),
+ make_pair(L'\U000025B5', UnicodeNameInfo(L"utri", L"triangle")),
+ make_pair(L'\U000025B6', UnicodeNameInfo()),
+ make_pair(L'\U000025B9', UnicodeNameInfo(L"rtri", L"triangleright")),
+ make_pair(L'\U000025BD', UnicodeNameInfo(L"xdtri", L"bigtriangledown")),
+ make_pair(L'\U000025BE', UnicodeNameInfo(L"dtrif", L"blacktriangledown")),
+ make_pair(L'\U000025BF', UnicodeNameInfo(L"dtri", L"triangledown")),
+ make_pair(L'\U000025C0', UnicodeNameInfo()),
+ make_pair(L'\U000025C3', UnicodeNameInfo(L"ltri", L"triangleleft")),
+ make_pair(L'\U000025CA', UnicodeNameInfo(L"loz", L"lozenge")),
+ make_pair(L'\U000025EF', UnicodeNameInfo(L"xcirc", L"bigcirc")),
+ make_pair(L'\U000025FC', UnicodeNameInfo(L"FilledSmallSquare")),
+ make_pair(L'\U00002605', UnicodeNameInfo(L"starf", L"bigstar")),
+ make_pair(L'\U00002660', UnicodeNameInfo(L"spades", L"spadesuit")),
+ make_pair(L'\U00002663', UnicodeNameInfo(L"clubs", L"clubsuit")),
+ make_pair(L'\U00002665', UnicodeNameInfo(L"hearts", L"heartsuit")),
+ make_pair(L'\U00002666', UnicodeNameInfo(L"diams", L"diamondsuit")),
+ make_pair(L'\U0000266D', UnicodeNameInfo(L"flat")),
+ make_pair(L'\U0000266E', UnicodeNameInfo(L"natur", L"natural")),
+ make_pair(L'\U0000266F', UnicodeNameInfo(L"sharp")),
+ make_pair(L'\U00002713', UnicodeNameInfo(L"check", L"checkmark")),
+ make_pair(L'\U00002720', UnicodeNameInfo(L"malt", L"maltese")),
+ make_pair(L'\U000027F5', UnicodeNameInfo(L"xlarr", L"LongLeftArrow")),
+ make_pair(L'\U000027F6', UnicodeNameInfo(L"xrarr", L"LongRightArrow")),
+ make_pair(L'\U000027F7', UnicodeNameInfo(L"xharr", L"LongLeftRightArrow")),
+ make_pair(L'\U000027F8', UnicodeNameInfo(L"xlArr", L"DoubleLongLeftArrow")),
+ make_pair(L'\U000027F9', UnicodeNameInfo(L"xrArr", L"DoubleLongRightArrow")),
+ make_pair(L'\U000027FA', UnicodeNameInfo(L"xhArr", L"DoubleLongLeftRightArrow")),
+ make_pair(L'\U000027FC', UnicodeNameInfo(L"xmap", L"longMapsto")),
+ make_pair(L'\U0000290E', UnicodeNameInfo(L"lBarr")),
+ make_pair(L'\U0000290F', UnicodeNameInfo(L"rBarr", L"dbkarow")),
+ make_pair(L'\U000029EB', UnicodeNameInfo(L"lozf", L"blacklozenge")),
+ make_pair(L'\U00002A00', UnicodeNameInfo(L"xodot", L"bigodot")),
+ make_pair(L'\U00002A01', UnicodeNameInfo(L"xoplus", L"bigoplus")),
+ make_pair(L'\U00002A02', UnicodeNameInfo(L"xotime", L"bigotimes")),
+ make_pair(L'\U00002A04', UnicodeNameInfo(L"xuplus", L"biguplus")),
+ make_pair(L'\U00002A06', UnicodeNameInfo(L"xsqcup", L"bigsqcup")),
+ make_pair(L'\U00002A0C', UnicodeNameInfo(L"qint", L"iiiint")),
+ make_pair(L'\U00002A2F', UnicodeNameInfo(L"Cross")),
+ make_pair(L'\U00002A3F', UnicodeNameInfo(L"amalg")),
+ make_pair(L'\U00002A7D', UnicodeNameInfo(L"les", L"LessSlantEqual")),
+ make_pair(L'\U00002A7E', UnicodeNameInfo(L"ges", L"GreaterSlantEqual")),
+ make_pair(L'\U00002A85', UnicodeNameInfo(L"lap", L"lessapprox")),
+ make_pair(L'\U00002A86', UnicodeNameInfo(L"gap", L"gtrapprox")),
+ make_pair(L'\U00002A89', UnicodeNameInfo(L"lnap", L"lnapprox")),
+ make_pair(L'\U00002A8A', UnicodeNameInfo(L"gnap", L"gnapprox")),
+ make_pair(L'\U00002A8B', UnicodeNameInfo(L"lEg", L"lesseqqgtr")),
+ make_pair(L'\U00002A8C', UnicodeNameInfo(L"gEl", L"gtreqqless")),
+ make_pair(L'\U00002A95', UnicodeNameInfo(L"els", L"eqslantless")),
+ make_pair(L'\U00002A96', UnicodeNameInfo(L"egs", L"eqslantgtr")),
+ make_pair(L'\U00002AAF', UnicodeNameInfo(L"pre", L"PrecedesEqual")),
+ make_pair(L'\U00002AB0', UnicodeNameInfo(L"sce", L"SucceedsEqual")),
+ make_pair(L'\U00002AB5', UnicodeNameInfo(L"prnE", L"precneqq")),
+ make_pair(L'\U00002AB6', UnicodeNameInfo(L"scnE", L"succneqq")),
+ make_pair(L'\U00002AB7', UnicodeNameInfo(L"prap", L"precapprox")),
+ make_pair(L'\U00002AB8', UnicodeNameInfo(L"scap", L"succapprox")),
+ make_pair(L'\U00002AB9', UnicodeNameInfo(L"prnap", L"precnapprox")),
+ make_pair(L'\U00002ABA', UnicodeNameInfo(L"scnap", L"succnapprox")),
+ make_pair(L'\U00002AC5', UnicodeNameInfo(L"subE", L"subseteqq")),
+ make_pair(L'\U00002AC6', UnicodeNameInfo(L"supE", L"supseteqq")),
+ make_pair(L'\U00002ACB', UnicodeNameInfo(L"subnE", L"subsetneqq")),
+ make_pair(L'\U00002ACC', UnicodeNameInfo(L"supnE", L"supsetneqq")),
+ make_pair(L'\U0000FE00', UnicodeNameInfo()), // FIX: think about this combining character...
+ make_pair(L'\U0000FE37', UnicodeNameInfo(L"OverBrace")),
+ make_pair(L'\U0000FE38', UnicodeNameInfo(L"UnderBrace")),
+ make_pair(L'\U0001D49C', UnicodeNameInfo(L"Ascr")),
+ make_pair(L'\U0001D49E', UnicodeNameInfo(L"Cscr")),
+ make_pair(L'\U0001D49F', UnicodeNameInfo(L"Dscr")),
+ make_pair(L'\U0001D4A2', UnicodeNameInfo(L"Gscr")),
+ make_pair(L'\U0001D4A5', UnicodeNameInfo(L"Jscr")),
+ make_pair(L'\U0001D4A6', UnicodeNameInfo(L"Kscr")),
+ make_pair(L'\U0001D4A9', UnicodeNameInfo(L"Nscr")),
+ make_pair(L'\U0001D4AA', UnicodeNameInfo(L"Oscr")),
+ make_pair(L'\U0001D4AB', UnicodeNameInfo(L"Pscr")),
+ make_pair(L'\U0001D4AC', UnicodeNameInfo(L"Qscr")),
+ make_pair(L'\U0001D4AE', UnicodeNameInfo(L"Sscr")),
+ make_pair(L'\U0001D4AF', UnicodeNameInfo(L"Tscr")),
+ make_pair(L'\U0001D4B0', UnicodeNameInfo(L"Uscr")),
+ make_pair(L'\U0001D4B1', UnicodeNameInfo(L"Vscr")),
+ make_pair(L'\U0001D4B2', UnicodeNameInfo(L"Wscr")),
+ make_pair(L'\U0001D4B3', UnicodeNameInfo(L"Xscr")),
+ make_pair(L'\U0001D4B4', UnicodeNameInfo(L"Yscr")),
+ make_pair(L'\U0001D4B5', UnicodeNameInfo(L"Zscr")),
+ make_pair(L'\U0001D4D0', UnicodeNameInfo()), // mathematical bold script capitals
+ make_pair(L'\U0001D4D1', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D2', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D3', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D4', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D5', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D6', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D7', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D8', UnicodeNameInfo()),
+ make_pair(L'\U0001D4D9', UnicodeNameInfo()),
+ make_pair(L'\U0001D4DA', UnicodeNameInfo()),
+ make_pair(L'\U0001D4DB', UnicodeNameInfo()),
+ make_pair(L'\U0001D4DC', UnicodeNameInfo()),
+ make_pair(L'\U0001D4DD', UnicodeNameInfo()),
+ make_pair(L'\U0001D4DE', UnicodeNameInfo()),
+ make_pair(L'\U0001D4DF', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E0', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E1', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E2', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E3', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E4', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E5', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E6', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E7', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E8', UnicodeNameInfo()),
+ make_pair(L'\U0001D4E9', UnicodeNameInfo()),
+ make_pair(L'\U0001D504', UnicodeNameInfo(L"Afr")),
+ make_pair(L'\U0001D505', UnicodeNameInfo(L"Bfr")),
+ make_pair(L'\U0001D507', UnicodeNameInfo(L"Dfr")),
+ make_pair(L'\U0001D508', UnicodeNameInfo(L"Efr")),
+ make_pair(L'\U0001D509', UnicodeNameInfo(L"Ffr")),
+ make_pair(L'\U0001D50A', UnicodeNameInfo(L"Gfr")),
+ make_pair(L'\U0001D50D', UnicodeNameInfo(L"Jfr")),
+ make_pair(L'\U0001D50E', UnicodeNameInfo(L"Kfr")),
+ make_pair(L'\U0001D50F', UnicodeNameInfo(L"Lfr")),
+ make_pair(L'\U0001D510', UnicodeNameInfo(L"Mfr")),
+ make_pair(L'\U0001D511', UnicodeNameInfo(L"Nfr")),
+ make_pair(L'\U0001D512', UnicodeNameInfo(L"Ofr")),
+ make_pair(L'\U0001D513', UnicodeNameInfo(L"Pfr")),
+ make_pair(L'\U0001D514', UnicodeNameInfo(L"Qfr")),
+ make_pair(L'\U0001D516', UnicodeNameInfo(L"Sfr")),
+ make_pair(L'\U0001D517', UnicodeNameInfo(L"Tfr")),
+ make_pair(L'\U0001D518', UnicodeNameInfo(L"Ufr")),
+ make_pair(L'\U0001D519', UnicodeNameInfo(L"Vfr")),
+ make_pair(L'\U0001D51A', UnicodeNameInfo(L"Wfr")),
+ make_pair(L'\U0001D51B', UnicodeNameInfo(L"Xfr")),
+ make_pair(L'\U0001D51C', UnicodeNameInfo(L"Yfr")),
+ make_pair(L'\U0001D51E', UnicodeNameInfo(L"afr")),
+ make_pair(L'\U0001D51F', UnicodeNameInfo(L"bfr")),
+ make_pair(L'\U0001D520', UnicodeNameInfo(L"cfr")),
+ make_pair(L'\U0001D521', UnicodeNameInfo(L"dfr")),
+ make_pair(L'\U0001D522', UnicodeNameInfo(L"efr")),
+ make_pair(L'\U0001D523', UnicodeNameInfo(L"ffr")),
+ make_pair(L'\U0001D524', UnicodeNameInfo(L"gfr")),
+ make_pair(L'\U0001D525', UnicodeNameInfo(L"hfr")),
+ make_pair(L'\U0001D526', UnicodeNameInfo(L"ifr")),
+ make_pair(L'\U0001D527', UnicodeNameInfo(L"jfr")),
+ make_pair(L'\U0001D528', UnicodeNameInfo(L"kfr")),
+ make_pair(L'\U0001D529', UnicodeNameInfo(L"lfr")),
+ make_pair(L'\U0001D52A', UnicodeNameInfo(L"mfr")),
+ make_pair(L'\U0001D52B', UnicodeNameInfo(L"nfr")),
+ make_pair(L'\U0001D52C', UnicodeNameInfo(L"ofr")),
+ make_pair(L'\U0001D52D', UnicodeNameInfo(L"pfr")),
+ make_pair(L'\U0001D52E', UnicodeNameInfo(L"qfr")),
+ make_pair(L'\U0001D52F', UnicodeNameInfo(L"rfr")),
+ make_pair(L'\U0001D530', UnicodeNameInfo(L"sfr")),
+ make_pair(L'\U0001D531', UnicodeNameInfo(L"tfr")),
+ make_pair(L'\U0001D532', UnicodeNameInfo(L"ufr")),
+ make_pair(L'\U0001D533', UnicodeNameInfo(L"vfr")),
+ make_pair(L'\U0001D534', UnicodeNameInfo(L"wfr")),
+ make_pair(L'\U0001D535', UnicodeNameInfo(L"xfr")),
+ make_pair(L'\U0001D536', UnicodeNameInfo(L"yfr")),
+ make_pair(L'\U0001D537', UnicodeNameInfo(L"zfr")),
+ make_pair(L'\U0001D538', UnicodeNameInfo(L"Aopf")),
+ make_pair(L'\U0001D539', UnicodeNameInfo(L"Bopf")),
+ make_pair(L'\U0001D53B', UnicodeNameInfo(L"Dopf")),
+ make_pair(L'\U0001D53C', UnicodeNameInfo(L"Eopf")),
+ make_pair(L'\U0001D53D', UnicodeNameInfo(L"Fopf")),
+ make_pair(L'\U0001D53E', UnicodeNameInfo(L"Gopf")),
+ make_pair(L'\U0001D540', UnicodeNameInfo(L"Iopf")),
+ make_pair(L'\U0001D541', UnicodeNameInfo(L"Jopf")),
+ make_pair(L'\U0001D542', UnicodeNameInfo(L"Kopf")),
+ make_pair(L'\U0001D543', UnicodeNameInfo(L"Lopf")),
+ make_pair(L'\U0001D544', UnicodeNameInfo(L"Mopf")),
+ make_pair(L'\U0001D546', UnicodeNameInfo(L"Oopf")),
+ make_pair(L'\U0001D54A', UnicodeNameInfo(L"Sopf")),
+ make_pair(L'\U0001D54B', UnicodeNameInfo(L"Topf")),
+ make_pair(L'\U0001D54C', UnicodeNameInfo(L"Uopf")),
+ make_pair(L'\U0001D54D', UnicodeNameInfo(L"Vopf")),
+ make_pair(L'\U0001D54E', UnicodeNameInfo(L"Wopf")),
+ make_pair(L'\U0001D54F', UnicodeNameInfo(L"Xopf")),
+ make_pair(L'\U0001D550', UnicodeNameInfo(L"Yopf")),
+ make_pair(L'\U0001D55C', UnicodeNameInfo(L"kopf")),
+ make_pair(L'\U0001D6A5', UnicodeNameInfo())
+};
+
+// Lookup table from a character to its UnicodeNameInfo (entity names),
+// filled from the pairs listed in gUnicodeNameArray. XmlEncode() consults
+// it with find() for every character above 0x7F.
+// NOTE(review): END_ARRAY is presumably a macro yielding the
+// one-past-the-end pointer of the array -- confirm in Misc.h.
+wishful_hash_map<wchar_t, UnicodeNameInfo> gUnicodeNameTable(
+ gUnicodeNameArray,
+ END_ARRAY(gUnicodeNameArray)
+);
+
+
+// FIX:
+// Need to read about and think about combining characters.
+// In particular, does the current strategy work for *named* entities
+// and combining characters? I'm not sure.
+
+
+// XmlEncode() converts "input" into text suitable for inclusion in XML,
+// replacing markup characters and (depending on "options") other
+// characters by entities.
+//
+//  * L'&', L'<' and L'>' always become "&amp;", "&lt;" and "&gt;".
+//  * Every other character whose value is at most 0x7F is copied as is.
+//  * A character that has no entry in gUnicodeNameTable is copied as is
+//    when options.mOtherEncodingRaw is set, and is otherwise written as
+//    a hexadecimal numeric reference ("&#x...;").
+//  * A character that does have an entry is written as selected by
+//    options.mMathmlEncoding: long name, short name, numeric reference
+//    or raw. A missing long name degrades to the short name, and a
+//    missing short name degrades to the numeric reference.
+wstring XmlEncode(
+    const wstring& input,
+    const EncodingOptions& options
+)
+{
+    typedef wishful_hash_map<wchar_t, UnicodeNameInfo>::const_iterator
+        NameTableIterator;
+
+    wostringstream encoded;
+
+    for (wstring::const_iterator
+        current = input.begin(); current != input.end(); ++current
+    )
+    {
+        const wchar_t c = *current;
+
+        // Markup characters are always escaped.
+        if (c == L'&')
+        {
+            encoded << L"&amp;";
+            continue;
+        }
+        if (c == L'<')
+        {
+            encoded << L"&lt;";
+            continue;
+        }
+        if (c == L'>')
+        {
+            encoded << L"&gt;";
+            continue;
+        }
+
+        // Remaining characters up to 0x7F need no translation.
+        if (c <= 0x7F)
+        {
+            encoded << c;
+            continue;
+        }
+
+        NameTableIterator entry = gUnicodeNameTable.find(c);
+
+        if (entry == gUnicodeNameTable.end())
+        {
+            // No entity names known for this character.
+            if (options.mOtherEncodingRaw)
+                encoded << c;
+            else
+                encoded << L"&#x" << hex
+                    << static_cast<unsigned>(c) << L";";
+            continue;
+        }
+
+        const UnicodeNameInfo& names = entry->second;
+        EncodingOptions::MathmlEncoding encoding = options.mMathmlEncoding;
+
+        // Deal with plane-1 characters: when they are not allowed, the
+        // numeric and raw encodings are replaced by the short name.
+        const bool isPlane1 = static_cast<unsigned>(c) >= 0x10000;
+        const bool writesCodePoint =
+            encoding == EncodingOptions::cMathmlEncodingNumeric ||
+            encoding == EncodingOptions::cMathmlEncodingRaw;
+        if (!options.mAllowPlane1 && isPlane1 && writesCodePoint)
+            encoding = EncodingOptions::cMathmlEncodingShort;
+
+        // Each encoding may fall back on the next one in the sequence
+        // long -> short -> numeric when the requested name is missing.
+        const bool mayUseLong =
+            encoding == EncodingOptions::cMathmlEncodingLong;
+        const bool mayUseShort =
+            mayUseLong ||
+            encoding == EncodingOptions::cMathmlEncodingShort;
+        const bool mayUseNumeric =
+            mayUseShort ||
+            encoding == EncodingOptions::cMathmlEncodingNumeric;
+
+        if (mayUseLong && !names.mLongName.empty())
+        {
+            encoded << L"&" << names.mLongName << L";";
+        }
+        else if (mayUseShort && !names.mShortName.empty())
+        {
+            encoded << L"&" << names.mShortName << L";";
+        }
+        else if (mayUseNumeric)
+        {
+            encoded << L"&#x" << hex << static_cast<unsigned>(c)
+                << L";";
+        }
+        else if (encoding == EncodingOptions::cMathmlEncodingRaw)
+        {
+            encoded << c;
+        }
+    }
+
+    return encoded.str();
+}
+
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexCore/XmlEncode.h b/blahtexml/source/BlahtexCore/XmlEncode.h
new file mode 100644
index 0000000..fd376be
--- /dev/null
+++ b/blahtexml/source/BlahtexCore/XmlEncode.h
@@ -0,0 +1,42 @@
+// File "XmlEncode.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_XMLENCODE_H
+#define BLAHTEX_XMLENCODE_H
+
+#include <string>
+#include "Misc.h"
+
+namespace blahtex
+{
+
+// Encodes the given string as XML using the supplied options.
+// (See Misc.h for explanation of EncodingOptions.)
+//
+// "input" is the text to encode; the encoded text is returned as a new
+// string. The characters '&', '<' and '>' are always replaced by
+// "&amp;", "&lt;" and "&gt;"; "options" controls how characters above
+// 0x7F are written (named entity, numeric reference or unchanged).
+extern std::wstring XmlEncode(
+ const std::wstring& input,
+ const EncodingOptions& options
+);
+
+
+}
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/BlahtexXMLin/AttributesImpl.cpp b/blahtexml/source/BlahtexXMLin/AttributesImpl.cpp
new file mode 100644
index 0000000..26876ab
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/AttributesImpl.cpp
@@ -0,0 +1,131 @@
+// File "AttributesImpl.cpp"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "AttributesImpl.h"
+
+// Creates an attribute list that contains no attributes.
+AttributesImpl::AttributesImpl()
+    : theAttributes()
+{
+}
+
+// Creates an attribute list holding a copy of every attribute found in
+// "attributes", in the same order. The strings are copied, so the new
+// object does not keep pointers into "attributes".
+AttributesImpl::AttributesImpl(const Attributes& attributes)
+{
+    for (unsigned int position = 0; position < attributes.getLength(); ++position)
+    {
+        // Take the copies one after the other, then append them as one entry.
+        const XercesString qName = XercesString(attributes.getQName(position));
+        const XercesString uri = XercesString(attributes.getURI(position));
+        const XercesString localPart = XercesString(attributes.getLocalName(position));
+        const XercesString value = XercesString(attributes.getValue(position));
+        const XercesString type = XercesString(attributes.getType(position));
+        addAttribute(qName, uri, localPart, value, type);
+    }
+}
+
+// Returns the number of attributes currently stored in the list.
+unsigned int AttributesImpl::getLength() const
+{
+    return static_cast<unsigned int>(theAttributes.size());
+}
+
+// Returns the namespace URI of the attribute at position "index", or
+// NULL if "index" is out of range (the result the Xerces Attributes
+// interface specifies for positional lookups).
+const XMLCh* AttributesImpl::getURI(const unsigned int index) const
+{
+    // Indexing the vector with a bad position would be undefined.
+    if (index >= theAttributes.size())
+        return NULL;
+    return theAttributes[index].uri.c_str();
+}
+
+// Returns the local name of the attribute at position "index", or NULL
+// if "index" is out of range (the result the Xerces Attributes interface
+// specifies for positional lookups).
+const XMLCh* AttributesImpl::getLocalName(const unsigned int index) const
+{
+    // Indexing the vector with a bad position would be undefined.
+    if (index >= theAttributes.size())
+        return NULL;
+    return theAttributes[index].localPart.c_str();
+}
+
+// Returns the qualified name of the attribute at position "index", or
+// NULL if "index" is out of range (the result the Xerces Attributes
+// interface specifies for positional lookups).
+const XMLCh* AttributesImpl::getQName(const unsigned int index) const
+{
+    // Indexing the vector with a bad position would be undefined.
+    if (index >= theAttributes.size())
+        return NULL;
+    return theAttributes[index].qName.c_str();
+}
+
+// Returns the type string of the attribute at position "index", or NULL
+// if "index" is out of range (the result the Xerces Attributes interface
+// specifies for positional lookups).
+const XMLCh* AttributesImpl::getType(const unsigned int index) const
+{
+    // Indexing the vector with a bad position would be undefined.
+    if (index >= theAttributes.size())
+        return NULL;
+    return theAttributes[index].type.c_str();
+}
+
+// Returns the value of the attribute at position "index", or NULL if
+// "index" is out of range (the result the Xerces Attributes interface
+// specifies for positional lookups).
+const XMLCh* AttributesImpl::getValue(const unsigned int index) const
+{
+    // Indexing the vector with a bad position would be undefined.
+    if (index >= theAttributes.size())
+        return NULL;
+    return theAttributes[index].value.c_str();
+}
+
+// Looks up an attribute by namespace URI and local name.
+// Returns the position of the first attribute for which both strings
+// compare equal, or -1 if there is no such attribute.
+int AttributesImpl::getIndex(const XMLCh* const uri, const XMLCh* const localPart) const
+{
+    unsigned int position = 0;
+    while (position < theAttributes.size())
+    {
+        // The local name is only compared once the URI has matched.
+        if (XMLString::equals(getURI(position), uri))
+        {
+            if (XMLString::equals(getLocalName(position), localPart))
+                return position;
+        }
+        ++position;
+    }
+    return -1;
+}
+
+// Looks up an attribute by qualified name.
+// Returns the position of the first attribute whose qualified name
+// compares equal to "qName", or -1 if there is no such attribute.
+int AttributesImpl::getIndex(const XMLCh* const qName ) const
+{
+    unsigned int position = 0;
+    while (position < theAttributes.size())
+    {
+        if (XMLString::equals(getQName(position), qName))
+            return position;
+        ++position;
+    }
+    return -1;
+}
+
+// Returns the type string of the first attribute matching the given
+// namespace URI and local name, or NULL if no attribute matches.
+const XMLCh* AttributesImpl::getType(const XMLCh* const uri, const XMLCh* const localPart ) const
+{
+    // getIndex() performs the search and reports -1 when nothing matches.
+    const int position = getIndex(uri, localPart);
+    if (position < 0)
+        return NULL;
+    return getType(static_cast<unsigned int>(position));
+}
+
+// Returns the type string of the first attribute whose qualified name
+// equals "qName", or NULL if no attribute matches.
+const XMLCh* AttributesImpl::getType(const XMLCh* const qName) const
+{
+    // getIndex() performs the search and reports -1 when nothing matches.
+    const int position = getIndex(qName);
+    if (position < 0)
+        return NULL;
+    return getType(static_cast<unsigned int>(position));
+}
+
+// Returns the value of the first attribute matching the given namespace
+// URI and local name, or NULL if no attribute matches.
+const XMLCh* AttributesImpl::getValue(const XMLCh* const uri, const XMLCh* const localPart ) const
+{
+    // getIndex() performs the search and reports -1 when nothing matches.
+    const int position = getIndex(uri, localPart);
+    if (position < 0)
+        return NULL;
+    return getValue(static_cast<unsigned int>(position));
+}
+
+// Returns the value of the first attribute whose qualified name equals
+// "qName", or NULL if no attribute matches.
+const XMLCh* AttributesImpl::getValue(const XMLCh* const qName) const
+{
+    // getIndex() performs the search and reports -1 when nothing matches.
+    const int position = getIndex(qName);
+    if (position < 0)
+        return NULL;
+    return getValue(static_cast<unsigned int>(position));
+}
+
+// Removes the attribute at position "index"; the attributes stored after
+// it move down by one position. An out-of-range "index" is ignored and
+// leaves the list unchanged.
+void AttributesImpl::removeAttribute(const unsigned int index)
+{
+    // Erasing at or past end() is undefined behaviour, so check first.
+    if (index >= theAttributes.size())
+        return;
+    theAttributes.erase(theAttributes.begin() + index);
+}
+
+// Appends a new attribute, built from copies of the five given strings,
+// to the end of the list. No check is made for an existing attribute
+// with the same name.
+void AttributesImpl::addAttribute(const XercesString& qName, const XercesString& uri, const XercesString& localPart, const XercesString& value, const XercesString& type)
+{
+    Attribute entry;
+
+    // Naming information.
+    entry.uri = uri;
+    entry.localPart = localPart;
+    entry.qName = qName;
+
+    // Content.
+    entry.type = type;
+    entry.value = value;
+
+    theAttributes.push_back(entry);
+}
diff --git a/blahtexml/source/BlahtexXMLin/AttributesImpl.h b/blahtexml/source/BlahtexXMLin/AttributesImpl.h
new file mode 100644
index 0000000..9ee3afd
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/AttributesImpl.h
@@ -0,0 +1,63 @@
+// File "AttributesImpl.h"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef ATTRIBUTESIMPL_H
+#define ATTRIBUTESIMPL_H
+
+#include <string>
+#include <vector>
+#include <xercesc/sax2/Attributes.hpp>
+#include <xercesc/util/XMLString.hpp>
+#include "XercesString.h"
+
+XERCES_CPP_NAMESPACE_USE
+using namespace std;
+
+// One stored attribute of an XML element. All five strings are held by
+// value; AttributesImpl fills them from the corresponding getQName(),
+// getURI(), getLocalName(), getValue() and getType() results or from the
+// arguments of addAttribute().
+struct Attribute {
+ // Qualified name.
+ XercesString qName;
+ // Namespace URI.
+ XercesString uri;
+ // Local name, i.e. the name without the namespace part.
+ XercesString localPart;
+ // Attribute value.
+ XercesString value;
+ // Attribute type string as reported by Attributes::getType().
+ XercesString type;
+};
+
+// A modifiable implementation of the Xerces SAX2 Attributes interface.
+// It keeps its own copies of the attribute strings in a vector, and adds
+// removeAttribute() and addAttribute() to the read-only interface.
+class AttributesImpl : public Attributes
+{
+private:
+ // The stored attributes, in list order.
+ vector<Attribute> theAttributes;
+public:
+ // Creates an empty attribute list.
+ AttributesImpl();
+ // Creates a list holding copies of all attributes of "attributes".
+ AttributesImpl(const Attributes& attributes);
+ // Number of stored attributes.
+ virtual unsigned int getLength() const;
+ // Lookup by position in the list.
+ virtual const XMLCh* getURI(const unsigned int index) const;
+ virtual const XMLCh* getLocalName(const unsigned int index) const;
+ virtual const XMLCh* getQName(const unsigned int index) const;
+ virtual const XMLCh* getType(const unsigned int index) const;
+ virtual const XMLCh* getValue(const unsigned int index) const;
+ // Position of the first matching attribute, or -1 if none matches.
+ virtual int getIndex(const XMLCh* const uri, const XMLCh* const localPart ) const;
+ virtual int getIndex(const XMLCh* const qName ) const;
+ // Lookup by name; these return NULL if no attribute matches.
+ virtual const XMLCh* getType(const XMLCh* const uri, const XMLCh* const localPart ) const;
+ virtual const XMLCh* getType(const XMLCh* const qName) const;
+ virtual const XMLCh* getValue(const XMLCh* const uri, const XMLCh* const localPart ) const;
+ virtual const XMLCh* getValue(const XMLCh* const qName) const;
+ // Removes the attribute at position "index".
+ void removeAttribute(const unsigned int index);
+ // Appends a new attribute built from copies of the given strings.
+ void addAttribute(const XercesString& qName, const XercesString& uri, const XercesString& localPart, const XercesString& value, const XercesString& type);
+};
+
+#endif
diff --git a/blahtexml/source/BlahtexXMLin/BlahtexFilter.cpp b/blahtexml/source/BlahtexXMLin/BlahtexFilter.cpp
new file mode 100644
index 0000000..87f095d
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/BlahtexFilter.cpp
@@ -0,0 +1,171 @@
+// File "BlahtexFilter.cpp"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "AttributesImpl.h"
+#include "BlahtexFilter.h"
+#include "XercesString.h"
+#include <iostream>
+#include <xercesc/framework/StdInInputSource.hpp>
+#include <xercesc/framework/XMLFormatter.hpp>
+#include <xercesc/parsers/SAX2XMLFilterImpl.hpp>
+#include <xercesc/sax2/Attributes.hpp>
+#include <xercesc/sax2/SAX2XMLReader.hpp>
+#include <xercesc/sax2/XMLReaderFactory.hpp>
+#include <xercesc/util/OutOfMemoryException.hpp>
+#include <xercesc/util/PlatformUtils.hpp>
+#include <xercesc/util/TransService.hpp>
+#include <xercesc/util/XMLString.hpp>
+#include <xercesc/util/XMLUniDefs.hpp>
+
+using namespace std;
+
+extern wstring GetErrorMessage(const blahtex::Exception& e);
+
+// Creates a filter in front of `parent` that uses `anInterface` to convert
+// TeX input.  The error counter starts at zero and the MathML prefix policy
+// defaults to PrefixAuto, so that startElement() never reads an
+// uninitialised policy when setDesiredMathMLPrefixType() was not called.
+BlahtexFilter::BlahtexFilter(SAX2XMLReader* parent, blahtex::Interface& anInterface)
+    : SAX2XMLFilterImpl(parent),
+      interface(anInterface),
+      numberOfErrors(0),
+      desiredMathMLPrefixType(PrefixAuto)
+{
+}
+
+// Nothing to release explicitly: the filter holds only a reference, a list,
+// a counter, an enum and a string.
+BlahtexFilter::~BlahtexFilter() {}
+
+// Intercepts every element start.
+//
+// If the element carries an attribute in the blahtexml namespace whose local
+// name is "m", "inline" or "block", the attribute value is handed to
+// interface.ProcessInput(), the element is forwarded without that attribute,
+// and the output of interface.PrintAsSAX2() is emitted right after it.  For
+// "inline" and "block" the output is wrapped in a MathML `math` element
+// (with display="block" in the "block" case).
+//
+// If a blahtex::Exception is caught, the element is forwarded with its
+// original attributes, followed by a blahtexml `error` element containing
+// the message from GetErrorMessage(), and numberOfErrors is incremented.
+//
+// Elements without such an attribute are forwarded unchanged.
+//
+// NOTE(review): the catch block forwards startElement() for this element
+// unconditionally.  If an exception can be thrown after the element was
+// already forwarded in the try block (e.g. from PrintAsSAX2), the element
+// start would be emitted twice -- confirm which calls can throw.
+void BlahtexFilter::startElement(const XMLCh* const uri, const XMLCh* const localname,
+ const XMLCh* const qname, const Attributes& attributes)
+{
+ static AttributesImpl emptyAttributes;
+ static XercesString empty;
+ static XercesString blahtexmlNamespace("http://gva.noekeon.org/blahtexml");
+ static XercesString m("m");
+ static XercesString inlin("inline");
+ static XercesString block("block");
+ static XercesString error("error");
+ static XercesString display("display");
+
+ bool blockMode = false;
+ bool encloseInMathTag = false;
+ // Look for the equation attribute, trying "m", then "inline", then "block".
+ int eqAttrIndex = attributes.getIndex(blahtexmlNamespace.c_str(), m.c_str());
+ if (eqAttrIndex < 0) {
+ eqAttrIndex = attributes.getIndex(blahtexmlNamespace.c_str(), inlin.c_str());
+ encloseInMathTag = (eqAttrIndex >= 0);
+ }
+ if (eqAttrIndex < 0) {
+ eqAttrIndex = attributes.getIndex(blahtexmlNamespace.c_str(), block.c_str());
+ encloseInMathTag = (eqAttrIndex >= 0);
+ blockMode = (eqAttrIndex >= 0);
+ }
+ if (eqAttrIndex >= 0) {
+ try {
+ XercesString inputXerces(attributes.getValue(eqAttrIndex));
+ wstring input = inputXerces.convertTowstring();
+ interface.ProcessInput(input);
+
+ // Forward the element itself, minus the equation attribute.
+ AttributesImpl newAttributes(attributes);
+ newAttributes.removeAttribute(eqAttrIndex);
+ SAX2XMLFilterImpl::startElement(uri, localname, qname, newAttributes);
+
+ XercesString MathMLnamespace("http://www.w3.org/1998/Math/MathML");
+ XercesString unprefixedMath(L"math");
+ wstring MathMLprefix;
+ // Reuse a MathML prefix already in scope unless the desired policy
+ // (no prefix / a specific prefix) disagrees with it.
+ bool MathMLexistingNamespace = getMathMLprefix(MathMLprefix);
+ if ((desiredMathMLPrefixType == PrefixNone) && (!(MathMLprefix == L""))) {
+ MathMLexistingNamespace = false;
+ MathMLprefix = L"";
+ }
+ if ((desiredMathMLPrefixType == PrefixAdd) && (!(MathMLprefix == desiredMathMLPrefix))) {
+ MathMLexistingNamespace = false;
+ MathMLprefix = desiredMathMLPrefix;
+ }
+ XercesString MathMLprefix_(MathMLprefix);
+
+ // Declare the MathML namespace ourselves when no usable binding exists.
+ // The parent class is called directly, so this filter's own
+ // startPrefixMapping()/namespaceContext is not involved.
+ if (!MathMLexistingNamespace)
+ SAX2XMLFilterImpl::startPrefixMapping(MathMLprefix_.c_str(), MathMLnamespace.c_str());
+ XercesString prefixedMath((MathMLprefix == L"") ? L"math" : (MathMLprefix + L":math"));
+ if (encloseInMathTag) {
+ AttributesImpl mathAttributes;
+ if (blockMode)
+ mathAttributes.addAttribute(display.c_str(), empty.c_str(), display.c_str(), block.c_str(), empty.c_str());
+ SAX2XMLFilterImpl::startElement(MathMLnamespace.c_str(), unprefixedMath.c_str(), prefixedMath.c_str(), mathAttributes);
+ }
+ interface.PrintAsSAX2(*this, MathMLprefix, true);
+ if (encloseInMathTag)
+ SAX2XMLFilterImpl::endElement(MathMLnamespace.c_str(), unprefixedMath.c_str(), prefixedMath.c_str());
+ if (!MathMLexistingNamespace)
+ SAX2XMLFilterImpl::endPrefixMapping(MathMLprefix_.c_str());
+ }
+ catch (blahtex::Exception& e) {
+ // Conversion failed: keep the element as it was and report the
+ // problem in a blahtexml <error> child (default namespace binding).
+ wstring output = GetErrorMessage(e);
+ XercesString outputXerces(output);
+ SAX2XMLFilterImpl::startElement(uri, localname, qname, attributes);
+ SAX2XMLFilterImpl::startPrefixMapping(empty.c_str(), blahtexmlNamespace.c_str());
+ SAX2XMLFilterImpl::startElement(blahtexmlNamespace.c_str(), error.c_str(), error.c_str(), emptyAttributes);
+ SAX2XMLFilterImpl::characters(outputXerces.data(), outputXerces.length());
+ SAX2XMLFilterImpl::endElement(blahtexmlNamespace.c_str(), error.c_str(), error.c_str());
+ SAX2XMLFilterImpl::endPrefixMapping(empty.c_str());
+ numberOfErrors++;
+ }
+ }
+ else {
+ // No equation attribute: plain pass-through.
+ SAX2XMLFilterImpl::startElement(uri, localname, qname, attributes);
+ }
+}
+
+// Remembers the new (prefix, URI) binding at the front of namespaceContext
+// and then passes the event on to the parent filter.
+void BlahtexFilter::startPrefixMapping(const XMLCh* const prefix, const XMLCh* const uri)
+{
+    XercesString prefixAsXerces(prefix);
+    XercesString uriAsXerces(uri);
+    namespaceContext.push_front(
+        pair<wstring, wstring>(prefixAsXerces.convertTowstring(),
+                               uriAsXerces.convertTowstring()));
+    SAX2XMLFilterImpl::startPrefixMapping(prefix, uri);
+}
+
+// Forgets the first binding recorded for `prefix` (bindings are stored
+// newest first, so this is the most recent one) and passes the event on.
+void BlahtexFilter::endPrefixMapping(const XMLCh* const prefix)
+{
+    const wstring closedPrefix = XercesString(prefix).convertTowstring();
+
+    list_wstring2::iterator entry = namespaceContext.begin();
+    while (entry != namespaceContext.end() && !(entry->first == closedPrefix))
+        ++entry;
+    if (entry != namespaceContext.end())
+        namespaceContext.erase(entry);
+
+    SAX2XMLFilterImpl::endPrefixMapping(prefix);
+}
+
+// Searches the recorded namespace bindings for the MathML namespace.
+// On success stores the bound prefix in `prefix` and returns true;
+// otherwise sets `prefix` to the empty string and returns false.
+bool BlahtexFilter::getMathMLprefix(wstring& prefix)
+{
+    const wstring mathmlUri(L"http://www.w3.org/1998/Math/MathML");
+
+    list_wstring2::iterator entry = namespaceContext.begin();
+    while (entry != namespaceContext.end() && !(entry->second == mathmlUri))
+        ++entry;
+
+    if (entry == namespaceContext.end()) {
+        prefix = L"";
+        return false;
+    }
+    prefix = entry->first;
+    return true;
+}
+
+// Number of equations whose conversion raised a blahtex::Exception so far.
+int BlahtexFilter::getNumberOfErrors()
+{
+    const int errorsSoFar = numberOfErrors;
+    return errorsSoFar;
+}
+
+// Selects how the MathML namespace prefix is chosen in startElement():
+// `aPrefixType` is the policy and `aPrefix` the prefix compared against and
+// used when the policy is PrefixAdd.
+void BlahtexFilter::setDesiredMathMLPrefixType(PrefixType aPrefixType, const wstring& aPrefix)
+{
+    desiredMathMLPrefix = aPrefix;
+    desiredMathMLPrefixType = aPrefixType;
+}
diff --git a/blahtexml/source/BlahtexXMLin/BlahtexFilter.h b/blahtexml/source/BlahtexXMLin/BlahtexFilter.h
new file mode 100644
index 0000000..de63ca2
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/BlahtexFilter.h
@@ -0,0 +1,51 @@
+// File "BlahtexFilter.h"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEXFILTER_H
+#define BLAHTEXFILTER_H
+
+#include <list>
+#include <string>
+#include <utility>
+#include <xercesc/parsers/SAX2XMLFilterImpl.hpp>
+#include "../BlahtexCore/Interface.h"
+
+XERCES_CPP_NAMESPACE_USE
+
+// List of (prefix, namespace URI) bindings.
+typedef std::list<std::pair<std::wstring, std::wstring> > list_wstring2;
+
+// SAX2 filter that replaces blahtexml equation attributes by the MathML
+// produced through a blahtex::Interface, and keeps track of the namespace
+// prefix bindings it sees and of the number of conversion errors.
+class BlahtexFilter : public SAX2XMLFilterImpl
+{
+public:
+    // Policy for the prefix used on generated MathML elements.
+    typedef enum { PrefixAuto, PrefixNone, PrefixAdd } PrefixType;
+protected:
+    blahtex::Interface& interface;          // converter used for every equation
+    list_wstring2 namespaceContext;         // bindings recorded by startPrefixMapping()
+    int numberOfErrors;                     // equations that raised a blahtex::Exception
+    PrefixType desiredMathMLPrefixType;     // see setDesiredMathMLPrefixType()
+    std::wstring desiredMathMLPrefix;       // prefix used with PrefixAdd
+public:
+    BlahtexFilter(SAX2XMLReader* parent, blahtex::Interface& anInterface);
+    ~BlahtexFilter();
+    virtual void startElement(const XMLCh* const uri, const XMLCh* const localname,
+        const XMLCh* const qname, const Attributes& attributes);
+    virtual void startPrefixMapping(const XMLCh* const prefix, const XMLCh* const uri);
+    virtual void endPrefixMapping(const XMLCh* const prefix);
+    int getNumberOfErrors();
+    void setDesiredMathMLPrefixType(PrefixType aPrefixType, const std::wstring& aPrefix);
+protected:
+    // Finds the prefix currently bound to the MathML namespace, if any.
+    bool getMathMLprefix(std::wstring& prefix);
+};
+
+#endif
diff --git a/blahtexml/source/BlahtexXMLin/SAX2Output.cpp b/blahtexml/source/BlahtexXMLin/SAX2Output.cpp
new file mode 100644
index 0000000..d615551
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/SAX2Output.cpp
@@ -0,0 +1,165 @@
+// File "SAX2Output.cpp"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <iostream>
+#include <xercesc/sax2/Attributes.hpp>
+#include <xercesc/util/XMLUniDefs.hpp>
+#include "SAX2Output.h"
+#include "XercesString.h"
+
+XERCES_CPP_NAMESPACE_USE
+using namespace std;
+
+// Creates a serializer writing to `aOut` in encoding `encodingName`, with no
+// DOCTYPE declaration.  The XML declaration is written immediately.
+SAX2Output::SAX2Output(ostream& aOut, const char* const encodingName,
+                       const XMLFormatter::UnRepFlags unRepFlags)
+    : out(aOut),
+      format(encodingName, 0, this, XMLFormatter::NoEscapes, unRepFlags),
+      doctype(DoctypeNone),
+      publicID(),
+      DTD(),
+      rootElementEncountered(false)
+{
+    xmlDeclaration(encodingName);
+}
+
+// Same as the three-argument constructor, but also records the DOCTYPE kind
+// and the public/system identifiers to emit before the root element.
+// The XML declaration is written immediately.
+SAX2Output::SAX2Output(ostream& aOut, const char* const encodingName,
+                       const XMLFormatter::UnRepFlags unRepFlags,
+                       Doctype aDoctype, const XercesString& aPublicID,
+                       const XercesString& aDTD)
+    : out(aOut),
+      format(encodingName, 0, this, XMLFormatter::NoEscapes, unRepFlags),
+      doctype(aDoctype),
+      publicID(aPublicID),
+      DTD(aDTD),
+      rootElementEncountered(false)
+{
+    xmlDeclaration(encodingName);
+}
+
+// No explicit clean-up is performed here.
+SAX2Output::~SAX2Output() {}
+
+// XMLFormatTarget callback: copies `count` already-encoded bytes to the
+// output stream and flushes it.
+void SAX2Output::writeChars(const XMLByte* const toWrite,
+                            const unsigned int count, XMLFormatter* const formatter)
+{
+    const char* const bytes = (const char*)toWrite;
+    out.write(bytes, count);
+    out.flush();
+}
+
+// Reports a recoverable parsing error on cerr as
+// "systemId:line:column : message".
+void SAX2Output::error(const SAXParseException& e)
+{
+    const XercesString systemId(e.getSystemId());
+    const XercesString message(e.getMessage());
+    cerr << endl;
+    cerr << "!!! Parsing error at " << systemId << ":" << e.getLineNumber()
+         << ":" << e.getColumnNumber() << " : " << message << endl;
+}
+
+// Reports a fatal parsing error on cerr as
+// "systemId:line:column : message".
+void SAX2Output::fatalError(const SAXParseException& e)
+{
+    const XercesString systemId(e.getSystemId());
+    const XercesString message(e.getMessage());
+    cerr << endl;
+    cerr << "!!! Fatal parsing error at " << systemId << ":" << e.getLineNumber()
+         << ":" << e.getColumnNumber() << " : " << message << endl;
+}
+
+// Reports a parsing warning on cerr as
+// "systemId:line:column : message".
+void SAX2Output::warning(const SAXParseException& e)
+{
+    const XercesString systemId(e.getSystemId());
+    const XercesString message(e.getMessage());
+    cerr << endl;
+    cerr << "!!! Parsing warning at " << systemId << ":" << e.getLineNumber()
+         << ":" << e.getColumnNumber() << " : " << message << endl;
+}
+
+// Writes character data, formatted with XMLFormatter::CharEscapes.
+void SAX2Output::characters(const XMLCh* const chars, const unsigned int length)
+{
+    format.formatBuf(
+        chars,
+        length,
+        XMLFormatter::CharEscapes);
+}
+
+// Writes the closing tag "</qname>"; `uri` and `localname` are not used.
+void SAX2Output::endElement(const XMLCh* const uri,
+                            const XMLCh* const localname, const XMLCh* const qname)
+{
+    format << XMLFormatter::NoEscapes;
+    format << chOpenAngle << chForwardSlash;
+    format << qname;
+    format << chCloseAngle;
+}
+
+// Writes ignorable whitespace, formatted with XMLFormatter::NoEscapes.
+void SAX2Output::ignorableWhitespace(const XMLCh* const chars, const unsigned int length)
+{
+    format.formatBuf(
+        chars,
+        length,
+        XMLFormatter::NoEscapes);
+}
+
+// Writes "<?target data?>"; the space and the data are omitted when `data`
+// is null.
+void SAX2Output::processingInstruction(const XMLCh* const target,
+                                       const XMLCh* const data)
+{
+    format << XMLFormatter::NoEscapes;
+    format << chOpenAngle << chQuestion << target;
+    if (data) {
+        format << chSpace << data;
+    }
+    format << XMLFormatter::NoEscapes;
+    format << chQuestion << chCloseAngle;
+}
+
+// Writes an opening tag: "<qname", then one xmlns declaration for every
+// prefix mapping queued since the previous element, then the element's own
+// attributes, then ">".  Before the very first element the DOCTYPE
+// declaration (if any) is written.  `uri` and `localname` are not used.
+void SAX2Output::startElement(const XMLCh* const uri, const XMLCh* const localname,
+    const XMLCh* const qname, const Attributes& attributes)
+{
+    if (!rootElementEncountered) {
+        doctypeDeclaration(qname);
+        rootElementEncountered = true;
+    }
+    format << XMLFormatter::NoEscapes << chOpenAngle;
+    format << qname;
+
+    // Pending namespace declarations: xmlns="uri" or xmlns:prefix="uri".
+    XercesString xmlnsKeyword("xmlns");
+    typedef list<pair<XercesString, XercesString> >::const_iterator MappingIter;
+    for (MappingIter mapping = prefixMappings.begin();
+         mapping != prefixMappings.end(); ++mapping) {
+        const XercesString& mappedPrefix = mapping->first;
+        const XercesString& mappedUri = mapping->second;
+        format << XMLFormatter::NoEscapes << chSpace;
+        format << xmlnsKeyword.c_str();
+        if (mappedPrefix.size() > 0)
+            format << chColon << mappedPrefix.c_str();
+        format << chEqual << chDoubleQuote
+               << XMLFormatter::AttrEscapes << mappedUri.c_str()
+               << XMLFormatter::NoEscapes << chDoubleQuote;
+    }
+    prefixMappings.clear();
+
+    // Regular attributes; only the values are attribute-escaped.
+    const unsigned int attributeCount = attributes.getLength();
+    for (unsigned int i = 0; i < attributeCount; ++i) {
+        format << XMLFormatter::NoEscapes << chSpace << attributes.getQName(i)
+               << chEqual << chDoubleQuote
+               << XMLFormatter::AttrEscapes << attributes.getValue(i)
+               << XMLFormatter::NoEscapes << chDoubleQuote;
+    }
+    format << chCloseAngle;
+}
+
+// Queues a namespace binding; startElement() writes it as an xmlns
+// attribute on the next opening tag and then clears the queue.
+void SAX2Output::startPrefixMapping(const XMLCh* const prefix, const XMLCh* const uri)
+{
+    const pair<XercesString, XercesString> binding =
+        pair<XercesString, XercesString>(XercesString(prefix), XercesString(uri));
+    prefixMappings.push_back(binding);
+}
+
+// Writes the XML declaration naming `encodingName`, followed by a line feed.
+void SAX2Output::xmlDeclaration(const char* const encodingName)
+{
+    static XercesString declarationStart("<?xml version=\"1.0\" encoding=\"");
+    static XercesString declarationEnd("\"?>");
+    XercesString encoding(encodingName);
+
+    format << declarationStart.c_str();
+    format << encoding.c_str();
+    format << declarationEnd.c_str();
+    format << chLF;
+}
+
+// Writes the DOCTYPE declaration for root element `qname`, using the SYSTEM
+// or PUBLIC form depending on `doctype`.  Writes nothing for any other
+// value of `doctype`.
+void SAX2Output::doctypeDeclaration(const XMLCh* const qname)
+{
+    static XercesString doctypeKeyword("<!DOCTYPE");
+    static XercesString systemKeyword("SYSTEM");
+    static XercesString publicKeyword("PUBLIC");
+
+    if ((doctype != DoctypeSystem) && (doctype != DoctypePublic))
+        return;
+
+    format << XMLFormatter::NoEscapes << doctypeKeyword.c_str() << chSpace
+           << qname << chSpace;
+    if (doctype == DoctypeSystem) {
+        format << XMLFormatter::NoEscapes << systemKeyword.c_str() << chSpace;
+    }
+    else {
+        // PUBLIC form: the public identifier precedes the system identifier.
+        format << XMLFormatter::NoEscapes << publicKeyword.c_str() << chSpace;
+        format << XMLFormatter::NoEscapes << chDoubleQuote << publicID.c_str()
+               << chDoubleQuote << chSpace;
+    }
+    format << XMLFormatter::NoEscapes << chDoubleQuote << DTD.c_str()
+           << chDoubleQuote << chCloseAngle << chLF;
+}
diff --git a/blahtexml/source/BlahtexXMLin/SAX2Output.h b/blahtexml/source/BlahtexXMLin/SAX2Output.h
new file mode 100644
index 0000000..f803aef
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/SAX2Output.h
@@ -0,0 +1,57 @@
+// File "SAX2Output.h"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef SAX2OUTPUT_H
+#define SAX2OUTPUT_H
+
+#include <list>
+#include <ostream>
+#include <utility>
+#include <xercesc/sax2/DefaultHandler.hpp>
+#include <xercesc/framework/XMLFormatter.hpp>
+#include "XercesString.h"
+
+XERCES_CPP_NAMESPACE_USE
+
+// SAX2 handler that serializes the events it receives as XML text on an
+// output stream, through a Xerces XMLFormatter for which it is also the
+// format target.
+class SAX2Output : public DefaultHandler, private XMLFormatTarget
+{
+public:
+    // Kind of DOCTYPE declaration to write before the root element.
+    typedef enum { DoctypeNone=0, DoctypeSystem, DoctypePublic } Doctype;
+protected:
+    std::ostream& out;                      // destination of the encoded bytes
+    // Namespace bindings waiting to be written on the next opening tag.
+    std::list<std::pair<XercesString, XercesString> > prefixMappings;
+    XMLFormatter format;                    // encodes and escapes the output
+    Doctype doctype;
+    XercesString publicID, DTD;             // identifiers for the DOCTYPE declaration
+    bool rootElementEncountered;            // true once the first element was written
+public:
+    SAX2Output(std::ostream& aOut, const char* const encodingName, const XMLFormatter::UnRepFlags unRepFlags);
+    SAX2Output(std::ostream& aOut, const char* const encodingName, const XMLFormatter::UnRepFlags unRepFlags,
+        Doctype aDoctype, const XercesString& aPublicID, const XercesString& aDTD);
+    ~SAX2Output();
+    virtual void characters(const XMLCh* const chars, const unsigned int length);
+    virtual void endElement( const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);
+    virtual void error(const SAXParseException& e);
+    virtual void fatalError(const SAXParseException& e);
+    virtual void ignorableWhitespace(const XMLCh* const chars, const unsigned int length);
+    virtual void processingInstruction(const XMLCh* const target, const XMLCh* const data);
+    virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attributes);
+    virtual void startPrefixMapping(const XMLCh* const prefix, const XMLCh* const uri);
+    virtual void warning(const SAXParseException& e);
+    // XMLFormatTarget callback receiving the encoded bytes.
+    virtual void writeChars(const XMLByte* const toWrite, const unsigned int count, XMLFormatter* const formatter);
+private:
+    void xmlDeclaration(const char* const encodingName);
+    void doctypeDeclaration(const XMLCh* const qname);
+};
+
+#endif
diff --git a/blahtexml/source/BlahtexXMLin/XercesString.cpp b/blahtexml/source/BlahtexXMLin/XercesString.cpp
new file mode 100644
index 0000000..69c8613
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/XercesString.cpp
@@ -0,0 +1,90 @@
+// File "XercesString.cpp"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <iostream>
+#include "XercesString.h"
+
+// Creates an empty string.
+XercesString::XercesString() : basic_string<XMLCh>() {}
+
+// Copies the null-terminated XMLCh string `s`.  A null pointer yields an
+// empty string instead of being handed to the basic_string constructor,
+// which would be undefined behaviour.
+XercesString::XercesString(const XMLCh *s)
+    : basic_string<XMLCh>()
+{
+    if (s != 0)
+        assign(s);
+}
+
+// Builds the string by transcoding the null-terminated char string `s` with
+// XMLString::transcode().  If transcoding yields no buffer the string is
+// left empty rather than assigning from a null pointer.
+XercesString::XercesString(const char *s)
+    : basic_string<XMLCh>()
+{
+    XMLCh *Xs = XMLString::transcode(s);
+    if (Xs != 0) {
+        assign(Xs);
+        // The buffer returned by transcode() must be given back to Xerces.
+        XMLString::release(&Xs);
+    }
+}
+
+// Builds the string from a wstring, one element at a time: values below
+// 0x10000 are copied as a single unit, larger ones are written as a
+// UTF-16 surrogate pair.
+XercesString::XercesString(const wstring& in)
+    : basic_string<XMLCh>()
+{
+    for (wstring::const_iterator it = in.begin(); it != in.end(); ++it) {
+        wchar_t codePoint = *it;
+        if (!(codePoint >= 0x10000UL)) {
+            push_back(codePoint);
+            continue;
+        }
+        // 0xD800 - 0x40 + (cp >> 10) equals 0xD800 + ((cp - 0x10000) >> 10).
+        push_back(0xD800 - 0x40 + (codePoint >> 10));
+        push_back(0xDC00 + (codePoint & 0x3FF));
+    }
+}
+
+// Converts the UTF-16 content to a wstring holding one code point per
+// element.  A high surrogate followed by a low surrogate is combined into
+// a single code point; every other unit is copied unchanged.
+//
+// A high surrogate that is not followed by a low surrogate is dropped (no
+// exception is raised), but the unit that follows it is now processed
+// normally instead of being discarded together with it.
+wstring XercesString::convertTowstring()
+{
+    wstring out;
+    wchar_t highPart = 0;           // contribution of the pending high surrogate
+    bool highPending = false;
+    for (size_type i = 0; i < length(); ++i) {
+        const XMLCh unit = at(i);
+        if (highPending) {
+            highPending = false;
+            if ((unit & 0xFC00) == 0xDC00) {
+                out.push_back(highPart + (unit - 0xDC00UL) + 0x10000UL);
+                continue;
+            }
+            // Unpaired high surrogate: forget it and fall through so that
+            // the current unit is still handled below.
+        }
+        if ((unit & 0xFC00) == 0xD800) {
+            highPart = (unit - 0xD800UL) << 10;
+            highPending = true;
+        }
+        else {
+            out.push_back(unit);
+        }
+    }
+    return out;
+}
+
+using namespace std;
+
+// Writes `s` to a narrow stream after transcoding it with
+// XMLString::transcode(); the temporary buffer is released afterwards.
+ostream& operator<<(ostream& out, const XercesString& s)
+{
+    char *transcoded = XMLString::transcode(s.c_str());
+    out << transcoded;
+    XMLString::release(&transcoded);
+    return out;
+}
diff --git a/blahtexml/source/BlahtexXMLin/XercesString.h b/blahtexml/source/BlahtexXMLin/XercesString.h
new file mode 100644
index 0000000..3d774ea
--- /dev/null
+++ b/blahtexml/source/BlahtexXMLin/XercesString.h
@@ -0,0 +1,40 @@
+// File "XercesString.h"
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef XERCESSTRING_H
+#define XERCESSTRING_H
+
+#include <string>
+#include <xercesc/util/XMLString.hpp>
+
+XERCES_CPP_NAMESPACE_USE
+using namespace std;
+
+// String of Xerces XMLCh units with conversions from and to the other
+// string types used in the program.
+class XercesString : public basic_string<XMLCh>
+{
+public:
+ // Empty string.
+ XercesString();
+ // From a null-terminated char string, transcoded by Xerces.
+ XercesString(const char *s);
+ // From a null-terminated XMLCh string.
+ XercesString(const XMLCh *s);
+ // From a wstring; values of 0x10000 and above become surrogate pairs.
+ XercesString(const wstring& in);
+ // To a wstring; surrogate pairs are combined into single elements.
+ wstring convertTowstring();
+ // Writes the transcoded string to a narrow stream.
+ friend ostream& operator<<(ostream& out, const XercesString& s);
+};
+
+#endif
diff --git a/blahtexml/source/Messages.cpp b/blahtexml/source/Messages.cpp
new file mode 100644
index 0000000..aff7457
--- /dev/null
+++ b/blahtexml/source/Messages.cpp
@@ -0,0 +1,328 @@
+// File "Messages.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include <map>
+#include "BlahtexCore/Misc.h"
+
+using namespace std;
+
+// This is an array containing all the possible error codes that blahtex
+// can emit, together with their English translations.
+//
+// The sequences $0, $1, etc correspond to the numbered arguments of the
+// error.
+
+// FIX: a future version of the command line application should have a
+// "--language" option and read error messages from a file.
+
+// Table of (error code, English message template) pairs.  GetErrorMessage()
+// looks an exception's code up (through gEnglishMessagesTable) and replaces
+// each "$" followed by a single digit N with argument N of the exception.
+pair<wstring, wstring> gEnglishMessagesArray[] =
+{
+ // Input syntax errors:
+
+ make_pair(L"NonAsciiInMathMode",
+ L"Non-ASCII characters may only be used in text mode "
+ L"(try enclosing the problem characters in \"\\text{...}\")"
+ ),
+
+ make_pair(L"IllegalCharacter",
+ L"Illegal character in input"
+ ),
+
+ make_pair(L"ReservedCommand",
+ L"The command \"$0\" is reserved for internal use by blahtex"
+ ),
+
+ make_pair(L"TooManyTokens",
+ L"The input is too long"
+ ),
+
+ make_pair(L"InvalidColour",
+ L"The colour \"$0\" is invalid"
+ ),
+
+ make_pair(L"IllegalFinalBackslash",
+ L"Illegal backslash \"\\\" at end of input"
+ ),
+
+ make_pair(L"UnrecognisedCommand",
+ L"Unrecognised command \"$0\""
+ ),
+
+ make_pair(L"IllegalCommandInMathMode",
+ L"The command \"$0\" is illegal in math mode"
+ ),
+
+ make_pair(L"IllegalCommandInMathModeWithHint",
+ L"The command \"$0\" is illegal in math mode "
+ L"(perhaps you intended to use \"$1\" instead?)"
+ ),
+
+ make_pair(L"IllegalCommandInTextMode",
+ L"The command \"$0\" is illegal in text mode"
+ ),
+
+ make_pair(L"IllegalCommandInTextModeWithHint",
+ L"The command \"$0\" is illegal in text mode "
+ L"(perhaps you intended to use \"$1\" instead?)"
+ ),
+
+ make_pair(L"MissingOpenBraceBefore",
+ L"Missing open brace \"{\" before \"$0\""
+ ),
+
+ make_pair(L"MissingOpenBraceAfter",
+ L"Missing open brace \"{\" after \"$0\""
+ ),
+
+ make_pair(L"MissingOpenBraceAtEnd",
+ L"Missing open brace \"{\" at end of input"
+ ),
+
+ make_pair(L"NotEnoughArguments",
+ L"Not enough arguments were supplied for \"$0\""
+ ),
+
+ make_pair(L"MissingCommandAfterNewcommand",
+ L"Missing or illegal new command name after \"\\newcommand\" "
+ L"(there must be precisely one command defined; it must begin "
+ L"with a backslash \"\\\" and contain only alphabetic characters)"
+ ),
+
+ make_pair(L"IllegalRedefinition",
+ L"The command \"$0\" has already been defined; "
+ L"you cannot redefine it"
+ ),
+
+ make_pair(L"MissingOrIllegalParameterCount",
+ L"Missing or illegal parameter count in definition of \"$0\" "
+ L"(must be a single digit between 1 and 9 inclusive)"
+ ),
+
+ make_pair(L"MissingOrIllegalParameterIndex",
+ L"Missing or illegal parameter index in definition of \"$0\""
+ ),
+
+ make_pair(L"UnmatchedOpenBracket",
+ L"Encountered open bracket \"[\" without matching "
+ L"close bracket \"]\""
+ ),
+
+ make_pair(L"UnmatchedOpenBrace",
+ L"Encountered open brace \"{\" without matching close brace \"}\""
+ ),
+
+ make_pair(L"UnmatchedCloseBrace",
+ L"Encountered close brace \"}\" without matching open brace \"{\""
+ ),
+
+ make_pair(L"UnmatchedLeft",
+ L"Encountered \"\\left\" without matching \"\\right\""
+ ),
+
+ make_pair(L"UnmatchedRight",
+ L"Encountered \"\\right\" without matching \"\\left\""
+ ),
+
+ make_pair(L"UnmatchedBegin",
+ L"Encountered \"\\begin\" without matching \"\\end\""
+ ),
+
+ make_pair(L"UnmatchedEnd",
+ L"Encountered \"\\end\" without matching \"\\begin\""
+ ),
+
+ make_pair(L"UnexpectedNextCell",
+ L"The command \"&\" may only appear inside a "
+ L"\"\\begin ... \\end\" block"
+ ),
+
+ make_pair(L"UnexpectedNextRow",
+ L"The command \"\\\\\" may only appear inside a "
+ L"\"\\begin ... \\end\" block"
+ ),
+
+ make_pair(L"MismatchedBeginAndEnd",
+ L"The commands \"$0\" and \"$1\" do not match"
+ ),
+
+ make_pair(L"CasesRowTooBig",
+ L"There can only be two entries in each row of a \"cases\" block"
+ ),
+
+ make_pair(L"SubstackRowTooBig",
+ L"There can only be one entry in each row of a \"substack\" block"
+ ),
+
+ make_pair(L"MissingDelimiter",
+ L"Missing delimiter after \"$0\""
+ ),
+
+ make_pair(L"IllegalDelimiter",
+ L"Illegal delimiter following \"$0\""
+ ),
+
+ make_pair(L"MisplacedLimits",
+ L"The command \"$0\" can only appear after a math operator "
+ L"(consider using \"\\mathop\")"
+ ),
+
+ make_pair(L"DoubleSuperscript",
+ L"Encountered two superscripts attached to the same base "
+ L"(only one is allowed)"
+ ),
+
+ make_pair(L"DoubleSubscript",
+ L"Encountered two subscripts attached to the same base "
+ L"(only one is allowed)"
+ ),
+
+ make_pair(L"AmbiguousInfix",
+ L"Ambiguous placement of \"$0\" (try using additional "
+ L"braces \"{ ... }\" to disambiguate)"
+ ),
+
+ // Errors specific to generating MathML:
+
+ make_pair(L"UnavailableSymbolFontCombination",
+ L"The symbol \"$0\" is not available in the font \"$1\""
+ ),
+
+ make_pair(L"TooManyMathmlNodes",
+ L"There are too many nodes in the MathML tree"
+ ),
+
+ // Errors specific to generating purified TeX
+
+ make_pair(L"PngIncompatibleCharacter",
+ L"Unable to correctly generate PNG containing the character $0"
+ ),
+
+ make_pair(L"WrongFontEncoding",
+ L"The symbol \"$0\" may not appear in font encoding \"$1\""
+ ),
+
+ make_pair(L"WrongFontEncodingWithHint",
+ L"The symbol \"$0\" may not appear in font encoding \"$1\" "
+ L"(try using the \"$2{...}\" command)"
+ ),
+
+ make_pair(L"IllegalNestedFontEncodings",
+ L"Font encoding commands may not be nested"
+ ),
+
+ make_pair(L"LatexPackageUnavailable",
+ L"Unable to render PNG because "
+ L"the LaTeX package \"$0\" is unavailable"
+ ),
+
+ make_pair(L"LatexFontNotSpecified",
+ L"No LaTeX font has been specified for \"$0\""
+ ),
+
+ // Now we have errors which may be generated by the command-line
+ // application (i.e. by main.cpp)
+
+ make_pair(L"InvalidUtf8Input",
+ L"The input string was not valid UTF-8"
+ ),
+
+ make_pair(L"CannotCreateTexFile",
+ L"Cannot create tex file"
+ ),
+
+ make_pair(L"CannotWriteTexFile",
+ L"Cannot write to tex file"
+ ),
+
+ make_pair(L"CannotRunLatex",
+ L"Cannot run latex"
+ ),
+
+ make_pair(L"CannotRunDvipng",
+ L"Cannot run dvipng"
+ ),
+
+ make_pair(L"CannotWritePngDirectory",
+ L"Cannot write to output PNG directory"
+ ),
+
+ make_pair(L"CannotChangeDirectory",
+ L"Cannot change working directory"
+ )
+};
+
+// Lookup table from error code to English message template, filled from
+// the whole of gEnglishMessagesArray.
+wishful_hash_map<wstring, wstring>
+    gEnglishMessagesTable(gEnglishMessagesArray, END_ARRAY(gEnglishMessagesArray));
+
+
+// GetErrorMessage() converts the given exception into an English
+// string, using the table gEnglishMessagesTable.
+//
+// Returns the empty string when the exception's code is not in the table.
+// In the message template, "$" followed by a character is replaced by the
+// exception argument whose index is that digit, or by "???" when the
+// character is not a digit or no such argument exists.  A "$" that is the
+// last character of the template is copied literally (previously the code
+// read past the end of the string in that case).
+wstring GetErrorMessage(const blahtex::Exception& e)
+{
+    wishful_hash_map<wstring, wstring>::const_iterator
+        messageLookup = gEnglishMessagesTable.find(e.GetCode());
+    if (messageLookup == gEnglishMessagesTable.end())
+        return L"";
+
+    const wstring& source = messageLookup->second;
+    wstring message;
+
+    // Perform argument substitution on error message, e.g. "$2" gets
+    // replaced with contents of mArgs[2]
+    for (wstring::const_iterator
+        ptr = source.begin(); ptr != source.end(); ++ptr
+    )
+    {
+        if (*ptr != L'$')
+        {
+            message += *ptr;
+            continue;
+        }
+
+        wstring::const_iterator digit = ptr + 1;
+        if (digit == source.end())
+        {
+            // Nothing follows the '$': keep it as ordinary text and stop.
+            message += *ptr;
+            break;
+        }
+
+        // Consume the character after '$' as the argument index.
+        ptr = digit;
+        const int n = (*ptr) - L'0';
+        if (n >= 0 && static_cast<size_t>(n) < e.GetArgs().size())
+            message += e.GetArgs()[n];
+        else
+            message += L"???";
+    }
+
+    return message;
+}
+
+
+// Returns a string listing every error code in gEnglishMessagesTable
+// together with its message template, one "code message" pair per line.
+wstring GetErrorMessages()
+{
+    typedef wishful_hash_map<wstring, wstring>::const_iterator TableIter;
+
+    wstring listing;
+    TableIter entry = gEnglishMessagesTable.begin();
+    while (entry != gEnglishMessagesTable.end())
+    {
+        listing += entry->first + L" " + entry->second + L"\n";
+        ++entry;
+    }
+    return listing;
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/UnicodeConverter.cpp b/blahtexml/source/UnicodeConverter.cpp
new file mode 100644
index 0000000..4267b04
--- /dev/null
+++ b/blahtexml/source/UnicodeConverter.cpp
@@ -0,0 +1,222 @@
+// File "UnicodeConverter.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "UnicodeConverter.h"
+#include <iostream>
+#include <stdexcept>
+#include <cerrno>
+
+using namespace std;
+
+// Closes both iconv descriptors, but only when mIsOpen says they were
+// opened in the first place.
+UnicodeConverter::~UnicodeConverter()
+{
+    if (!mIsOpen)
+        return;
+
+    iconv_close(mInHandle);
+    iconv_close(mOutHandle);
+}
+
+// Turns the errno value left behind by a failed iconv_open() call into
+// a std::runtime_error with a descriptive message. Never returns.
+static void ThrowIconvOpenError(int errorCode)
+{
+    switch (errorCode)
+    {
+        case EMFILE:
+            throw runtime_error(
+                "iconv_open failed with errno == EMFILE"
+            );
+        case ENFILE:
+            throw runtime_error(
+                "iconv_open failed with errno == ENFILE"
+            );
+        case ENOMEM:
+            throw runtime_error(
+                "iconv_open failed with errno == ENOMEM"
+            );
+        case EINVAL:
+            throw runtime_error(
+                "iconv_open failed with errno == EINVAL"
+            );
+        default:
+            throw runtime_error(
+                "iconv_open failed with unknown error code"
+            );
+    }
+}
+
+// Opens the two iconv descriptors (UTF-8 => wchar_t and wchar_t => UTF-8).
+//
+// Throws std::logic_error if the object is already open, and
+// std::runtime_error if wchar_t is not four bytes wide or if either
+// descriptor cannot be created. On failure no descriptor is left open.
+void UnicodeConverter::Open()
+{
+    if (mIsOpen)
+        throw logic_error(
+            "UnicodeConverter::Open called on already open object"
+        );
+
+    if (sizeof(wchar_t) != 4)
+        throw runtime_error(
+            "The wchar_t data type on this system is not four bytes wide"
+        );
+
+    // Determine endian-ness of wchar_t.
+    // (Really we should be able to just use "WCHAR_T". This unfortunately
+    // doesn't seem to be available on darwin.)
+    wchar_t testChar = L'A';
+    const char* UcsString =
+        (*(reinterpret_cast<char*>(&testChar)) == 'A')
+            ? "UCS-4LE" : "UCS-4BE";
+
+    mInHandle = iconv_open(UcsString, "UTF-8");
+    if (mInHandle == (iconv_t)(-1))
+        ThrowIconvOpenError(errno);
+
+    mOutHandle = iconv_open("UTF-8", UcsString);
+    if (mOutHandle == (iconv_t)(-1))
+    {
+        // The destructor only closes handles when mIsOpen is set, so the
+        // first descriptor has to be released here or it would leak.
+        // Save errno first, since iconv_close() may overwrite it.
+        const int savedErrno = errno;
+        iconv_close(mInHandle);
+        ThrowIconvOpenError(savedErrno);
+    }
+
+    mIsOpen = true;
+}
+
+// Converts a UTF-8 encoded string into a wstring.
+//
+// Throws std::logic_error if Open() has not been called or if iconv
+// fails for an unexpected reason, and UnicodeConverter::Exception if the
+// input is not valid UTF-8 (iconv reports EILSEQ or EINVAL).
+wstring UnicodeConverter::ConvertIn(const string& input)
+{
+    if (!mIsOpen)
+        throw logic_error(
+            "UnicodeConverter::ConvertIn called "
+            "before UnicodeConverter::Open"
+        );
+
+    // Nothing to convert; this also keeps the &buffer[0] expressions
+    // below away from empty strings.
+    if (input.empty())
+        return wstring();
+
+    // iconv needs a mutable view of its input on some platforms, so work
+    // on a private copy. Both buffers are standard strings, so they are
+    // released automatically on every exit path, including exceptions.
+    string inputCopy(input);
+
+    // Every input byte produces at most one wide character, so a result
+    // buffer of input.size() characters is always large enough.
+    wstring output(input.size(), L'\0');
+
+    // The following garbage is needed to handle the unfortunate
+    // inconsistency between Linux and BSD definitions for the second
+    // parameter of iconv. BSD (including Mac OS X) requires const char*,
+    // whereas Linux requires char*, and neither option seems to produce
+    // error-free, warning-free compilation on both systems simultaneously.
+#ifdef BLAHTEX_ICONV_CONST
+    const
+#endif
+    char* source = &inputCopy[0];
+    char* dest = reinterpret_cast<char*>(&output[0]);
+
+    size_t inBytesLeft = inputCopy.size();
+    size_t outBytesLeft = output.size() * sizeof(wchar_t);
+
+    if (iconv(
+            mInHandle,
+            &source,
+            &inBytesLeft,
+            &dest,
+            &outBytesLeft
+        ) == (size_t)(-1))
+    {
+        // errno is inspected immediately, before anything else gets a
+        // chance to modify it.
+        switch (errno)
+        {
+            case EILSEQ:
+            case EINVAL: throw UnicodeConverter::Exception();
+            default:
+                throw logic_error(
+                    "Conversion problem in UnicodeConverter::ConvertIn"
+                );
+        }
+    }
+
+    // Trim the characters that were not written.
+    output.resize(output.size() - outBytesLeft / sizeof(wchar_t));
+    return output;
+}
+
+// Converts a wstring into a UTF-8 encoded string.
+//
+// Throws std::logic_error if Open() has not been called or if iconv
+// fails for an unexpected reason, and UnicodeConverter::Exception if the
+// input cannot be converted (iconv reports EILSEQ or EINVAL).
+string UnicodeConverter::ConvertOut(const wstring& input)
+{
+    if (!mIsOpen)
+        throw logic_error(
+            "UnicodeConverter::ConvertOut called "
+            "before UnicodeConverter::Open"
+        );
+
+    // Nothing to convert; this also keeps the &buffer[0] expressions
+    // below away from empty strings.
+    if (input.empty())
+        return string();
+
+    // Private mutable copy of the input (see ConvertIn for why), plus a
+    // result buffer sized at four bytes per wide character. Both are
+    // standard strings, so they are released on every exit path.
+    wstring inputCopy(input);
+    string output(input.size() * 4, '\0');
+
+#ifdef BLAHTEX_ICONV_CONST
+    const
+#endif
+    char* source = reinterpret_cast<char*>(&inputCopy[0]);
+    char* dest = &output[0];
+
+    size_t inBytesLeft = inputCopy.size() * sizeof(wchar_t);
+    size_t outBytesLeft = output.size();
+
+    if (iconv(
+            mOutHandle,
+            &source,
+            &inBytesLeft,
+            &dest,
+            &outBytesLeft
+        ) == (size_t)(-1))
+    {
+        // errno is inspected immediately, before anything else gets a
+        // chance to modify it.
+        switch (errno)
+        {
+            case EILSEQ:
+            case EINVAL: throw UnicodeConverter::Exception();
+            default:
+                throw logic_error(
+                    "Conversion problem in UnicodeConverter::ConvertOut"
+                );
+        }
+    }
+
+    // Trim the bytes that were not written.
+    output.resize(output.size() - outBytesLeft);
+    return output;
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/UnicodeConverter.h b/blahtexml/source/UnicodeConverter.h
new file mode 100644
index 0000000..261d6d6
--- /dev/null
+++ b/blahtexml/source/UnicodeConverter.h
@@ -0,0 +1,70 @@
+// File "UnicodeConverter.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_UNICODE_CONVERTER_H
+#define BLAHTEX_UNICODE_CONVERTER_H
+
+#include <string>
+#include <iconv.h>
+
+// UnicodeConverter handles all UTF8 <=> wchar_t conversions. It's
+// basically a wrapper for the iconv library in terms of
+// - string (assumed to be in UTF-8) and
+// - wstring (in internal wchar_t format, which may be big-endian or
+// little-endian depending on platform).
+//
+// The object owns two iconv descriptors once Open() has succeeded and
+// closes them in its destructor. It is therefore not copyable: a copy
+// would close the same descriptors a second time.
+class UnicodeConverter
+{
+    public:
+        UnicodeConverter() :
+            mIsOpen(false),
+            mOutHandle((iconv_t)(-1)),
+            mInHandle((iconv_t)(-1))
+        { }
+
+        ~UnicodeConverter();
+
+        // Open() must be called before using this object.
+        //
+        // It will throw a std::runtime_error object if
+        // (1) we are running on a platform with less than 4 bytes
+        // per wchar_t, or
+        // (2) an appropriate iconv_t converter object can't be created
+        void Open();
+
+        // ConvertIn: UTF-8 => wchar_t. ConvertOut: wchar_t => UTF-8.
+        std::wstring ConvertIn(const std::string& input);
+        std::string ConvertOut(const std::wstring& input);
+
+        // The above 'ConvertIn' and 'ConvertOut' functions will throw this
+        // exception object if their input is invalid (e.g. invalid UTF-8).
+        // More serious problems report a std::logic_error.
+        class Exception
+        {
+        };
+
+    private:
+        // Copying is disabled (declared but intentionally not defined).
+        UnicodeConverter(const UnicodeConverter&);
+        UnicodeConverter& operator=(const UnicodeConverter&);
+
+        bool mIsOpen;
+
+        // mOutHandle is the iconv object handling wchar_t => UTF-8,
+        // mInHandle does the other way. Both hold (iconv_t)(-1) until
+        // Open() assigns them.
+        iconv_t mOutHandle;
+        iconv_t mInHandle;
+};
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/main.cpp b/blahtexml/source/main.cpp
new file mode 100644
index 0000000..a7e22a8
--- /dev/null
+++ b/blahtexml/source/main.cpp
@@ -0,0 +1,694 @@
+// File "main.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// blahtexml (version 0.5)
+// Copyright (C) 2007-2008, Gilles Van Assche
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "BlahtexCore/Interface.h"
+#include "UnicodeConverter.h"
+#include "mainPng.h"
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+
+using namespace std;
+using namespace blahtex;
+
+#ifdef BLAHTEXML_USING_XERCES
+#include <iostream>
+#include <string.h>
+#include <xercesc/framework/StdInInputSource.hpp>
+#include <xercesc/framework/XMLFormatter.hpp>
+#include <xercesc/parsers/SAX2XMLFilterImpl.hpp>
+#include <xercesc/sax2/Attributes.hpp>
+#include <xercesc/sax2/SAX2XMLReader.hpp>
+#include <xercesc/sax2/XMLReaderFactory.hpp>
+#include <xercesc/util/OutOfMemoryException.hpp>
+#include <xercesc/util/PlatformUtils.hpp>
+#include <xercesc/util/TransService.hpp>
+#include <xercesc/util/XMLString.hpp>
+#include <xercesc/util/XMLUniDefs.hpp>
+#include "BlahtexXMLin/BlahtexFilter.h"
+#include "BlahtexXMLin/SAX2Output.h"
+#endif
+
+string gBlahtexVersion = "0.5";
+
+// A single global instance of UnicodeConverter.
+UnicodeConverter gUnicodeConverter;
+
+// Imported from Messages.cpp:
+extern wstring GetErrorMessage(const blahtex::Exception& e);
+extern wstring GetErrorMessages();
+
+// FormatError() renders a blahtex Exception as an XML fragment of the form
+// "<error><id>...</id><arg>...</arg><arg>...</arg> ...
+// <message>...</message></error>".
+// The arguments and the English message are passed through XmlEncode()
+// using the supplied encoding options; the error id is emitted as is.
+wstring FormatError(
+    const blahtex::Exception& e,
+    const EncodingOptions& options
+)
+{
+    wstring xml(L"<error><id>");
+    xml += e.GetCode();
+    xml += L"</id>";
+
+    // One <arg> element per exception argument, in order.
+    vector<wstring>::const_iterator current = e.GetArgs().begin();
+    while (current != e.GetArgs().end())
+    {
+        xml += L"<arg>";
+        xml += XmlEncode(*current, options);
+        xml += L"</arg>";
+        current++;
+    }
+
+    xml += L"<message>";
+    xml += XmlEncode(GetErrorMessage(e), options);
+    xml += L"</message>";
+
+    xml += L"</error>";
+    return xml;
+}
+
+// ShowUsage() prints a help screen to standard output and then ends the program via exit(0); it does not return.
+void ShowUsage()
+{
+ cout << "\n"
+#ifdef BLAHTEXML_USING_XERCES // the banner and usage lines differ when built with BLAHTEXML_USING_XERCES
+"Blahtexml version " << gBlahtexVersion << "\n"
+#else
+"Blahtex version " << gBlahtexVersion << "\n"
+#endif
+"Copyright (C) 2006, David Harvey\n"
+#ifdef BLAHTEXML_USING_XERCES
+"Copyright (C) 2007-2008, Gilles Van Assche\n"
+#endif
+"\n"
+"This is free software; see the source "
+"for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY "
+"or FITNESS FOR A PARTICULAR PURPOSE.\n"
+"\n"
+#ifdef BLAHTEXML_USING_XERCES
+"Normal mode: blahtexml [ options ] < inputfile > outputfile\n"
+"XML input mode: blahtexml --xmlin [ options ] < inputfile > outputfile\n"
+#else
+"Usage: blahtex [ options ] < inputfile > outputfile\n"
+#endif
+"\n"
+"SUMMARY OF OPTIONS (see manual for details)\n"
+"\n"
+" --texvc-compatible-commands\n"
+"\n"
+" --mathml\n"
+" --indented\n"
+" --spacing { strict | moderate | relaxed }\n"
+" --mathml-version-1-fonts\n"
+" --disallow-plane-1\n"
+" --mathml-encoding { raw | numeric | short | long }\n"
+" --other-encoding { raw | numeric }\n"
+"\n"
+" --png\n"
+" --use-ucs-package\n"
+" --use-cjk-package\n"
+" --use-preview-package\n"
+" --japanese-font fontname\n"
+" --shell-latex command\n"
+" --shell-dvipng command\n"
+" --temp-directory directory\n"
+" --png-directory directory\n"
+"\n"
+" --debug { parse | layout | purified }\n"
+" --keep-temp-files\n"
+" --throw-logic-error\n"
+" --print-error-messages\n"
+"\n"
+#ifdef BLAHTEXML_USING_XERCES // these options are listed only when built with BLAHTEXML_USING_XERCES
+" --doctype-system DTD\n"
+" --doctype-public PublicID DTD\n"
+" --doctype-xhtml+mathml\n"
+" --mathml-nsprefix-auto\n"
+" --mathml-nsprefix-none\n"
+" --mathml-nsprefix prefix\n"
+"\n"
+"\n"
+#endif
+"More information available at www.blahtex.org\n"
+"\n";
+ // The whole screen above is a single chained write to cout.
+ // FIX: need command line option to select output DPI
+
+ exit(0); // status 0: showing the help screen is treated as success
+}
+
+// CommandLineException is thrown when the command line cannot be
+// understood; mMessage holds a human-readable description of the problem.
+struct CommandLineException
+{
+    CommandLineException(const string& message) : mMessage(message) { }
+
+    string mMessage;
+};
+
+// Makes sure a non-empty string ends with '/'. Empty strings and
+// strings that already end with a slash are left untouched.
+void AddTrailingSlash(string& s)
+{
+    if (s.empty())
+        return;
+
+    if (*s.rbegin() == '/')
+        return;
+
+    s.push_back('/');
+}
+
+#ifdef BLAHTEXML_USING_XERCES
+SAX2Output::Doctype outputDoctype = SAX2Output::DoctypeNone; // doctype mode handed to SAX2Output; set by the --doctype-* options in main()
+string outputPublicID; // public identifier handed to SAX2Output (set by --doctype-public / --doctype-xhtml+mathml)
+string outputDTD; // DTD string handed to SAX2Output (set by the --doctype-* options)
+BlahtexFilter::PrefixType MathMLPrefixType = BlahtexFilter::PrefixAuto; // set by the --mathml-nsprefix* options
+string MathMLPrefix; // prefix given with --mathml-nsprefix
+int batchXMLConversion(blahtex::Interface& interface) // XML input mode: parses stdin through a BlahtexFilter and writes via SAX2Output to cout; returns 0 or 1
+{
+ cerr << "\n"
+ "Blahtexml version " << gBlahtexVersion << "\n"
+ "Copyright (C) 2006, David Harvey\n"
+ "Copyright (C) 2007-2008, Gilles Van Assche\n"
+ "\n"
+ "This is free software; see the source "
+ "for copying conditions. There is NO\n"
+ "warranty; not even for MERCHANTABILITY "
+ "or FITNESS FOR A PARTICULAR PURPOSE.\n";
+ cerr << endl; // banner goes to cerr because cout carries the converted document
+ try {
+ XMLPlatformUtils::Initialize();
+ }
+ catch (const XMLException& toCatch) {
+ XERCES_STD_QUALIFIER cerr << "Error during initialization! :\n" << XMLString::transcode(toCatch.getMessage()) << endl;
+ return 1; // NOTE(review): the transcode() result above is not passed to XMLString::release, unlike in the handler further down
+ }
+ SAX2XMLReader* reader = XMLReaderFactory::createXMLReader();
+ BlahtexFilter* parser = new BlahtexFilter(reader, interface); // filter wrapping the reader; both are deleted at the end of this function
+
+ parser->setFeature(XMLUni::fgSAX2CoreValidation, false);
+ parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
+ parser->setFeature(XMLUni::fgXercesSchema, false);
+ parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
+ parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, false);
+
+ XercesString _MathMLPrefix(MathMLPrefix.c_str());
+ wstring __MathMLPrefix = _MathMLPrefix.convertTowstring();
+ parser->setDesiredMathMLPrefixType(MathMLPrefixType, __MathMLPrefix);
+
+ int parserErrors = 0;
+ int result = 0; // value returned to main(); becomes 1 on the failures flagged below
+ {
+ StdInInputSource stdIn;
+ try {
+ XercesString publicID(outputPublicID.c_str());
+ XercesString DTD(outputDTD.c_str());
+ SAX2Output output(cout, "UTF-8", XMLFormatter::UnRep_CharRef, outputDoctype, publicID, DTD);
+ parser->setContentHandler(&output); // output acts as both content handler and error handler
+ parser->setErrorHandler(&output);
+ parser->parse(stdIn);
+ parserErrors = parser->getErrorCount();
+ }
+ catch (const OutOfMemoryException&) {
+ cerr << endl;
+ cerr << "Out of memory exception" << endl;
+ result = 1;
+ }
+ catch (const XMLException& e) {
+ char *errorString = XMLString::transcode(e.getMessage());
+ cerr << endl;
+ cerr << "Error: " << errorString << endl;
+ XMLString::release(&errorString);
+ result = 1;
+ }
+ }
+ if (parserErrors > 0) { // NOTE: parser errors are reported but do not by themselves set result to 1
+ cerr << "During the XML input parsing, ";
+ if (parserErrors == 1) cerr << "an error";
+ else cerr << parserErrors << " errors";
+ cerr << " occurred." << endl;
+ }
+ int blahtexErrors = parser->getNumberOfErrors();
+ if (blahtexErrors > 0) { // blahtex conversion errors, in contrast, do make the function return 1
+ cerr << "During the blahtex conversion, ";
+ if (blahtexErrors == 1) cerr << "an error";
+ else cerr << blahtexErrors << " errors";
+ cerr << " occurred.\nSee the output file for more information." << endl;
+ result = 1;
+ }
+ delete parser;
+ delete reader;
+ XMLPlatformUtils::Terminate();
+ return result;
+}
+#endif
+
+int main (int argc, char* const argv[]) { // parses options, reads TeX from stdin, writes a <blahtex> XML report to stdout
+ // This outermost try block catches std::runtime_error
+ // and CommandLineException.
+ try
+ {
+ gUnicodeConverter.Open();
+
+ blahtex::Interface interface;
+
+ bool doPng = false;
+ bool doMathml = false;
+#ifdef BLAHTEXML_USING_XERCES
+ bool doXMLinput = false;
+#endif
+ // Debugging and clean-up switches (see --debug and --keep-temp-files below).
+ bool debugLayoutTree = false;
+ bool debugParseTree = false;
+ bool debugPurifiedTex = false;
+ bool deleteTempFiles = true;
+ // Defaults for the PNG pipeline; each can be overridden on the command line.
+ string shellLatex = "latex";
+ string shellDvipng = "dvipng";
+ string tempDirectory = "./";
+ string pngDirectory = "./";
+
+ // Process command line arguments
+ for (int i = 1; i < argc; i++)
+ {
+ string arg(argv[i]);
+
+ if (arg == "--help")
+ ShowUsage(); // ends the program (ShowUsage calls exit)
+
+ else if (arg == "--print-error-messages")
+ {
+ cout << gUnicodeConverter.ConvertOut(GetErrorMessages())
+ << endl;
+ return 0;
+ }
+
+ else if (arg == "--throw-logic-error")
+ throw logic_error("Aaarrrgggghhhh!"); // deliberately exercises the logic_error handler at the bottom of main()
+
+ else if (arg == "--shell-latex")
+ {
+ if (++i == argc) // the option needs a value: step onto it, failing if the arguments ran out
+ throw CommandLineException(
+ "Missing string after \"--shell-latex\""
+ );
+ shellLatex = string(argv[i]);
+ }
+
+ else if (arg == "--shell-dvipng")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--shell-dvipng\""
+ );
+ shellDvipng = string(argv[i]);
+ }
+
+ else if (arg == "--temp-directory")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--temp-directory\""
+ );
+ tempDirectory = string(argv[i]);
+ AddTrailingSlash(tempDirectory); // MakePngFile expects directories to end with a slash
+ }
+
+ else if (arg == "--png-directory")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--png-directory\""
+ );
+ pngDirectory = string(argv[i]);
+ AddTrailingSlash(pngDirectory);
+ }
+
+ else if (arg == "--use-ucs-package")
+ interface.mPurifiedTexOptions.mAllowUcs = true;
+
+ else if (arg == "--use-cjk-package")
+ interface.mPurifiedTexOptions.mAllowCJK = true;
+
+ else if (arg == "--use-preview-package")
+ interface.mPurifiedTexOptions.mAllowPreview = true;
+
+ else if (arg == "--japanese-font")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--japanese-font\""
+ );
+ interface.mPurifiedTexOptions.mJapaneseFont =
+ gUnicodeConverter.ConvertIn(string(argv[i]));
+ }
+
+ else if (arg == "--indented")
+ interface.mIndented = true;
+
+ else if (arg == "--spacing")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--spacing\""
+ );
+ arg = string(argv[i]); // arg now holds the option's value
+
+ if (arg == "strict")
+ interface.mMathmlOptions.mSpacingControl
+ = MathmlOptions::cSpacingControlStrict;
+
+ else if (arg == "moderate")
+ interface.mMathmlOptions.mSpacingControl
+ = MathmlOptions::cSpacingControlModerate;
+
+ else if (arg == "relaxed")
+ interface.mMathmlOptions.mSpacingControl
+ = MathmlOptions::cSpacingControlRelaxed;
+
+ else
+ throw CommandLineException(
+ "Illegal string after \"--spacing\""
+ );
+ }
+
+ else if (arg == "--mathml-version-1-fonts")
+ interface.mMathmlOptions.mUseVersion1FontAttributes = true;
+
+ else if (arg == "--texvc-compatible-commands")
+ interface.mTexvcCompatibility = true;
+
+ else if (arg == "--png")
+ doPng = true;
+
+ else if (arg == "--mathml")
+ doMathml = true;
+
+ else if (arg == "--mathml-encoding")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--mathml-encoding\""
+ );
+ arg = string(argv[i]);
+
+ if (arg == "raw")
+ interface.mEncodingOptions.mMathmlEncoding
+ = EncodingOptions::cMathmlEncodingRaw;
+
+ else if (arg == "numeric")
+ interface.mEncodingOptions.mMathmlEncoding
+ = EncodingOptions::cMathmlEncodingNumeric;
+
+ else if (arg == "short")
+ interface.mEncodingOptions.mMathmlEncoding
+ = EncodingOptions::cMathmlEncodingShort;
+
+ else if (arg == "long")
+ interface.mEncodingOptions.mMathmlEncoding
+ = EncodingOptions::cMathmlEncodingLong;
+
+ else
+ throw CommandLineException(
+ "Illegal string after \"--mathml-encoding\""
+ );
+ }
+
+ else if (arg == "--disallow-plane-1")
+ {
+ interface.mMathmlOptions .mAllowPlane1 = false;
+ interface.mEncodingOptions.mAllowPlane1 = false;
+ }
+
+ else if (arg == "--other-encoding")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--other-encoding\""
+ );
+ arg = string(argv[i]);
+ if (arg == "raw")
+ interface.mEncodingOptions.mOtherEncodingRaw = true;
+ else if (arg == "numeric")
+ interface.mEncodingOptions.mOtherEncodingRaw = false;
+ else
+ throw CommandLineException(
+ "Illegal string after \"--other-encoding\""
+ );
+ }
+
+ else if (arg == "--debug")
+ {
+ if (++i == argc)
+ throw CommandLineException(
+ "Missing string after \"--debug\""
+ );
+ arg = string(argv[i]);
+ if (arg == "layout")
+ debugLayoutTree = true;
+ else if (arg == "parse")
+ debugParseTree = true;
+ else if (arg == "purified")
+ debugPurifiedTex = true;
+ else
+ throw CommandLineException(
+ "Illegal string after \"--debug\""
+ );
+ }
+
+ else if (arg == "--keep-temp-files")
+ deleteTempFiles = false;
+#ifdef BLAHTEXML_USING_XERCES
+ else if (arg == "--xmlin")
+ doXMLinput = true;
+ else if (arg == "--doctype-system") {
+ outputDoctype = SAX2Output::DoctypeSystem;
+ if (++i == argc) throw CommandLineException("Missing string after \"--doctype-system\"");
+ outputDTD = argv[i];
+ }
+ else if (arg == "--doctype-public") { // takes two values: the public identifier, then the DTD
+ outputDoctype = SAX2Output::DoctypePublic;
+ if (++i == argc) throw CommandLineException("Missing two strings after \"--doctype-public\"");
+ outputPublicID = argv[i];
+ if (++i == argc) throw CommandLineException("Missing one string after \"--doctype-public\"");
+ outputDTD = argv[i];
+ }
+ else if (arg == "--doctype-xhtml+mathml") { // preset: fixed public id and DTD, and no MathML namespace prefix
+ outputDoctype = SAX2Output::DoctypePublic;
+ outputPublicID = "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN";
+ outputDTD = "http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd";
+ MathMLPrefixType = BlahtexFilter::PrefixNone;
+ }
+ else if (arg == "--mathml-nsprefix-auto")
+ MathMLPrefixType = BlahtexFilter::PrefixAuto;
+ else if (arg == "--mathml-nsprefix-none")
+ MathMLPrefixType = BlahtexFilter::PrefixNone;
+ else if (arg == "--mathml-nsprefix") {
+ MathMLPrefixType = BlahtexFilter::PrefixAdd;
+ if (++i == argc) throw CommandLineException("Missing string after \"--mathml-nsprefix\"");
+ MathMLPrefix = argv[i];
+ }
+#endif
+ else
+ throw CommandLineException(
+ "Unrecognised command line option \"" + arg + "\""
+ );
+ }
+
+ // Finished processing command line, now process the input
+
+#ifdef BLAHTEXML_USING_XERCES
+ if (doXMLinput)
+ return batchXMLConversion(interface); // XML input mode bypasses everything below and supplies the exit status
+#endif
+ if (isatty(0)) // standard input is a terminal, i.e. nothing was redirected in: show the help screen instead
+ ShowUsage();
+ // Everything that will go inside the <blahtex> element is collected here first.
+ wostringstream mainOutput;
+
+ try
+ {
+ wstring input;
+
+ // Read input file
+ string inputUtf8;
+ {
+ char c;
+ while (cin.get(c))
+ inputUtf8 += c;
+ }
+
+ // This try block converts UnicodeConverter::Exception into an
+ // input syntax error, i.e. if the user supplies invalid UTF-8.
+ // (Later we treat such exceptions as debug assertions.)
+ try
+ {
+ input = gUnicodeConverter.ConvertIn(inputUtf8);
+ }
+ catch (UnicodeConverter::Exception& e)
+ {
+ throw blahtex::Exception(L"InvalidUtf8Input");
+ }
+
+ // Build the parse and layout trees.
+ interface.ProcessInput(input);
+
+ if (debugParseTree)
+ {
+ mainOutput << L"\n=== BEGIN PARSE TREE ===\n\n";
+ interface.GetManager()->GetParseTree()->Print(mainOutput);
+ mainOutput << L"\n=== END PARSE TREE ===\n\n";
+ }
+
+ if (debugLayoutTree)
+ {
+ mainOutput << L"\n=== BEGIN LAYOUT TREE ===\n\n";
+ wostringstream temp;
+ interface.GetManager()->GetLayoutTree()->Print(temp);
+ mainOutput << XmlEncode(temp.str(), EncodingOptions()); // unlike the parse tree, the layout tree dump is XML-encoded
+ mainOutput << L"\n=== END LAYOUT TREE ===\n\n";
+ }
+
+ // Generate purified TeX if required.
+ if (doPng || debugPurifiedTex)
+ {
+ // This stream is where we build the PNG output block:
+ wostringstream pngOutput;
+
+ try
+ {
+ wstring purifiedTex = interface.GetPurifiedTex();
+
+ if (debugPurifiedTex)
+ {
+ pngOutput << L"\n=== BEGIN PURIFIED TEX ===\n\n";
+ pngOutput << purifiedTex;
+ pngOutput << L"\n=== END PURIFIED TEX ===\n\n";
+ }
+
+ // Make the system calls to generate the PNG image
+ // if requested.
+ if (doPng)
+ {
+ PngInfo info = MakePngFile(
+ purifiedTex,
+ tempDirectory,
+ pngDirectory,
+ "", // empty pngFilename: MakePngFile names the file after the md5 it computes
+ shellLatex,
+ shellDvipng,
+ deleteTempFiles
+ );
+
+ // The height and depth measurements are only
+ // valid if the "preview" package is used:
+ if (interface.mPurifiedTexOptions.mAllowPreview
+ && info.mDimensionsValid
+ )
+ {
+ pngOutput << L"<height>"
+ << info.mHeight << L"</height>\n";
+ pngOutput << L"<depth>"
+ << info.mDepth << L"</depth>\n";
+ }
+
+ pngOutput << L"<md5>"
+ << gUnicodeConverter.ConvertIn(info.mMd5)
+ << L"</md5>\n";
+ }
+ }
+
+ // Catching errors that occurred during PNG generation:
+ catch (blahtex::Exception& e)
+ {
+ pngOutput.str(L""); // drop any partial output so the <png> block holds only the error
+ pngOutput << FormatError(e, interface.mEncodingOptions)
+ << endl;
+ }
+
+ mainOutput << L"<png>\n" << pngOutput.str() << L"</png>\n";
+ }
+
+ // This block generates MathML output if requested.
+ if (doMathml)
+ {
+ // This stream is where we build the MathML output block:
+ wostringstream mathmlOutput;
+
+ try
+ {
+ mathmlOutput << L"<markup>\n";
+ mathmlOutput << interface.GetMathml();
+ if (!interface.mIndented)
+ mathmlOutput << L"\n";
+ mathmlOutput << L"</markup>\n";
+ }
+
+ // Catch errors in generating the MathML:
+ catch (blahtex::Exception& e)
+ {
+ mathmlOutput.str(L""); // drop any partial markup so the <mathml> block holds only the error
+ mathmlOutput
+ << FormatError(e, interface.mEncodingOptions)
+ << endl;
+ }
+
+ mainOutput << L"<mathml>\n" << mathmlOutput.str()
+ << L"</mathml>\n";
+ }
+ }
+
+ // This catches input syntax errors.
+ catch (blahtex::Exception& e)
+ {
+ mainOutput.str(L""); // an input error replaces everything collected so far
+ mainOutput << FormatError(e, interface.mEncodingOptions)
+ << endl;
+ }
+
+ cout << "<blahtex>\n"
+ << gUnicodeConverter.ConvertOut(mainOutput.str())
+ << "</blahtex>\n";
+ }
+
+ // The following errors might occur if there's a bug in blahtex that
+ // some assertion condition picked up. We still want to report these
+ // nicely to the user so that they can notify the developers.
+ catch (std::logic_error& e)
+ {
+ // WARNING: this doesn't XML-encode the message
+ // (We don't expect the message to contain the characters &<>)
+ cout << "<blahtex>\n<logicError>" << e.what()
+ << "</logicError>\n</blahtex>\n";
+ }
+
+ // These indicate incorrect command line syntax:
+ catch (CommandLineException& e)
+ {
+ cout << "blahtex: " << e.mMessage << " (try \"blahtex --help\")\n";
+ }
+
+ // These kind of errors should only occur if the program has been
+ // installed incorrectly.
+ catch (std::runtime_error& e)
+ {
+ cout << "blahtex runtime error: " << e.what() << endl;
+ }
+
+ return 0; // also reached after each catch handler above, so those failures still exit with status 0
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/mainPng.cpp b/blahtexml/source/mainPng.cpp
new file mode 100644
index 0000000..d6cf072
--- /dev/null
+++ b/blahtexml/source/mainPng.cpp
@@ -0,0 +1,213 @@
+// File "mainPng.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "BlahtexCore/Misc.h"
+#include "UnicodeConverter.h"
+#include "md5Wrapper.h"
+#include "mainPng.h"
+#include <cerrno>
+#include <sys/stat.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+
+using namespace std;
+using namespace blahtex;
+
+
+// From main.cpp:
+extern UnicodeConverter gUnicodeConverter;
+
+
+// TemporaryFile ties the lifetime of a named file to a C++ scope: when
+// the object is destroyed the file is unlinked, unless deletion was
+// switched off at construction time (e.g. to keep temp files around
+// while debugging).
+class TemporaryFile
+{
+public:
+    TemporaryFile(
+        const string& filename,
+        bool shouldDelete = true
+    ) :
+        mFilename(filename),
+        mShouldDelete(shouldDelete)
+    { }
+
+    ~TemporaryFile()
+    {
+        if (!mShouldDelete)
+            return;
+
+        unlink(mFilename.c_str());
+    }
+
+private:
+    // Path of the file to remove.
+    string mFilename;
+
+    // False when the file should survive this object.
+    bool mShouldDelete;
+};
+
+
+// Reports whether stat() succeeds on the given path, which is taken to
+// mean that the file exists.
+bool FileExists(const string& filename)
+{
+    struct stat fileInfo;
+    const int status = stat(filename.c_str(), &fileInfo);
+    return status == 0;
+}
+
+
+// Runs a shell command via system(), optionally from another directory.
+//
+// If directory is neither "" nor "./", the current working directory is
+// remembered, the process changes into directory for the duration of the
+// command, and changes back afterwards.
+//
+// Returns true if system() returned zero, otherwise false.
+// Throws a "CannotChangeDirectory" exception if the working directory
+// cannot be determined, entered or restored.
+bool Execute(
+    const string& command,
+    const string& directory = "./"
+)
+{
+    char originalDirectory[5000];
+
+    const bool switchDirectory = !(directory == "" || directory == "./");
+
+    if (switchDirectory)
+    {
+        // Remember where we are, then move; either failure is fatal.
+        if (getcwd(originalDirectory, 5000) == NULL
+            || chdir(directory.c_str()) != 0)
+            throw blahtex::Exception(L"CannotChangeDirectory");
+    }
+
+    const int status = system(command.c_str());
+
+    if (switchDirectory && chdir(originalDirectory) != 0)
+        throw blahtex::Exception(L"CannotChangeDirectory");
+
+    return status == 0;
+}
+
+
+// MakePngFile() writes purifiedTex to a .tex file in tempDirectory, runs
+// latex and then dvipng on it from that directory, and moves the
+// resulting PNG into pngDirectory.
+//
+// Parameters:
+//   purifiedTex     - the TeX source to render
+//   tempDirectory   - where temporary files are created (with trailing
+//                     slash)
+//   pngDirectory    - where the finished PNG is moved to (with trailing
+//                     slash)
+//   pngFilename     - name of the PNG; if empty, "<md5>.png" is used
+//   shellLatex      - command used to invoke latex
+//   shellDvipng     - command used to invoke dvipng
+//   deleteTempFiles - whether temporary files are removed on exit
+//
+// Returns a PngInfo holding the md5 and, if they could be parsed from
+// dvipng's output, the height and depth of the image.
+//
+// Throws blahtex::Exception with code "CannotCreateTexFile",
+// "CannotWriteTexFile", "CannotRunLatex", "CannotRunDvipng" or
+// "CannotWritePngDirectory"; Execute() may also throw
+// "CannotChangeDirectory".
+PngInfo MakePngFile(
+    const wstring& purifiedTex,
+    const string& tempDirectory,
+    const string& pngDirectory,
+    const string& pngFilename,
+    const string& shellLatex,
+    const string& shellDvipng,
+    bool deleteTempFiles
+)
+{
+    PngInfo info;
+
+    string purifiedTexUtf8 = gUnicodeConverter.ConvertOut(purifiedTex);
+
+    // This md5 is used for the temp filenames.
+    string md5 = ComputeMd5(purifiedTexUtf8);
+
+    string pngActualFilename =
+        pngFilename.empty() ? (md5 + ".png") : pngFilename;
+
+    const string texFilename = tempDirectory + md5 + ".tex";
+
+    // Create the tex file.
+    ofstream texFile(texFilename.c_str(), ios::out | ios::binary);
+    if (!texFile)
+        throw blahtex::Exception(
+            L"CannotCreateTexFile"
+        );
+
+    // These are temporary files we want deleted when we're done. They
+    // are registered before the tex file is written, so that it is also
+    // cleaned up when writing fails part-way.
+    TemporaryFile texTemp(texFilename, deleteTempFiles);
+    TemporaryFile auxTemp(tempDirectory + md5 + ".aux", deleteTempFiles);
+    TemporaryFile logTemp(tempDirectory + md5 + ".log", deleteTempFiles);
+    TemporaryFile dviTemp(tempDirectory + md5 + ".dvi", deleteTempFiles);
+    TemporaryFile dataTemp(tempDirectory + md5 + ".data", deleteTempFiles);
+
+    // Send output to tex file. Closing before the check makes errors
+    // that only show up when the buffer is flushed count as write
+    // failures too; latex also needs the file closed before it runs.
+    texFile << purifiedTexUtf8;
+    texFile.close();
+    if (!texFile)
+        throw blahtex::Exception(
+            L"CannotWriteTexFile"
+        );
+
+    if (!Execute(
+            shellLatex + " " + md5 + ".tex >/dev/null 2>/dev/null",
+            tempDirectory
+        )
+        ||
+        !FileExists(tempDirectory + md5 + ".dvi")
+    )
+        throw blahtex::Exception(L"CannotRunLatex");
+
+    // dvipng's standard output (which carries the height/depth report)
+    // is captured in the .data file.
+    if (!Execute(
+            shellDvipng + " " + md5 + ".dvi " +
+            "--picky --bg Transparent --gamma 1.3 -D 120 -q -T tight " +
+            "--height --depth " +
+            "-o \"" + pngActualFilename +
+            "\" > " + md5 + ".data 2>/dev/null",
+            tempDirectory
+        )
+        ||
+        !FileExists(tempDirectory + pngActualFilename)
+    )
+        throw blahtex::Exception(L"CannotRunDvipng");
+
+    if (rename(
+            (tempDirectory + pngActualFilename).c_str(),
+            (pngDirectory + pngActualFilename).c_str()
+        ))
+        throw blahtex::Exception(L"CannotWritePngDirectory");
+
+    // Read the height and depth of the image from dvipng's output.
+    // If the data file cannot be opened the loop simply does not run.
+    {
+        ifstream dataFile(
+            (tempDirectory + md5 + ".data").c_str(),
+            ios::in | ios::binary
+        );
+
+        string line;
+        while (getline(dataFile, line))
+        {
+            string::size_type heightPos = line.find("height=");
+            string::size_type depthPos = line.find("depth=");
+            if (heightPos == string::npos || depthPos == string::npos)
+                continue;
+
+            // 7 and 6 are the lengths of "height=" and "depth=".
+            int height = 0;
+            int depth = 0;
+            istringstream heightStream(line.substr(heightPos + 7, 1000));
+            istringstream depthStream(line.substr(depthPos + 6, 1000));
+
+            // Only report dimensions when both numbers really parsed.
+            if ((heightStream >> height) && (depthStream >> depth))
+            {
+                info.mHeight = height;
+                info.mDepth = depth;
+                info.mDimensionsValid = true;
+            }
+        }
+    }
+
+    info.mMd5 = md5;
+    return info;
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/mainPng.h b/blahtexml/source/mainPng.h
new file mode 100644
index 0000000..3c79ffe
--- /dev/null
+++ b/blahtexml/source/mainPng.h
@@ -0,0 +1,62 @@
+// File "mainPng.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_MAINPNG_H
+#define BLAHTEX_MAINPNG_H
+
+#include <string>
+
+// Records information about a PNG file generated by MakePngFile.
+struct PngInfo
+{
+    // The PNG is stored in md5.png.
+    std::string mMd5;
+
+    // These are the height and depth reported by dvipng.
+    // They are only meaningful if mDimensionsValid is set. Until then they
+    // hold zero rather than indeterminate values, so that returning or
+    // copying a PngInfo whose dimensions were never parsed is well defined.
+    bool mDimensionsValid;
+    int mHeight;
+    int mDepth;
+
+    PngInfo() :
+        mDimensionsValid(false),
+        mHeight(0),
+        mDepth(0)
+    { }
+};
+
+// Generates a PNG file by running shellLatex and then shellDvipng inside
+// tempDirectory, which also holds the temporary files (.tex, .aux, .log,
+// .dvi, .data). Expects tempDirectory and pngDirectory to include a
+// terminating slash. The output file is moved into pngDirectory as
+// pngFilename; if pngFilename is an empty string, MakePngFile will just use
+// the md5 that it computes (returned in PngInfo). Throws blahtex::Exception.
+extern PngInfo MakePngFile(
+ const std::wstring& purifiedTex, // TeX input; presumably already purified by the caller - confirm
+ const std::string& tempDirectory, // working directory for both commands; must end with a slash
+ const std::string& pngDirectory, // final location of the PNG; must end with a slash
+ const std::string& pngFilename, // may be empty, see above
+ const std::string& shellLatex, // command prefix used to run latex on md5.tex
+ const std::string& shellDvipng, // command prefix used to run dvipng on md5.dvi
+ bool deleteTempFiles // handed to the TemporaryFile objects created for the files listed above
+);
+
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/md5.c b/blahtexml/source/md5.c
new file mode 100644
index 0000000..faeca9d
--- /dev/null
+++ b/blahtexml/source/md5.c
@@ -0,0 +1,381 @@
+/*
+ Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved.
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ L. Peter Deutsch
+ ghost@aladdin.com
+
+ */
+/* $Id: md5.c,v 1.1.1.1 2007/12/23 13:02:11 gvanas Exp $ */
+/*
+ Independent implementation of MD5 (RFC 1321).
+
+ This code implements the MD5 Algorithm defined in RFC 1321, whose
+ text is available at
+ http://www.ietf.org/rfc/rfc1321.txt
+ The code is derived from the text of the RFC, including the test suite
+ (section A.5) but excluding the rest of Appendix A. It does not include
+ any code or documentation that is identified in the RFC as being
+ copyrighted.
+
+ The original and principal author of md5.c is L. Peter Deutsch
+ <ghost@aladdin.com>. Other authors are noted in the change history
+ that follows (in reverse chronological order):
+
+ 2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order
+ either statically or dynamically; added missing #include <string.h>
+ in library.
+ 2002-03-11 lpd Corrected argument list for main(), and added int return
+ type, in test program and T value program.
+ 2002-02-21 lpd Added missing #include <stdio.h> in test program.
+ 2000-07-03 lpd Patched to eliminate warnings about "constant is
+ unsigned in ANSI C, signed in traditional"; made test program
+ self-checking.
+ 1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
+ 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5).
+ 1999-05-03 lpd Original version.
+ */
+
+#include "md5.h"
+#include <string.h>
+
+#undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */
+#ifdef ARCH_IS_BIG_ENDIAN
+# define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1)
+#else
+# define BYTE_ORDER 0 /* not configured: md5_process probes the byte order at run time */
+#endif
+
+#define T_MASK ((md5_word_t)~0) /* all bits set, so T_MASK ^ c is the bitwise complement of c */
+#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) /* the value in the leading comment is the effective constant; spelling it as a complement avoids the literal warning mentioned in the 2000-07-03 change-history entry */
+#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
+#define T3 0x242070db
+#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
+#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
+#define T6 0x4787c62a
+#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
+#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
+#define T9 0x698098d8
+#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
+#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
+#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
+#define T13 0x6b901122
+#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
+#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
+#define T16 0x49b40821
+#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
+#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
+#define T19 0x265e5a51
+#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
+#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
+#define T22 0x02441453
+#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
+#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
+#define T25 0x21e1cde6
+#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
+#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
+#define T28 0x455a14ed
+#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
+#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
+#define T31 0x676f02d9
+#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
+#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
+#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
+#define T35 0x6d9d6122
+#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
+#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
+#define T38 0x4bdecfa9
+#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
+#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
+#define T41 0x289b7ec6
+#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
+#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
+#define T44 0x04881d05
+#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
+#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
+#define T47 0x1fa27cf8
+#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
+#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
+#define T50 0x432aff97
+#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
+#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
+#define T53 0x655b59c3
+#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
+#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
+#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
+#define T57 0x6fa87e4f
+#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
+#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
+#define T60 0x4e0811a1
+#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
+#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
+#define T63 0x2ad7d2bb
+#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
+
+
+/*
+ * Run the MD5 compression function over the single 64-byte block at data
+ * and add the result into the four chaining words pms->abcd.
+ *
+ * pms   hash state; only abcd is read and written here.
+ * data  exactly 64 bytes of message; no alignment is required.
+ */
+static void
+md5_process(md5_state_t *pms, const md5_byte_t *data /*[64]*/)
+{
+    md5_word_t
+        a = pms->abcd[0], b = pms->abcd[1],
+        c = pms->abcd[2], d = pms->abcd[3];
+    md5_word_t t;
+#if BYTE_ORDER > 0
+    /* Define storage only for big-endian CPUs. */
+    md5_word_t X[16];
+#else
+    /* Define storage for little-endian or both types of CPUs. */
+    md5_word_t xbuf[16];
+    const md5_word_t *X;
+#endif
+
+    {
+#if BYTE_ORDER == 0
+        /*
+         * Determine dynamically whether this is a big-endian or
+         * little-endian machine: look at the first byte of the int 1.
+         */
+        static const int w = 1;
+
+        if (*((const md5_byte_t *)&w)) /* dynamic little-endian */
+#endif
+#if BYTE_ORDER <= 0 /* little-endian */
+        {
+            /*
+             * On little-endian machines the 64 input bytes already have
+             * the memory layout of the sixteen message words, so a plain
+             * copy into the word buffer is all that is needed.
+             *
+             * The block is always copied.  Reading the caller's bytes in
+             * place would need an alignment test on the pointer (the
+             * former test subtracted a null pointer, which is undefined)
+             * and would access md5_byte_t storage through a md5_word_t
+             * lvalue.
+             */
+            memcpy(xbuf, data, 64);
+            X = xbuf;
+        }
+#endif
+#if BYTE_ORDER == 0
+        else /* dynamic big-endian */
+#endif
+#if BYTE_ORDER >= 0 /* big-endian */
+        {
+            /*
+             * On big-endian machines, we must arrange the bytes in the
+             * right order.
+             */
+            const md5_byte_t *xp = data;
+            int i;
+
+#  if BYTE_ORDER == 0
+            X = xbuf; /* (dynamic only) */
+#  else
+#    define xbuf X /* (static only) */
+#  endif
+            /*
+             * Widen each byte before shifting: a byte >= 0x80 promoted to
+             * int and shifted left by 24 would overflow a signed int.
+             */
+            for (i = 0; i < 16; ++i, xp += 4)
+                xbuf[i] = (md5_word_t)xp[0] +
+                    ((md5_word_t)xp[1] << 8) +
+                    ((md5_word_t)xp[2] << 16) +
+                    ((md5_word_t)xp[3] << 24);
+        }
+#endif
+    }
+
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+    /* Round 1. */
+    /* Let [abcd k s i] denote the operation
+       a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
+#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + F(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+    /* Do the following 16 operations. */
+    SET(a, b, c, d, 0, 7, T1);
+    SET(d, a, b, c, 1, 12, T2);
+    SET(c, d, a, b, 2, 17, T3);
+    SET(b, c, d, a, 3, 22, T4);
+    SET(a, b, c, d, 4, 7, T5);
+    SET(d, a, b, c, 5, 12, T6);
+    SET(c, d, a, b, 6, 17, T7);
+    SET(b, c, d, a, 7, 22, T8);
+    SET(a, b, c, d, 8, 7, T9);
+    SET(d, a, b, c, 9, 12, T10);
+    SET(c, d, a, b, 10, 17, T11);
+    SET(b, c, d, a, 11, 22, T12);
+    SET(a, b, c, d, 12, 7, T13);
+    SET(d, a, b, c, 13, 12, T14);
+    SET(c, d, a, b, 14, 17, T15);
+    SET(b, c, d, a, 15, 22, T16);
+#undef SET
+
+    /* Round 2. */
+    /* Let [abcd k s i] denote the operation
+       a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
+#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + G(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+    /* Do the following 16 operations. */
+    SET(a, b, c, d, 1, 5, T17);
+    SET(d, a, b, c, 6, 9, T18);
+    SET(c, d, a, b, 11, 14, T19);
+    SET(b, c, d, a, 0, 20, T20);
+    SET(a, b, c, d, 5, 5, T21);
+    SET(d, a, b, c, 10, 9, T22);
+    SET(c, d, a, b, 15, 14, T23);
+    SET(b, c, d, a, 4, 20, T24);
+    SET(a, b, c, d, 9, 5, T25);
+    SET(d, a, b, c, 14, 9, T26);
+    SET(c, d, a, b, 3, 14, T27);
+    SET(b, c, d, a, 8, 20, T28);
+    SET(a, b, c, d, 13, 5, T29);
+    SET(d, a, b, c, 2, 9, T30);
+    SET(c, d, a, b, 7, 14, T31);
+    SET(b, c, d, a, 12, 20, T32);
+#undef SET
+
+    /* Round 3. */
+    /* Let [abcd k s i] denote the operation
+       a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + H(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+    /* Do the following 16 operations. */
+    SET(a, b, c, d, 5, 4, T33);
+    SET(d, a, b, c, 8, 11, T34);
+    SET(c, d, a, b, 11, 16, T35);
+    SET(b, c, d, a, 14, 23, T36);
+    SET(a, b, c, d, 1, 4, T37);
+    SET(d, a, b, c, 4, 11, T38);
+    SET(c, d, a, b, 7, 16, T39);
+    SET(b, c, d, a, 10, 23, T40);
+    SET(a, b, c, d, 13, 4, T41);
+    SET(d, a, b, c, 0, 11, T42);
+    SET(c, d, a, b, 3, 16, T43);
+    SET(b, c, d, a, 6, 23, T44);
+    SET(a, b, c, d, 9, 4, T45);
+    SET(d, a, b, c, 12, 11, T46);
+    SET(c, d, a, b, 15, 16, T47);
+    SET(b, c, d, a, 2, 23, T48);
+#undef SET
+
+    /* Round 4. */
+    /* Let [abcd k s i] denote the operation
+       a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + I(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+    /* Do the following 16 operations. */
+    SET(a, b, c, d, 0, 6, T49);
+    SET(d, a, b, c, 7, 10, T50);
+    SET(c, d, a, b, 14, 15, T51);
+    SET(b, c, d, a, 5, 21, T52);
+    SET(a, b, c, d, 12, 6, T53);
+    SET(d, a, b, c, 3, 10, T54);
+    SET(c, d, a, b, 10, 15, T55);
+    SET(b, c, d, a, 1, 21, T56);
+    SET(a, b, c, d, 8, 6, T57);
+    SET(d, a, b, c, 15, 10, T58);
+    SET(c, d, a, b, 6, 15, T59);
+    SET(b, c, d, a, 13, 21, T60);
+    SET(a, b, c, d, 4, 6, T61);
+    SET(d, a, b, c, 11, 10, T62);
+    SET(c, d, a, b, 2, 15, T63);
+    SET(b, c, d, a, 9, 21, T64);
+#undef SET
+
+    /* Then perform the following additions. (That is increment each
+       of the four registers by the value it had before this block
+       was started.) */
+    pms->abcd[0] += a;
+    pms->abcd[1] += b;
+    pms->abcd[2] += c;
+    pms->abcd[3] += d;
+}
+
+/*
+ * Prepare *pms for hashing a new message: the bit counter is cleared and
+ * the four chaining words receive their starting values.
+ */
+void
+md5_init(md5_state_t *pms)
+{
+    /* No input has been consumed yet. */
+    pms->count[0] = 0;
+    pms->count[1] = 0;
+
+    /* Words with the top bit set are spelled as T_MASK ^ complement. */
+    pms->abcd[0] = 0x67452301;
+    pms->abcd[1] = T_MASK ^ 0x10325476; /* 0xefcdab89 */
+    pms->abcd[2] = T_MASK ^ 0x67452301; /* 0x98badcfe */
+    pms->abcd[3] = 0x10325476;
+}
+
+/*
+ * Feed nbytes bytes starting at data into the running hash in *pms.
+ *
+ * Complete 64-byte blocks are hashed immediately; a trailing partial block
+ * is kept in pms->buf until later calls complete it.  Calls with
+ * nbytes <= 0 do nothing.
+ */
+void
+md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
+{
+    const md5_byte_t *p = data;
+    int left = nbytes;
+    int offset;
+    md5_word_t nbits;
+
+    /* Reject empty and negative lengths before doing arithmetic on them. */
+    if (nbytes <= 0)
+        return;
+
+    /* Bytes already waiting in pms->buf, taken before count is advanced. */
+    offset = (pms->count[0] >> 3) & 63;
+
+    /*
+     * Shift as an unsigned word: nbytes << 3 evaluated as int overflows
+     * once nbytes reaches 2^28.  The bits shifted out here are the ones
+     * added to count[1] through nbytes >> 29 below.
+     */
+    nbits = (md5_word_t)nbytes << 3;
+
+    /* Update the message length. */
+    pms->count[1] += nbytes >> 29;
+    pms->count[0] += nbits;
+    if (pms->count[0] < nbits)
+        pms->count[1]++; /* carry out of the low word */
+
+    /* Process an initial partial block. */
+    if (offset) {
+        int copy = (offset + nbytes > 64 ? 64 - offset : nbytes);
+
+        memcpy(pms->buf + offset, p, copy);
+        if (offset + copy < 64)
+            return; /* still not a full block */
+        p += copy;
+        left -= copy;
+        md5_process(pms, pms->buf);
+    }
+
+    /* Process full blocks. */
+    for (; left >= 64; p += 64, left -= 64)
+        md5_process(pms, p);
+
+    /* Process a final partial block. */
+    if (left)
+        memcpy(pms->buf, p, left);
+}
+
+/*
+ * Complete the hash: pad the message, append its length, and write the
+ * 16 digest bytes to digest.
+ */
+void
+md5_finish(md5_state_t *pms, md5_byte_t digest[16])
+{
+    /* A single 0x80 byte; the remaining 63 entries are zero-initialized. */
+    static const md5_byte_t pad[64] = { 0x80 };
+    md5_byte_t length_bytes[8];
+    int pad_len;
+    int k;
+
+    /*
+     * Save the length before padding, least significant byte first; the
+     * md5_append calls below keep advancing pms->count.
+     */
+    for (k = 0; k < 8; ++k) {
+        const md5_word_t word = pms->count[k / 4];
+
+        length_bytes[k] = (md5_byte_t)(word >> (8 * (k % 4)));
+    }
+
+    /* Pad to 56 bytes mod 64. */
+    pad_len = ((55 - (pms->count[0] >> 3)) & 63) + 1;
+    md5_append(pms, pad, pad_len);
+
+    /* Append the length. */
+    md5_append(pms, length_bytes, 8);
+
+    /* Emit the four chaining words, each least significant byte first. */
+    for (k = 0; k < 16; ++k) {
+        const md5_word_t word = pms->abcd[k / 4];
+
+        digest[k] = (md5_byte_t)(word >> (8 * (k % 4)));
+    }
+}
diff --git a/blahtexml/source/md5.h b/blahtexml/source/md5.h
new file mode 100644
index 0000000..fd6a32d
--- /dev/null
+++ b/blahtexml/source/md5.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved.
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ L. Peter Deutsch
+ ghost@aladdin.com
+
+ */
+/* $Id: md5.h,v 1.1.1.1 2007/12/23 13:02:11 gvanas Exp $ */
+/*
+ Independent implementation of MD5 (RFC 1321).
+
+ This code implements the MD5 Algorithm defined in RFC 1321, whose
+ text is available at
+ http://www.ietf.org/rfc/rfc1321.txt
+ The code is derived from the text of the RFC, including the test suite
+ (section A.5) but excluding the rest of Appendix A. It does not include
+ any code or documentation that is identified in the RFC as being
+ copyrighted.
+
+ The original and principal author of md5.h is L. Peter Deutsch
+ <ghost@aladdin.com>. Other authors are noted in the change history
+ that follows (in reverse chronological order):
+
+ 2002-04-13 lpd Removed support for non-ANSI compilers; removed
+ references to Ghostscript; clarified derivation from RFC 1321;
+ now handles byte order either statically or dynamically.
+ 1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
+ 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5);
+ added conditionalization for C++ compilation from Martin
+ Purschke <purschke@bnl.gov>.
+ 1999-05-03 lpd Original version.
+ */
+
+#ifndef md5_INCLUDED
+# define md5_INCLUDED
+
+/*
+ * This package supports both compile-time and run-time determination of CPU
+ * byte order. If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be
+ * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is
+ * defined as non-zero, the code will be compiled to run only on big-endian
+ * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to
+ * run on either big- or little-endian CPUs, but will run slightly less
+ * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined.
+ */
+
+typedef unsigned char md5_byte_t; /* 8-bit byte */
+typedef unsigned int md5_word_t; /* 32-bit word. NOTE(review): relies on unsigned int being exactly 32 bits wide - confirm for the target platform */
+
+/* Define the state of the MD5 Algorithm. Set up by md5_init, advanced by md5_append, consumed by md5_finish. */
+typedef struct md5_state_s {
+ md5_word_t count[2]; /* message length in bits, lsw first */
+ md5_word_t abcd[4]; /* digest buffer: the four chaining words updated after every 64-byte block */
+ md5_byte_t buf[64]; /* accumulate block: input bytes that do not yet fill a whole 64-byte block */
+} md5_state_t;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Initialize the algorithm: reset the length counter and the digest words in *pms. */
+void md5_init(md5_state_t *pms);
+
+/* Append nbytes bytes starting at data to the message; calls with nbytes <= 0 are ignored. */
+void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes);
+
+/* Finish the message (pad it and append its length) and store the 16-byte digest in digest. */
+void md5_finish(md5_state_t *pms, md5_byte_t digest[16]);
+
+#ifdef __cplusplus
+} /* end extern "C" */
+#endif
+
+#endif /* md5_INCLUDED */
diff --git a/blahtexml/source/md5Wrapper.cpp b/blahtexml/source/md5Wrapper.cpp
new file mode 100644
index 0000000..2a54670
--- /dev/null
+++ b/blahtexml/source/md5Wrapper.cpp
@@ -0,0 +1,53 @@
+// File "md5Wrapper.cpp"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "md5.h"
+#include <sstream>
+#include <iomanip>
+
+using namespace std;
+
+// Returns the MD5 digest of the bytes of input, written as 32 lowercase
+// hexadecimal digits.
+string ComputeMd5(const string& input)
+{
+    md5_state_t state;
+    md5_byte_t digest[16];
+
+    md5_init(&state);
+
+    // md5_append takes its length as an int and ignores non-positive
+    // values, so passing input.size() directly would hash nothing (or the
+    // wrong amount) once the size no longer fits in an int. Feed the data
+    // in bounded pieces instead; hashing piecewise gives the same digest.
+    const string::size_type maxChunk = 1 << 20;
+    const md5_byte_t* bytes =
+        reinterpret_cast<const md5_byte_t*>(input.data());
+
+    for (string::size_type done = 0; done < input.size(); )
+    {
+        string::size_type chunk = input.size() - done;
+        if (chunk > maxChunk)
+            chunk = maxChunk;
+
+        md5_append(&state, bytes + done, static_cast<int>(chunk));
+        done += chunk;
+    }
+
+    md5_finish(&state, digest);
+
+    // Two zero-padded hex digits per digest byte.
+    ostringstream result;
+    result << hex << setfill('0');
+    for (int i = 0; i < 16; i++)
+        result << setw(2) << static_cast<unsigned int>(digest[i]);
+
+    return result.str();
+}
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/blahtexml/source/md5Wrapper.h b/blahtexml/source/md5Wrapper.h
new file mode 100644
index 0000000..b959f98
--- /dev/null
+++ b/blahtexml/source/md5Wrapper.h
@@ -0,0 +1,36 @@
+// File "md5Wrapper.h"
+//
+// blahtex (version 0.4.4)
+// a TeX to MathML converter designed with MediaWiki in mind
+// Copyright (C) 2006, David Harvey
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+#ifndef BLAHTEX_MD5_WRAPPER_H
+#define BLAHTEX_MD5_WRAPPER_H
+
+#include <string>
+
+// This is a C++ wrapper for the md5 code in md5.c/md5.h.
+//
+// Thanks to the authors of md5.c/md5.h for making their code freely
+// available.
+//
+// Returns the md5 of input as a 32-character lowercase hexadecimal string.
+extern std::string ComputeMd5(const std::string& input);
+
+#endif
+
+// end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@