path: root/mwlib
author     Wade Brainerd <wadetb@gmail.com>    2008-05-23 22:58:23 (GMT)
committer  Wade Brainerd <wadetb@gmail.com>    2008-05-23 22:58:23 (GMT)
commit     dd58bf72d6799438d8033cf7de6bc26a711734c3 (patch)
tree       12c5c75e465b5c7889857116839f4dcd215fc230 /mwlib
parent     02242194e156b00cef18506ab37d4a51ba36ac57 (diff)
Rename step 1. Breaking the renames up because Git fails to recognize them when too many are done at once.
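For context: Git does not record renames explicitly; it reconstructs them at diff time by pairing deleted and added paths, and it stops trying once the number of candidates exceeds its rename-detection limit, so a single commit that renames very many files can show up as plain deletions plus additions. A rough sketch of the two usual workarounds (the limit value is arbitrary and the paths are illustrative, not taken from this repository):

    # raise the rename-detection limit when inspecting history
    git config diff.renameLimit 5000
    git log -M --stat

    # or, as done in this series of commits, rename in smaller batches
    git mv oldtree/parser.py mwlib/parser.py    # illustrative paths only
    git commit -m "Rename step 1"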
Diffstat (limited to 'mwlib')
-rwxr-xr-x  mwlib/EasyTimeline.pl              4718
-rw-r--r--  mwlib/Makefile                       20
-rwxr-xr-x  mwlib/__init__.py                     6
-rw-r--r--  mwlib/_expander.cc                  826
-rw-r--r--  mwlib/_expander.re                  206
-rwxr-xr-x  mwlib/_expander.so                  bin 0 -> 74041 bytes
-rw-r--r--  mwlib/_mwscan.cc                   1699
-rw-r--r--  mwlib/_mwscan.re                    327
-rwxr-xr-x  mwlib/_mwscan.so                    bin 0 -> 82978 bytes
-rw-r--r--  mwlib/_version.py                     9
-rw-r--r--  mwlib/advtree.py                    545
-rw-r--r--  mwlib/allnodes.py                    27
-rw-r--r--  mwlib/apps.py                       378
-rwxr-xr-x  mwlib/caller.py                      20
-rwxr-xr-x  mwlib/cdb.py                        262
-rwxr-xr-x  mwlib/cdbwiki.py                    243
-rw-r--r--  mwlib/dummydb.py                     10
-rwxr-xr-x  mwlib/expander.py                   553
-rwxr-xr-x  mwlib/expr.py                       222
-rwxr-xr-x  mwlib/htmlwriter.py                 436
-rwxr-xr-x  mwlib/imgmap.py                     122
-rwxr-xr-x  mwlib/lang.py                        10
-rw-r--r--  mwlib/lang.txt                      253
-rw-r--r--  mwlib/licenses.py                   185
-rwxr-xr-x  mwlib/log.py                         53
-rwxr-xr-x  mwlib/magics.py                     469
-rwxr-xr-x  mwlib/metabook.py                   119
-rw-r--r--  mwlib/mwapidb.py                    376
-rwxr-xr-x  mwlib/mwscan.py                     315
-rwxr-xr-x  mwlib/netdb.py                      529
-rw-r--r--  mwlib/overlay.py                     22
-rwxr-xr-x  mwlib/parser.py                    1416
-rwxr-xr-x  mwlib/recorddb.py                    83
-rwxr-xr-x  mwlib/rendermath.py                 144
-rw-r--r--  mwlib/resources/__init__.py           0
-rw-r--r--  mwlib/resources/__init__.pyc        bin 0 -> 144 bytes
-rw-r--r--  mwlib/resources/outgoing_link.gif   bin 0 -> 99 bytes
-rw-r--r--  mwlib/resources/pedia.css          1250
-rw-r--r--  mwlib/sanitychecker.py              205
-rwxr-xr-x  mwlib/scanfile.py                    29
-rwxr-xr-x  mwlib/scanner.py                      6
-rwxr-xr-x  mwlib/texmap.py                      95
-rwxr-xr-x  mwlib/timeline.py                    52
-rwxr-xr-x  mwlib/uparser.py                    126
-rw-r--r--  mwlib/utils.py                      112
-rwxr-xr-x  mwlib/web.py                        122
-rwxr-xr-x  mwlib/wiki.py                       135
-rwxr-xr-x  mwlib/zipwiki.py                    167
48 files changed, 16902 insertions, 0 deletions
diff --git a/mwlib/EasyTimeline.pl b/mwlib/EasyTimeline.pl
new file mode 100755
index 0000000..6486224
--- /dev/null
+++ b/mwlib/EasyTimeline.pl
@@ -0,0 +1,4718 @@
+#!/usr/bin/env perl
+
+# Copyright (C) 2004 Erik Zachte, email xxx\@chello.nl (nospam: xxx=epzachte)
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU General Public License for more details, at
+# http://www.fsf.org/licenses/gpl.html
+
+# history:
+# 1.5 May 27 2004 :
+# - when a chart contains only one bar this bar was always centered in the image
+# now AlignBars works well in this case also ("justify" treated as "center")
+# - interwiki links reinstalled e.g. [[de:Gorbachev]]
+# - error msgs corrected
+# - minimum image size fixed
+# - line numbering adapted <timeline>spaces<br> does not count as line one in Wikipedia
+# - line breaks in wiki links parsed correctly [[Vladimir~Ilyich~Lenin]]
+# - partial url shown as hint for external link (in GIF/PNG)
+# - BarData: no attribute 'text:..' supplied -> default to space = show no text on axis
+# - PlotData: new attribute 'anchor:..'
+# - revert html encoding of '<' & '>' by MediaWiki
+
+# 1.6 May 28 2004 :
+# - SVG decode special chars in SVG input fixed
+# - BarData: new attributes 'barset:..' and 'barcount:..' # autoincrement bar id
+# - PlotData: new attribute 'barset:..'
+# - LineData: new attribute 'layer:..', draw lines to back or front of bars and texts
+
+# 1.7
+# - EscapeShellArg (Tim Starling)
+
+# 1.8 June .. 2004 :
+# - optional autosizing of image (implied when auto incrementing bar count (also new))
+# - presentation left-right order of bars reversed on TimeAxis = orientation:vertical
+# - TimeAxis option 'order:[normal|reverse]' added
+# - BarData: option barcount replaced by auto incrementing bar count and 'break' and 'skip' attributes
+# - DrawLines -> LineData (command renamed, but also restructured like PlotData, TextData)
+# - new drawing options for LineData, now also lines parallel to time axis, or between arbitrary points
+# - Preset command added (specify default settings with 'Preset =', two sets to start with)
+# - 'text' attribute parsing bugs (# or : in text gave problems, spaces got lost)
+# - PlotArea new attributes 'top' and 'right' make it possible to define plot area margins only
+# so resizing image does not imply adjusting PlotArea 'width' and 'height'
+# - PlotData option 'shift': only changing x or y value is now possible, e.g. shift=(,10)
+# - command ScaleMajor: subs for time axis can now be specified verbatim in option 'text'
+# - extra validation checks, defaults, etc
+# - function PlotScale now provides workaround for Ploticus bug: auto incrementing dates failed
+
+# 1.9 June 2004
+# - stub display order fixed on non time axis
+
+# 1.10 July 2004
+# - temporary debug code (removed)
+
+# 1.11 August 2004
+# - dot in folder name in input path was misunderstood as start of file extension
+# - utf-8 chars within 160-255 range are translated to extended ascii
+# however internal font used by Ploticus has strange mapping so some are replaced
+# by underscore or unaccented version of character
+# this is a makeshift solution until full unicode support with external fonts is added
+
+ $version = "1.9" ;
+
+ use Time::Local ;
+ use Getopt::Std ;
+ use Cwd ;
+
+ $| = 1; # flush screen output
+
+ print "EasyTimeline version $version\n" .
+ "Copyright (C) 2004 Erik Zachte\n" .
+ "Email xxx\@chello.nl (nospam: xxx=epzachte)\n\n" .
+ "This program is free software; you can redistribute it\n" .
+ "and/or modify it under the terms of the \n" .
+ "GNU General Public License version 2 as published by\n" .
+ "the Free Software Foundation\n" .
+ "------------------------------------------------------\n" ;
+
+ &SetImageFormat ;
+ &ParseArguments ;
+ &InitFiles ;
+
+ open "FILE_IN", "<", $file_in ;
+ @lines = <FILE_IN> ;
+ close "FILE_IN" ;
+
+ &InitVars ;
+ &ParseScript ;
+
+ if ($CntErrors == 0)
+ { &WritePlotFile ; }
+
+ if ($CntErrors == 1)
+ { &Abort ("1 error found") ; }
+ elsif ($CntErrors > 1)
+ { &Abort ("$CntErrors errors found") ; }
+ else
+ {
+ if (defined @Info)
+ {
+ print "\nINFO\n" ;
+ print @Info ;
+ print "\n" ;
+ }
+ if (defined @Warnings)
+ {
+ print "\nWARNING(S)\n" ;
+ print @Warnings ;
+ print "\n" ;
+ }
+
+ if (! (-e $file_bitmap))
+ {
+ print "\nImage $file_bitmap not created.\n" ;
+ if ((! (-e "pl.exe")) && (! (-e "pl")))
+ { print "\nPloticus not found in local folder. Is it on your system path?\n" ; }
+ }
+ elsif (! (-e $file_vector))
+ {
+ print "\nImage $file_vector not created.\n" ;
+ }
+ else
+ { print "\nREADY\nNo errors found.\n" ; }
+ }
+
+ exit ;
+
+sub ParseArguments
+{
+ my $options ;
+ getopt ("iTAPe", \%options) ;
+
+ &Abort ("Specify input file as: -i filename") if (! defined (@options {"i"})) ;
+
+ $file_in = @options {"i"} ;
+ $listinput = @options {"l"} ; # list all input lines (not recommended)
+ $linkmap = @options {"m"} ; # make clickmap for inclusion in html
+ $makehtml = @options {"h"} ; # make test html file with gif/png + svg output
+ $bypass = @options {"b"} ; # do not use in Wikipedia:bypass some checks
+ $showmap = @options {"d"} ; # debug: shows clickable areas in gif/png
+ # The following parameters are used by MediaWiki
+ # to pass config settings from LocalSettings.php to
+ # the perl script
+ $tmpdir = @options {"T"} ; # For MediaWiki: temp directory to use
+ $plcommand = @options {"P"} ; # For MediaWiki: full path of ploticus command
+ $articlepath=@options {"A"} ; # For MediaWiki: Path of an article, relative to this server's root
+
+ if (! defined @options {"A"} )
+ { $articlepath="http://en.wikipedia.org/wiki/\$1"; }
+
+ if (! -e $file_in)
+ { &Abort ("Input file '" . $file_in . "' not found.") ; }
+}
+
+sub InitVars
+{
+ $true = 1 ;
+ $false = 0 ;
+ $CntErrors = 0 ;
+ $LinkColor = "brightblue" ;
+ $MapPNG = $false ; # switched when link or hint found
+ $MapSVG = $false ; # switched when link found
+ $WarnTextOutsideArea = 0 ;
+ $WarnOnRightAlignedText = 0 ;
+
+ $hPerc = &EncodeInput ("\%") ;
+ $hAmp = &EncodeInput ("\&") ;
+ $hAt = &EncodeInput ("\@") ;
+ $hDollar = &EncodeInput ("\$") ;
+ $hBrO = &EncodeInput ("\(") ;
+ $hBrC = &EncodeInput ("\)") ;
+ $hSemi = &EncodeInput ("\;") ;
+ $hIs = &EncodeInput ("\=") ;
+ $hLt = &EncodeInput ("\<") ;
+ $hGt = &EncodeInput ("\>") ;
+}
+
+sub InitFiles
+{
+ print "\nInput: Script file $file_in\n" ;
+
+ $file = $file_in ;
+# 1.10 dot ignore dots in folder names ->
+ $file =~ s/\.[^\\\/\.]*$// ; # remove extension
+ $file_name = $file ;
+ $file_bitmap = $file . "." . $fmt ;
+ $file_vector = $file . ".svg" ;
+ $file_png = $file . ".png" ;
+ $file_htmlmap = $file . ".map" ;
+ $file_html = $file . ".html" ;
+ $file_errors = $file . ".err" ;
+# $file_pl_info = $file . ".inf" ;
+# $file_pl_err = $file . ".err" ;
+ print "Output: Image files $file_bitmap & $file_vector\n" ;
+
+ if ($linkmap)
+ { print " Map file $file_htmlmap (add to html for clickable map)\n" ; }
+ if ($makehtml)
+ { print " HTML test file $file_html\n" ; }
+
+ # remove previous output
+ if (-e $file_bitmap) { unlink $file_bitmap ; }
+ if (-e $file_vector) { unlink $file_vector ; }
+ if (-e $file_png) { unlink $file_png ; }
+ if (-e $file_htmlmap) { unlink $file_htmlmap ; }
+ if (-e $file_html) { unlink $file_html ; }
+ if (-e $file_errors) { unlink $file_errors ; }
+}
+
+sub SetImageFormat
+{
+ $env = "" ;
+# $dir = cwd() ; # is there a better way to detect OS?
+# if ($dir =~ /\//) { $env = "Linux" ; $fmt = "png" ; $pathseparator = "/";}
+# if ($dir =~ /\\/) { $env = "Windows" ; $fmt = "gif" ; $pathseparator = "\\";}
+# cwd always returns '/'s ? ->
+ $OS = $^O ;
+ if ($OS =~ /darwin/i)
+ { $env = "Linux"; $fmt = "png" ; $pathseparator = "/";}
+ elsif ($OS =~ /win/i)
+ { $env = "Windows" ; $fmt = "gif" ; $pathseparator = "\\";}
+ else
+ { $env = "Linux" ; $fmt = "png" ; $pathseparator = "/";}
+
+ if ($env ne "")
+ { print "\nOS $env detected -> create image in $fmt format.\n" ; }
+ else
+ {
+ print "\nOS not detected. Assuming Windows -> create image in $fmt format.\n" ;
+ $env = "Windows" ;
+ }
+}
+sub ParseScript
+{
+ my $command ; # local version, $Command = global
+ $LineNo = 0 ;
+ $InputParsed = $false ;
+ $CommandNext = "" ;
+ $DateFormat = "x.y" ;
+
+ $firstcmd = $true ;
+ &GetCommand ;
+
+ &StoreColor ("white", &EncodeInput ("gray(0.999)"), "") ;
+ &StoreColor ("barcoldefault", &EncodeInput ("rgb(0,0.6,0)"), "") ;
+
+ while (! $InputParsed)
+ {
+ if ($Command =~ /^\s*$/)
+ { &GetCommand ; next ; }
+
+ if (! ($Command =~ /$hIs/))
+ { &Error ("Invalid statement. No '=' found.") ;
+ &GetCommand ; next ; }
+
+ if ($Command =~ /$hIs.*$hIs/)
+ { &Error ("Invalid statement. Multiple '=' found.") ;
+ &GetCommand ; next ; }
+
+ my ($name, $value) = split ($hIs, $Command) ;
+ $name =~ s/^\s*(.*?)\s*$/$1/ ;
+
+ if ($name =~ /PlotDividers/i)
+ { &Error ("Command 'PlotDividers' has been renamed to 'LineData', please adjust.") ;
+ &GetCommand ; next ; }
+ if ($name =~ /DrawLines/i)
+ { &Error ("Command 'DrawLines' has been renamed to 'LineData', please adjust.\n" .
+ " Reason for change is consistency: LineData now follows the same syntax rules as PlotData and TextData.") ;
+ &GetCommand ; next ; }
+
+ if ((! ($name =~ /^(?:Define)\s/)) &&
+ (! ($name =~ /^(?:AlignBars|BarData|
+ BackgroundColors|Colors|DateFormat|LineData|
+ ScaleMajor|ScaleMinor|
+ LegendLeft|LegendTop|
+ ImageSize|PlotArea|Legend|
+ Period|PlotData|Preset|
+ TextData|TimeAxis)$/xi)))
+ { &ParseUnknownCommand ;
+ &GetCommand ; next ; }
+
+ $value =~ s/^\s*(.*?)\s*// ;
+ if (! ($name =~ /^(?:BarData|Colors|LineData|PlotData|TextData)$/i))
+ {
+ if ((! (defined ($value))) || ($value eq ""))
+ {
+ if ($name =~ /Preset/i)
+ {
+ &Error ("$name definition incomplete. No value specified\n" .
+ " At the moment only one preset exists: 'TimeVertical_OneBar_UnitYear'.\n" .
+ " See also meta.wikipedia.org/wiki/EasyTimeline/Presets") ;
+ }
+ else
+ { &Error ("$name definition incomplete. No attributes specified") ; }
+ &GetCommand ; next ; }
+ }
+
+ if ($name =~ /^(?:BackgroundColors|Colors|Period|ScaleMajor|ScaleMinor|TimeAxis)$/i)
+ {
+ my @attributes = split (" ", $value) ;
+ foreach $attribute (@attributes)
+ {
+ my ($attrname, $attrvalue) = split ("\:", $attribute) ;
+ if (! ($name."-".$attrname =~ /^(?:Colors-Value|Colors-Legend|
+ Period-From|Period-Till|
+ ScaleMajor-Color|ScaleMajor-Unit|ScaleMajor-Increment|ScaleMajor-Start|
+ ScaleMinor-Color|ScaleMinor-Unit|ScaleMinor-Increment|ScaleMinor-Start|
+ BackgroundColors-Canvas|BackgroundColors-Bars|
+ TimeAxis-Orientation|TimeAxis-Format)$/xi))
+ { &Error ("$name definition invalid. Unknown attribute '$attrname'.") ;
+ &GetCommand ; next ; }
+
+ if ((! defined ($attrvalue)) || ($attrvalue eq ""))
+ { &Error ("$name definition incomplete. No value specified for attribute '$attrname'.") ;
+ &GetCommand ; next ; }
+ }
+ }
+
+ if ($Command =~ /^AlignBars/i) { &ParseAlignBars ; }
+ elsif ($Command =~ /^BackgroundColors/i) { &ParseBackgroundColors ; }
+ elsif ($Command =~ /^BarData/i) { &ParseBarData ; }
+ elsif ($Command =~ /^Colors/i) { &ParseColors ; }
+ elsif ($Command =~ /^DateFormat/i) { &ParseDateFormat ; }
+ elsif ($Command =~ /^Define/i) { &ParseDefine ; }
+ elsif ($Command =~ /^ImageSize/i) { &ParseImageSize ; }
+ elsif ($Command =~ /^Legend/i) { &ParseLegend ; }
+ elsif ($Command =~ /^LineData/i) { &ParseLineData ; }
+ elsif ($Command =~ /^Period/i) { &ParsePeriod ; }
+ elsif ($Command =~ /^PlotArea/i) { &ParsePlotArea ; }
+ elsif ($Command =~ /^PlotData/i) { &ParsePlotData ; }
+ elsif ($Command =~ /^Preset/i) { &ParsePreset ; }
+ elsif ($Command =~ /^Scale/i) { &ParseScale ; }
+ elsif ($Command =~ /^TextData/i) { &ParseTextData ; }
+ elsif ($Command =~ /^TimeAxis/i) { &ParseTimeAxis ; }
+
+ &GetCommand ;
+ $firstcmd = $false ;
+ }
+
+ if ($CntErrors == 0)
+ { &DetectMissingCommands ; }
+
+ if ($CntErrors == 0)
+ { &ValidateAndNormalizeDimensions ; }
+}
+
+
+sub GetLine
+{
+ if ($#lines < 0)
+ { $InputParsed = $true ; return ("") ; }
+
+ # running in Wikipedia context and first line empty ?
+ # skip first line without incrementing line count
+ # this is part behind <timeline> and will not be thought of as line 1
+ if (defined @options {"A"})
+ {
+ if (($#lines >= 0) && (@lines [0] =~ /^\s*$/))
+ { $Line = shift (@lines) ; }
+ }
+
+ $Line = "" ;
+ while (($#lines >= 0) && ($Line =~ /^\s*$/))
+ {
+ $LineNo ++ ;
+ $Line = shift (@lines) ;
+ chomp ($Line) ;
+
+ if ($listinput)
+ { print "$LineNo: " . &DecodeInput ($Line) . "\n" ; }
+
+ # preserve '#' within double quotes
+ $Line =~ s/(\"[^\"]*\")/$a=$1,$a=~s^\#^\%\?\+^g,$a/ge ;
+
+ $Line =~ s/#>.*?<#//g ;
+ if ($Line =~ /#>/)
+ {
+ $commentstart = $LineNo ;
+ $Line =~ s/#>.*?$// ;
+ }
+ elsif ($Line =~ /<#/)
+ {
+ undef $commentstart ;
+ $Line =~ s/^.*?<#//x ;
+ }
+ elsif (defined ($commentstart))
+ { $Line = "" ; next ; }
+
+ # remove single line comments (keep html char tags, like &#32;)
+ $Line =~ s/\&\#/\&\$\%/g ;
+ $Line =~ s/\#.*$// ;
+ $Line =~ s/\&\$\%/\&\#/g ;
+ $Line =~ s/\%\?\+/\#/g ;
+ $Line =~ s/\s*$//g ;
+ $Line =~ s/\t/ /g ;
+ }
+
+ if ($Line !~ /^\s*$/)
+ {
+ $Line = &EncodeInput ($Line) ;
+
+ if (! ($Line =~ /^\s*Define/i))
+ { $Line =~ s/($hDollar[a-zA-Z0-9]+)/&GetDefine($Line,$1)/ge ; }
+ }
+
+ if (($#lines < 0) && (defined ($commentstart)))
+ { &Error2 ("No matching end of comment found for comment block starting at line $commentstart.\n" .
+ "Text between \#> and <\# (multiple lines) or following \# (single line) will be treated as comment.") ; }
+ return ($Line) ;
+}
+
+sub GetCommand
+{
+ undef (%Attributes) ;
+ $Command = "" ;
+
+ if ($CommandNext ne "")
+ {
+ $Command = $CommandNext ;
+ $CommandNext = "" ;
+ }
+ else
+ { $Command = &GetLine ; }
+
+ if ($Command =~ /^\s/)
+ {
+ &Error ("New command expected instead of data line (= line starting with spaces). Data line(s) ignored.\n") ;
+ $Command = &GetLine ;
+ while (($#lines >= 0) && ($Command =~ /^\s/))
+ { $Command = &GetLine ; }
+ }
+
+ if ($Command =~ /^[^\s]/)
+ {
+ $line = $Command ;
+ $line =~ s/^.*$hIs\s*// ;
+ &CollectAttributes ($line) ;
+ }
+}
+
+sub GetData
+{
+ undef (%Attributes) ;
+ $Command = "" ;
+ $NoData = $false ;
+ my $line = &GetLine ;
+
+ if ($line =~ /^[^\s]/)
+ {
+ $CommandNext = $line ;
+ $NoData = $true ;
+ return ("") ;
+ }
+
+ if ($line =~ /^\s*$/)
+ {
+ $NoData = $true ;
+ return ("") ;
+ }
+
+ $line =~ s/^\s*//g ;
+ &CollectAttributes ($line) ;
+}
+
+sub CollectAttributes
+{
+ my $line = shift ;
+
+ $line =~ s/(\slink\:[^\s\:]*)\:/$1'colon'/i ; # replace colon (:), would conflict with syntax
+ $line =~ s/(\stext\:[^\s\:]*)\:/$1'colon'/i ; # replace colon (:), would conflict with syntax
+ $line =~ s/(https?)\:/$1'colon'/i ; # replace colon (:), would conflict with syntax
+
+ my $text ;
+ ($line, $text) = &ExtractText ($line) ;
+ $text =~ s/'colon'/:/ ;
+
+ $line =~ s/( $hBrO .+? $hBrC )/&RemoveSpaces($1)/gxe ;
+ $line =~ s/\s*\:\s*/:/g ;
+ $line =~ s/([a-zA-Z0-9\_]+)\:/lc($1) . ":"/gxe ;
+ @Fields = split (" ", $line) ;
+
+ $name = "" ;
+ foreach $field (@Fields)
+ {
+ if ($field =~ /\:/)
+ {
+ ($name, $value) = split (":", $field) ;
+ $name =~ s/^\s*(.*)\s*$/lc($1)/gxe ;
+ $value =~ s/^\s*(.*)\s*$/$1/gxe ;
+ if (($name ne "bar") && ($name ne "text") && ($name ne "link") && ($name ne "legend")) # && ($name ne "hint")
+ { $value = lc ($value) ; }
+
+ if ($name eq "link") # restore colon
+ { $value =~ s/'colon'/:/ ; }
+
+ if ($value eq "")
+ {
+ if ($name =~ /Text/i)
+ { $value = " " ; }
+ else
+ { &Error ("No value specified for attribute '$name'. Attribute ignored.") ; }
+ }
+ else
+ { @Attributes {$name} = $value ; }
+ }
+ else
+ {
+ if (defined (@Attributes {"single"}))
+ { &Error ("Invalid attribute '$field' ignored.\nSpecify attributes as 'name:value' pair(s).") ; }
+ else
+ {
+ $field =~ s/^\s*(.*)\s*$/$1/gxe ;
+ @Attributes {"single"} = $field ;
+ }
+ }
+ }
+ if (($name ne "") && (@Attributes {"single"} ne ""))
+ {
+ &Error ("Invalid attribute '" . @Attributes {"single"} . "' ignored.\nSpecify attributes as 'name:value' pairs.") ;
+ delete (@Attributes {"single"}) ;
+ }
+
+ if ((defined ($text)) && ($text ne ""))
+ { @Attributes {"text"} = &ParseText ($text) ; }
+}
+
+sub GetDefine
+{
+ my $command = shift ;
+ my $const = shift ;
+ $const = lc ($const) ;
+ my $value = @Consts {lc ($const)} ;
+ if (! defined ($value))
+ {
+ &Error ("Unknown constant. 'Define $const = ... ' expected.") ;
+ return ($const);
+ }
+ return ($value) ;
+}
+
+sub ParseAlignBars
+{
+ &CheckPreset ("AlignBars") ;
+
+ $align = @Attributes {"single"} ;
+ if (! ($align =~ /^(?:justify|early|late)$/i))
+ { &Error ("AlignBars value '$align' invalid. Specify 'justify', 'early' or 'late'.") ; return ; }
+
+ $AlignBars = lc ($align) ;
+}
+
+sub ParseBackgroundColors
+{
+ if (! &ValidAttributes ("BackgroundColors"))
+ { &GetData ; next ;}
+
+ &CheckPreset ("BackGroundColors") ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /Canvas/i)
+ {
+ if (! &ColorPredefined ($attrvalue))
+ {
+ if (! defined (@Colors {lc ($attrvalue)}))
+ { &Error ("BackgroundColors definition invalid. Attribute '$attribute': unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ; return ; }
+ }
+ if (defined (@Colors {lc ($attrvalue)}))
+ { @Attributes {"canvas"} = @Colors { lc ($attrvalue) } ; }
+ else
+ { @Attributes {"canvas"} = lc ($attrvalue) ; }
+ }
+ elsif ($attribute =~ /Bars/i)
+ {
+ if (! defined (@Colors {lc ($attrvalue)}))
+ { &Error ("BackgroundColors definition invalid. Attribute '$attribute' unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ; return ; }
+
+ @Attributes {"bars"} = lc ($attrvalue) ;
+ }
+ }
+
+ %BackgroundColors = %Attributes ;
+}
+
+sub ParseBarData
+{
+ &GetData ;
+ if ($NoData)
+ { &Error ("Data expected for command 'BarData', but line is not indented.\n") ; return ; }
+
+ my ($bar, $text, $link, $hint, $barset) ; # , $barcount) ;
+
+ BarData:
+ while ((! $InputParsed) && (! $NoData))
+ {
+ if (! &ValidAttributes ("BarData"))
+ { &GetData ; next ;}
+
+ $bar = "" ; $link = "" ; $hint = "" ; $barset = "" ; # $barcount = "" ;
+
+ my $data2 = $data ;
+ ($data2, $text) = &ExtractText ($data2) ;
+ @Attributes = split (" ", $data2) ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /^Bar$/i)
+ {
+ $bar = $attrvalue ;
+ }
+ elsif ($attribute =~ /^BarSet$/i)
+ {
+ $barset = $attrvalue ;
+ }
+ # elsif ($attribute =~ /^BarCount$/i)
+ # {
+ # $barcount = $attrvalue ;
+ # if (($barcount !~ /^\d?\d?\d$/) || ($barcount < 2) || ($barcount > 200))
+ # { &Error ("BarData attribute 'barcount' invalid. Specify a number between 2 and 200\n") ;
+ # &GetData ; next BarData ; }
+ # }
+ elsif ($attribute =~ /^Text$/i)
+ {
+ $text = $attrvalue ;
+ $text =~ s/\\n/~/gs ;
+ if ($text =~ /\~/)
+ { &Warning ("BarData attribute 'text' contains ~ (tilde).\n" .
+ "Tilde will not be translated into newline character (only in PlotData)") ; }
+ if ($text =~ /\^/)
+ { &Warning ("BarData attribute 'text' contains ^ (caret).\n" .
+ "Caret will not be translated into tab character (only in PlotData)") ; }
+ }
+ elsif ($attribute =~ /^Link$/i)
+ {
+ $link = &ParseText ($attrvalue) ;
+
+ if ($link =~ /\[.*\]/)
+ { &Error ("BarData attribute 'link' contains implicit (wiki style) link.\n" .
+ "Use implicit link style with attribute 'text' only.\n") ;
+ &GetData ; next BarData ; }
+
+ $link = &EncodeURL (&NormalizeURL ($link)) ;
+
+ $MapPNG = $true ;
+ }
+ }
+
+ if (($bar eq "") && ($barset eq ""))
+ { &Error ("BarData attribute missing. Specify either 'bar' of 'barset'.\n") ;
+ &GetData ; next BarData ; }
+
+ if (($bar ne "") && ($barset ne ""))
+ { &Error ("BarData attributes 'bar' and 'barset' are mutually exclusive.\nSpecify one of these per data line\n") ;
+ &GetData ; next BarData ; }
+
+ # if (($barset ne "") && ($barcount eq ""))
+ # { &Error ("BarData attribute 'barset' specified without attribute 'barcount'.\n") ;
+ # &GetData ; next BarData ; }
+
+ # if (($barset eq "") && ($barcount ne ""))
+ # { &Error ("BarData attribute 'barcount' specified without attribute 'barset'.\n") ;
+ # &GetData ; next BarData ; }
+
+ if (($barset ne "") && ($link ne ""))
+ { &Error ("BarData attribute 'link' not valid in combination with attribute 'barset'.\n") ;
+ &GetData ; next BarData ; }
+
+ if ($link ne "")
+ {
+ if ($text =~ /\[.*\]/)
+ {
+ &Warning ("BarData contains implicit link(s) in attribute 'text' and explicit attribute 'link'.\n" .
+ "Implicit link(s) ignored.") ;
+ $text =~ s/\[+ (?:[^\|]* \|)? ([^\]]*) \]+/$1/gx ;
+ }
+
+ if ($hint eq "")
+ { $hint = &ExternalLinkToHint ($link) ; }
+ }
+
+ if (($bar ne "") && ($bar !~ /[a-zA-Z0-9\_]+/))
+ { &Error ("BarData attribute bar:'$bar' invalid.\nUse only characters 'a'-'z', 'A'-'Z', '0'-'9', '_'\n") ;
+ &GetData ; next BarData ; }
+
+ if ($bar ne "")
+ {
+ if (@Axis {"time"} eq "x")
+ { push @Bars, $bar ; }
+ else
+ { unshift @Bars, $bar ; }
+
+ if ($text ne "")
+ { @BarLegend {lc ($bar)} = $text ; }
+ else
+ { @BarLegend {lc ($bar)} = " " ; }
+
+ if ($link ne "")
+ { @BarLink {lc ($bar)} = $link ; }
+ }
+ else
+ {
+# for ($b = 1 ; $b <= $barcount ; $b++)
+# {
+# $bar = $barset . "#" . $b ;
+
+ $bar = $barset . "#1" ;
+ if (@Axis {"time"} eq "x")
+ { push @Bars, $bar ; }
+ else
+ { unshift @Bars, $bar ; }
+
+ if ($text ne "")
+ { @BarLegend {lc ($bar)} = $text . " - " . $b ; }
+ else
+ { @BarLegend {lc ($bar)} = " " ; }
+# }
+ }
+
+
+ &GetData ;
+ }
+}
+
+sub ParseColors
+{
+
+ &GetData ;
+ if ($NoData)
+ { &Error ("Data expected for command 'Colors', but line is not indented.\n") ; return ; }
+
+ Colors:
+ while ((! $InputParsed) && (! $NoData))
+ {
+ if (! &ValidAttributes ("Colors"))
+ { &GetData ; next ;}
+
+ &CheckPreset ("Colors") ;
+
+ my $addtolegend = $false ;
+ my $legendvalue = "" ;
+ my $colorvalue = "" ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /Id/i)
+ {
+ $colorname = $attrvalue ;
+ }
+ elsif ($attribute =~ /Legend/i)
+ {
+ $addtolegend = $true ;
+ $legendvalue = $attrvalue ;
+ if ($legendvalue =~ /^[yY]$/)
+ { push @LegendData, $colorname ; }
+ elsif (! ($attrvalue =~ /^[nN]$/))
+ {
+ $legendvalue = &ParseText ($legendvalue) ;
+ push @LegendData, $legendvalue ;
+ }
+ }
+ elsif ($attribute =~ /Value/i)
+ {
+ $colorvalue = $attrvalue ;
+ if ($colorvalue =~ /^white$/i)
+ { $colorvalue = "gray" . $hBrO . "0.999" . $hBrC ; }
+ }
+ }
+
+ if (&ColorPredefined ($colorvalue))
+ {
+ &StoreColor ($colorname, $colorvalue, $legendvalue) ;
+ &GetData ; next Colors ;
+ }
+
+ if ($colorvalue =~ /^[a-z]+$/i)
+ {
+ if (! ($colorvalue =~ /^(?:gray|rgb|hsb)/i))
+ { &Error ("Color value invalid: unknown constant '$colorvalue'.") ;
+ &GetData ; next Colors ; }
+ }
+
+ if (! ($colorvalue =~ /^(?:gray|rgb|hsb) $hBrO .+? $hBrC/xi))
+ { &Error ("Color value invalid. Specify constant or 'gray/rgb/hsb(numeric values)' ") ;
+ &GetData ; next Colors ; }
+
+ if ($colorvalue =~ /^gray/i)
+ {
+ if ($colorvalue =~ /gray $hBrO (?:0|1|0\.\d+) $hBrC/xi)
+ { &StoreColor ($colorname, $colorvalue, $legendvalue) ; }
+ else
+ { &Error ("Color value invalid. Specify 'gray(x) where 0 <= x <= 1' ") ; }
+
+ &GetData ; next Colors ;
+ }
+
+ if ($colorvalue =~ /^rgb/i)
+ {
+ my $colormode = substr ($colorvalue,0,3) ;
+ if ($colorvalue =~ /rgb $hBrO
+ (?:0|1|0\.\d+) \,
+ (?:0|1|0\.\d+) \,
+ (?:0|1|0\.\d+)
+ $hBrC/xi)
+ { &StoreColor ($colorname, $colorvalue, $legendvalue) ; }
+ else
+ { &Error ("Color value invalid. Specify 'rgb(r,g,b) where 0 <= r,g,b <= 1' ") ; }
+
+ &GetData ; next Colors ;
+ }
+
+ if ($colorvalue =~ /^hsb/i)
+ {
+ my $colormode = substr ($colorvalue,0,3) ;
+ if ($colorvalue =~ /hsb $hBrO
+ (?:0|1|0\.\d+) \,
+ (?:0|1|0\.\d+) \,
+ (?:0|1|0\.\d+)
+ $hBrC/xi)
+ { &StoreColor ($colorname, $colorvalue, $legendvalue) ; }
+ else
+ { &Error ("Color value invalid. Specify 'hsb(h,s,b) where 0 <= h,s,b <= 1' ") ; }
+
+ &GetData ; next Colors ;
+ }
+
+ &Error ("Color value invalid.") ;
+ &GetData ;
+ }
+}
+
+sub StoreColor
+{
+ my $colorname = shift ;
+ my $colorvalue = shift ;
+ my $legendvalue = shift ;
+ if (defined (@Colors {lc ($colorname)}))
+ { &Warning ("Color '$colorname' redefined.") ; }
+ @Colors {lc ($colorname)} = lc ($colorvalue) ;
+ if ((defined ($legendvalue)) && ($legendvalue ne ""))
+ { @ColorLabels {lc ($colorname)} = $legendvalue ; }
+}
+
+sub ParseDateFormat
+{
+ &CheckPreset ("DateFormat") ;
+
+ my $datevalue = lc (@Attributes {"single"}) ;
+ $datevalue =~ s/\s//g ;
+ $datevalue = lc ($datevalue) ;
+ if (($datevalue ne "dd/mm/yyyy") && ($datevalue ne "mm/dd/yyyy") && ($datevalue ne "yyyy") && ($datevalue ne "x.y"))
+ { &Error ("Invalid DateFormat. Specify as 'dd/mm/yyyy', 'mm/dd/yyyy', 'yyyy' or 'x.y'\n" .
+ " (use first two only for years >= 1800)\n") ; return ; }
+
+ $DateFormat = $datevalue ;
+}
+
+sub ParseDefine
+{
+ my $command = $Command ;
+ my $command2 = $command ;
+ $command2 =~ s/^Define\s*//i ;
+
+ my ($name, $value) = split ($hIs, $command2) ;
+ $name =~ s/^\s*(.*?)\s*$/$1/g ;
+ $value =~ s/^\s*(.*?)\s*$/$1/g ;
+
+ if (! ($name =~ /^$hDollar/))
+ { &Error ("Define '$name' invalid. Name does not start with '\$'.") ; return ; }
+ if (! ($name =~ /^$hDollar[a-zA-Z0-9\_]+$/))
+ { &Error ("Define '$name' invalid. Valid characters are 'a'-'z', 'A'-'Z', '0'-'9', '_'.") ; return ; }
+
+ $value =~ s/($hDollar[a-zA-Z0-9]+)/&GetDefine($command,$1)/ge ;
+ @Consts {lc ($name)} = $value ;
+}
+
+sub ParseLineData
+{
+ &GetData ;
+ if ($NoData)
+ { &Error ("Data expected for command 'LineData', but line is not indented.\n") ; return ; }
+
+ if ((! (defined ($DateFormat))) || (! (defined (@Period {"from"}))))
+ {
+ if (! (defined ($DateFormat)))
+ { &Error ("LineData invalid. No (valid) command 'DateFormat' specified in previous lines.") ; }
+ else
+ { &Error ("LineData invalid. No (valid) command 'Period' specified in previous lines.") ; }
+
+ while ((! $InputParsed) && (! $NoData))
+ { &GetData ; }
+ return ;
+ }
+
+ my ($at, $from, $till, $atpos, $frompos, $tillpos, $color, $layer, $width, $points, $explanation) ;
+
+ $layer = "front" ;
+ $width = 2.0 ;
+
+ my $data2 = $data ;
+
+ LineData:
+ while ((! $InputParsed) && (! $NoData))
+ {
+ $at = "" ; $from = "" ; $till = "" ; $atpos = "" ; $frompos = "" ; $tillpos = "" ; $points = "" ;
+
+ &CheckPreset ("LineData") ;
+
+ if (! &ValidAttributes ("LineData"))
+ { &GetData ; next ;}
+
+ if (defined (@LineDefs {"color"})) { $color = @LineDefs {"color"} ; }
+ if (defined (@LineDefs {"layer"})) { $layer = @LineDefs {"layer"} ; }
+ if (defined (@LineDefs {"width"})) { $width = @LineDefs {"width"} ; }
+ if (defined (@LineDefs {"frompos"})) { $frompos = @LineDefs {"frompos"} ; }
+ if (defined (@LineDefs {"tillpos"})) { $tillpos = @LineDefs {"tillpos"} ; }
+ if (defined (@LineDefs {"atpos"})) { $atpos = @LineDefs {"atpos"} ; }
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /^(?:At|From|Till)$/i)
+ {
+ if ($attrvalue =~ /^Start$/i)
+ { $attrvalue = @Period {"from"} ; }
+
+ if ($attrvalue =~ /^End$/i)
+ { $attrvalue = @Period {"till"} ; }
+
+ if (! &ValidDateFormat ($attrvalue))
+ { &Error ("LineData attribute '$attribute' invalid.\n" .
+ "Date does not conform to specified DateFormat '$DateFormat'.") ;
+ &GetData ; next LineData ; }
+
+ if (! &ValidDateRange ($attrvalue))
+ { &Error ("LineData attribute '$attribute' invalid.\n" .
+ "Date '$attrvalue' not within range as specified by command Period.") ;
+ &GetData ; next LineData ; }
+
+# if (substr ($attrvalue,6,4) < 1800)
+# { &Error ("LineData attribute '$attribute' invalid. Specify year >= 1800.") ;
+# &GetData ; next LineData ; }
+
+ if ($attribute =~ /At/i)
+ {
+ $at = $attrvalue ; $from = "" ; $till = "" ; }
+ elsif ($attribute =~ /From/i)
+ { $from = $attrvalue ; $at = "" ; }
+ else
+ { $till = $attrvalue ; $at = "" ; }
+ }
+ elsif ($attribute =~ /^(?:atpos|frompos|tillpos)$/i)
+ {
+ if ($attrvalue =~ /^(?:Start|End)$/i)
+ { $attrvalue = lc ($attrvalue) ; }
+ elsif (! &ValidAbs ($attrvalue))
+ { &Error ("LineData attribute '$attribute' invalid.\n" .
+ "Specify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ;
+ &GetData ; next LineData ; }
+
+ if ($attribute =~ /atpos/i)
+ { $atpos = &Normalize ($attrvalue) ; }
+ elsif ($attribute =~ /frompos/i)
+ { $frompos = &Normalize ($attrvalue) ; }
+ else
+ { $tillpos = &Normalize ($attrvalue) ; }
+ }
+ elsif ($attribute =~ /Color/i)
+ {
+ if ((! &ColorPredefined ($attrvalue)) && (! defined (@Colors {lc ($attrvalue)})))
+ { &Error ("LineData attribute '$attribute' invalid. Unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ;
+ &GetData ; next LineData ; }
+
+ if (! &ColorPredefined ($attrvalue))
+ { $attrvalue = @Colors {lc ($attrvalue)} ; }
+
+ $color = $attrvalue ;
+ }
+ elsif ($attribute =~ /Layer/i)
+ {
+ if (! ($attrvalue =~ /^(?:back|front)$/i))
+ { &Error ("LineData attribute '$attrvalue' invalid.\nSpecify back(default) or front") ;
+ &GetData ; next LineData ; }
+
+ $layer = $attrvalue ;
+ }
+ elsif ($attribute =~ /Points/i)
+ {
+ $attribute =~ s/\s//g ;
+
+ if ($attrvalue !~ /^$hBrO\d+\,\d+$hBrC$hBrO\d+\,\d+$hBrC$/)
+ { &Error ("LineData attribute '$attrvalue' invalid.\nSpecify 'points:(x1,y1)(x2,y2)'") ;
+ &GetData ; next LineData ; }
+
+ $attrvalue =~ s/^$hBrO(\d+)\,(\d+)$hBrC$hBrO(\d+)\,(\d+)$hBrC$/$1,$2,$3,$4/ ;
+ $points = $attrvalue ;
+ }
+ elsif ($attribute =~ /Width/i)
+ {
+ if (! &ValidAbs ($attrvalue))
+ { &Error ("LineData attribute '$attribute' invalid.\n" .
+ "Specify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ;
+ &GetData ; next LineData ; }
+
+ if (($attrvalue < 0.1) || ($attrvalue > 10))
+ { &Error ("LineData attribute '$attribute' invalid.\n" .
+ "Specify value as between 0.1 and 10") ;
+ &GetData ; next LineData ; }
+
+ $width = $attrvalue ;
+ }
+ }
+
+ if (($at eq "") && ($from eq "") && ($till eq "") && ($points eq "")) # upd defaults
+ {
+ if ($color ne "") { @LineDefs {"color"} = $color ; }
+ if ($layer ne "") { @LineDefs {"layer"} = $layer ; }
+ if ($width ne "") { @LineDefs {"width"} = $width ; }
+ if ($atpos ne "") { @LineDefs {"atpos"} = $atpos ; }
+ if ($frompos ne "") { @LineDefs {"frompos"} = $frompos ; }
+ if ($tillpos ne "") { @LineDefs {"tillpos"} = $tillpos ; }
+ }
+
+ if ($layer eq "")
+ { $layer = "back" ; }
+
+ if ($color eq "")
+ { $color = "black" ; }
+
+ $explanation = "\nA line is defined as follows:\n" .
+ " Perpendicular to the time axis: 'at frompos tillpos'\n" .
+ " Parralel to the time axis: 'from till atpos'\n" .
+ " Any direction: points(x1,y1)(x2,y2)\n" .
+ " at,from,till expect date/time values, just like with command PlotData\n" .
+ " frompos,tillpos,atpos,x1,x2,y1,y2 expect coordinates (e.g. pixels values)\n" ;
+
+ if (($at ne "") && (($from ne "") || ($till ne "") || ($points ne "")))
+ { &Error ("LineData attribute 'at' can not be combined with 'from', 'till' or 'points'\n" . $explanation) ;
+ $explanation = "" ;
+ &GetData ; next LineData ; }
+
+ if ((($from ne "") && ($till eq "")) || (($from eq "") && ($till ne "")))
+ { &Error ("LineData attributes 'from' and 'till' should always be specified together\n" . $explanation) ;
+ $explanation = "" ;
+ &GetData ; next LineData ; }
+
+ if (($points ne "") && (($from ne "") || ($till ne "") || ($at ne "")))
+ { &Error ("LineData attribute 'points' can not be combined with 'at', 'from' or 'till'\n" . $explanation) ;
+ $explanation = "" ;
+ &GetData ; next LineData ; }
+
+ if ($at ne "")
+ { push @DrawLines, sprintf ("1|%s|%s|%s|%s|%s|%s\n", $at, $frompos, $tillpos, lc ($color), $width, lc ($layer)) ; }
+
+ if ($from ne "")
+ { push @DrawLines, sprintf ("2|%s|%s|%s|%s|%s|%s\n", $atpos, $from, $till, lc ($color), $width, lc ($layer)) ; }
+
+ if ($points ne "")
+ { push @DrawLines, sprintf ("3|%s|%s|%s|%s\n", $points, lc ($color), $width, lc ($layer)) ; }
+ &GetData ;
+ }
+}
+
+sub ParseImageSize
+{
+ if (! &ValidAttributes ("ImageSize")) { return ; }
+
+ &CheckPreset ("ImageSize") ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /Width|Height/i)
+ {
+ if ($attrvalue !~ /auto/i)
+ {
+ if (! &ValidAbs ($attrvalue))
+ { &Error ("ImageSize attribute '$attribute' invalid.\n" .
+ "Specify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ; return ; }
+ }
+ }
+
+ elsif ($attribute =~ /BarIncrement/i)
+ {
+ if (! &ValidAbs ($attrvalue))
+ { &Error ("ImageSize attribute '$attribute' invalid.\n" .
+ "Specify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ; return ; }
+
+ @Attributes {"barinc"} = $attrvalue ;
+ }
+# if ($attribute =~ /Width/i)
+# { @Attributes {"width"} = $attrvalue ; }
+# elsif ($attribute =~ /Height/i)
+# { @Attributes {"height"} = $attrvalue ; }
+ }
+
+ if ((@Attributes {"width"} =~ /auto/i) || (@Attributes {"height"} =~ /auto/i))
+ {
+ if (@Attributes {"barinc"} eq "")
+ { &Error ("ImageSize attribute 'barincrement' missing.\n" .
+ "Automatic determination of image width or height implies specification of this attribute") ; return ; }
+ }
+
+ if ((@Attributes {"width"} !~ /auto/i) && (@Attributes {"height"} !~ /auto/i))
+ {
+ if (@Attributes {"barinc"} ne "")
+ { &Error ("ImageSize attribute 'barincrement' not valid now.\n" .
+ "This attribute is only valid (and mandatory) in combination with 'width:auto' or 'height:auto'") ; return ; }
+ }
+
+ %Image = %Attributes ;
+}
+
+sub ParseLegend
+{
+ if (! &ValidAttributes ("Legend")) { return ; }
+
+ &CheckPreset ("Legend") ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /Columns/i)
+ {
+ if (($attrvalue < 1) || ($attrvalue > 4))
+ { &Error ("Legend attribute 'columns' invalid. Specify 1,2,3 or 4") ; return ; }
+ }
+ elsif ($attribute =~ /Orientation/i)
+ {
+ if (! ($attrvalue =~ /^(?:hor|horizontal|ver|vertical)$/i))
+ { &Error ("Legend attribute '$attrvalue' invalid. Specify hor[izontal] or ver[tical]") ; return ; }
+
+ @Attributes {"orientation"} = substr ($attrvalue,0,3) ;
+ }
+ elsif ($attribute =~ /Position/i)
+ {
+ if (! ($attrvalue =~ /^(?:top|bottom|right)$/i))
+ { &Error ("Legend attribute '$attrvalue' invalid.\nSpecify top, bottom or right") ; return ; }
+ }
+ elsif ($attribute =~ /Left/i)
+ {
+ if (! &ValidAbsRel ($attrvalue))
+ { &Error ("Legend attribute '$attribute' invalid.\nSpecify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ; return ; } }
+ elsif ($attribute =~ /Top/i)
+ {
+ if (! &ValidAbsRel ($attrvalue))
+ { &Error ("Legend attribute '$attribute' invalid.\nSpecify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ; return ; } }
+ elsif ($attribute =~ /ColumnWidth/i)
+ {
+ if (! &ValidAbsRel ($attrvalue))
+ { &Error ("Legend attribute '$attribute' invalid.\nSpecify value as x[.y][px, in, cm] examples: '200', '20px', '1.3in'") ; return ; }
+ }
+ }
+
+ if (defined (@Attributes {"position"}))
+ {
+ if (defined (@Attributes {"left"}))
+ { &Error ("Legend definition invalid. Attributes 'position' and 'left' are mutually exclusive.") ; return ; }
+ }
+ else
+ {
+ if ((! defined (@Attributes {"left"})) && (! defined (@Attributes {"top"})))
+ {
+ &Info ("Legend definition: none of attributes 'position', 'left' or 'top' have been defined. Position 'bottom' assumed.") ;
+ @Attributes {"position"} = "bottom" ;
+ }
+ elsif ((! defined (@Attributes {"left"})) || (! defined (@Attributes {"top"})))
+ { &Error ("Legend definition invalid. Specify 'position', or 'left' & 'top'.") ; return ; }
+ }
+
+ if (@Attributes {"position"} =~ /right/i)
+ {
+ if (defined (@Attributes {"columns"}))
+ { &Error ("Legend definition invalid.\nAttribute 'columns' and 'position:right' are mutually exclusive.") ; return ; }
+ if (defined (@Attributes {"columnwidth"}))
+ { &Error ("Legend definition invalid.\nAttribute 'columnwidth' and 'position:right' are mutually exclusive.") ; return ; }
+ }
+
+ if (@Attributes {"orientation"} =~ /hor/i)
+ {
+ if (@Attributes {"position"} =~ /right/i)
+ { &Error ("Legend definition invalid.\n'position:right' and 'orientation:horizontal' are mutually exclusive.") ; return ; }
+ if (defined (@Attributes {"columns"}))
+ { &Error ("Legend definition invalid.\nAttribute 'columns' and 'orientation:horizontal' are mutually exclusive.") ; return ; }
+ if (defined (@Attributes {"columnwidth"}))
+ { &Error ("Legend definition invalid.\nAttribute 'columnwidth' and 'orientation:horizontal' are mutually exclusive.") ; return ; }
+ }
+
+ if ((@Attributes {"orientation"} =~ /hor/i) && (defined (@Attributes {"columns"})))
+ { &Error ("Legend definition invalid.\nDo not specify attribute 'columns' with 'orientation:horizontal'.") ; return ; }
+
+ if (@Attributes {"columns"} > 1)
+ {
+ if ((defined (@Attributes {"left"})) && (! defined (@Attributes {"columnwidth"})))
+ { &Error ("Legend attribute 'columnwidth' not defined.\nThis is needed when attribute 'left' is specified.") ; return ; }
+ }
+
+ if (! defined (@Attributes {"orientation"}))
+ { @Attributes {"orientation"} = "ver" ; }
+
+ %Legend = %Attributes ;
+}
+
+sub ParsePeriod
+{
+ if (! defined ($DateFormat))
+ { &Error ("Period definition ambiguous. No (valid) command 'DateFormat' specified in previous lines.") ; return ; }
+
+ if (! ValidAttributes ("Period")) { return ; }
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($DateFormat eq "yyyy")
+ {
+ if ($attrvalue !~ /^\-?\d+$/)
+ { &Error ("Period definition invalid.\nInvalid year '$attrvalue' specified for attribute '$attribute'.") ; return ; }
+ }
+ elsif ($DateFormat eq "x.y")
+ {
+ if (! ($attrvalue =~ /^\-?\d+(?:\.\d+)?$/))
+ { &Error ("Period definition invalid.\nInvalid year '$attrvalue' specified for attribute '$attribute'.") ; return ; }
+ }
+ else
+ {
+ if (($attrvalue =~ /^\d+$/) && ($attrvalue >= 1800) && ($attrvalue <= 2030))
+ {
+ if ($attribute =~ /^From$/i)
+ { $attrvalue = "01/01/" . $attrvalue ; }
+ if ($attribute =~ /^Till$/i)
+ {
+ if ($DateFormat eq "dd/mm/yyyy")
+ { $attrvalue = "31/12/" . $attrvalue ; }
+ else
+ { $attrvalue = "12/31/" . $attrvalue ; }
+ }
+ }
+
+ $ValidDate = &ValidDateFormat ($attrvalue) ;
+ if (! $ValidDate)
+ { &Error ("Period attribute '$attribute' invalid.\n" .
+ "Date does not conform to specified DateFormat '$DateFormat'.") ; return ; }
+ if (substr ($attrvalue,6,4) < 1800)
+ { &Error ("Period attribute '$attribute' invalid. Specify year >= 1800.") ; return ; }
+
+ @Attributes {$attribute} = $attrvalue ;
+ }
+ }
+
+ %Period = %Attributes ;
+}
+
+sub ParsePlotArea
+{
+ if (! &ValidAttributes ("PlotArea")) { return ; }
+
+ &CheckPreset ("PlotArea") ;
+
+ foreach $attribute (@Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+ if (! &ValidAbsRel ($attrvalue))
+ { &Error ("PlotArea attribute '$attribute' invalid.\n" .
+ "Specify value as x[.y][px, in, cm, %] examples: '200', '20px', '1.3in', '80%'") ; return ; }
+ }
+
+ if ((@Attributes {"top"} ne "") && (@Attributes {"height"} ne ""))
+ { &Error ("PlotArea attributes 'top' and 'height' are mutually exclusive. Specify only one of them.") ; return ; }
+
+ if ((@Attributes {"right"} ne "") && (@Attributes {"width"} ne ""))
+ { &Error ("PlotArea attributes 'right' and 'width' are mutually exclusive. Specify only one of them.") ; return ; }
+
+ if ((@Attributes {"top"} eq "") && (@Attributes {"height"} eq ""))
+ { &Error ("PlotArea definition incomplete. Either attribute 'top' (advised) or 'height' should be specified") ; return ; }
+
+ if ((@Attributes {"right"} eq "") && (@Attributes {"width"} eq ""))
+ { &Error ("PlotArea definition incomplete. Either attribute 'right' (advised) or 'width' should be specified") ; return ; }
+
+ %PlotArea = %Attributes ;
+}
+
+# command Bars found ?
+# Y | N
+# bar: found ? | bar: found ?
+# Y | N | Y | N
+# validate | previous bar: found? | @Bars contains | previous bar: found?
+# bar:.. | | bar: ? | Y | N
+# | Y | N | | copy | assume
+# | copy | $#Bars .. | Y | N | bar: | bar:---
+# | bar: |== 0 | - | assume | |
+# | | assume bar:--- | | bar:--- | |
+# | |== 1 |
+# | | assume @Bar[0] |
+# | |> 1 |
+# | | err |
+sub ParsePlotData
+{
+ if (defined (@Bars))
+ { $BarsCommandFound = $true ; }
+ else
+ { $BarsCommandFound = $false ; }
+ $prevbar = "" ;
+
+ if ((! (defined ($DateFormat))) || (@Period {"from"} eq "") || (@Axis {"time"} eq ""))
+ {
+ if (! (defined ($DateFormat)))
+ { &Error ("PlotData invalid. No (valid) command 'DateFormat' specified in previous lines.") ; }
+ elsif (@Period {"from"} eq "")
+ { &Error ("PlotData invalid. No (valid) command 'Period' specified in previous lines.") ; }
+ else
+ { &Error ("PlotData invalid. No (valid) command 'TimeAxis' specified in previous lines.") ; }
+
+ &GetData ;
+ while ((! $InputParsed) && (! $NoData))
+ { &GetData ; }
+ return ;
+ }
+
+ &GetData ;
+ if ($NoData)
+ { &Error ("Data expected for command 'PlotData', but line is not indented.\n") ; return ; }
+
+ my ($bar, $at, $from, $till, $color, $bgcolor, $textcolor, $fontsize, $width,
+ $text, $anchor, $align, $shift, $shiftx, $shifty, $mark, $markcolor, $link, $hint) ;
+
+ @PlotDefs {"anchor"} = "middle" ;
+
+ PlotData:
+ while ((! $InputParsed) && (! $NoData))
+ {
+ if (! &ValidAttributes ("PlotData"))
+ { &GetData ; next ;}
+
+ $bar = "" ; # $barset = "" ;
+ $at = "" ; $from = "" ; $till = "" ;
+ $color = "barcoldefault" ; $bgcolor = "" ; $textcolor = "black" ; $fontsize = "S" ; $width = "0.25" ;
+ $text = "" ; $align = "left" ; $shift = "" ; $shiftx = "" ; $shifty = "" ; $anchor = "" ;
+ $mark = "" ; $markcolor = "" ;
+ $link = "" ; $hint = "" ;
+
+ &CheckPreset ("PlotData") ;
+
+ if (defined (@PlotDefs {"bar"})) { $bar = @PlotDefs {"bar"} ; }
+ # if (defined (@PlotDefs {"barset"})) { $barset = @PlotDefs {"barset"} ; }
+ if (defined (@PlotDefs {"color"})) { $color = @PlotDefs {"color"} ; }
+ if (defined (@PlotDefs {"bgcolor"})) { $bgcolor = @PlotDefs {"bgcolor"} ; }
+ if (defined (@PlotDefs {"textcolor"})) { $textcolor = @PlotDefs {"textcolor"} ; }
+ if (defined (@PlotDefs {"fontsize"})) { $fontsize = @PlotDefs {"fontsize"} ; }
+ if (defined (@PlotDefs {"width"})) { $width = @PlotDefs {"width"} ; }
+ if (defined (@PlotDefs {"anchor"})) { $anchor = @PlotDefs {"anchor"} ; }
+ if (defined (@PlotDefs {"align"})) { $align = @PlotDefs {"align"} ; }
+ if (defined (@PlotDefs {"shiftx"})) { $shiftx = @PlotDefs {"shiftx"} ; }
+ if (defined (@PlotDefs {"shifty"})) { $shifty = @PlotDefs {"shifty"} ; }
+ if (defined (@PlotDefs {"mark"})) { $mark = @PlotDefs {"mark"} ; }
+ if (defined (@PlotDefs {"markcolor"})) { $markcolor = @PlotDefs {"markcolor"} ; }
+# if (defined (@PlotDefs {"link"})) { $link = @PlotDefs {"link"} ; }
+# if (defined (@PlotDefs {"hint"})) { $hint = @PlotDefs {"hint"} ; }
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /^Bar$/i)
+ {
+ if (! ($attrvalue =~ /[a-zA-Z0-9\_]+/))
+ { &Error ("PlotData attribute '$attribute' invalid.\n" .
+ "Use only characters 'a'-'z', 'A'-'Z', '0'-'9', '_'\n") ;
+ &GetData ; next PlotData ; }
+
+ $attrvalue2 = $attrvalue ;
+
+ if ($BarsCommandFound)
+ {
+ if (! &BarDefined ($attrvalue2))
+ { &Error ("PlotData invalid. Bar '$attrvalue' not (properly) defined.") ;
+ &GetData ; next PlotData ; }
+ }
+ else
+ {
+ if (! &BarDefined ($attrvalue2))
+ {
+ if (@Axis {"time"} eq "x")
+ { push @Bars, $attrvalue2 ; }
+ else
+ { unshift @Bars, $attrvalue2 ; }
+ }
+ }
+ $bar = $attrvalue2 ;
+ $prevbar = $bar ;
+ }
+ elsif ($attribute =~ /^BarSet$/i)
+ {
+ if (! ($attrvalue =~ /[a-zA-Z0-9\_]+/))
+ { &Error ("PlotData attribute '$attribute' invalid.\n" .
+ "Use only characters 'a'-'z', 'A'-'Z', '0'-'9', '_'\n") ;
+ &GetData ; next PlotData ; }
+
+ $attrvalue2 = $attrvalue ;
+
+ if ($attrvalue =~ /break/i)
+ { $barndx = 0 ; }
+ elsif ($attrvalue =~ /skip/i)
+ {
+ $barndx ++ ;
+ &BarDefined ($prevbar . "#" . $barndx) ;
+ }
+ else
+ {
+ if ($BarsCommandFound)
+ {
+ if (! &BarDefined ($attrvalue2 . "#1"))
+ { &Error ("PlotData invalid. BarSet '$attrvalue' not (properly) defined with command BarData.") ;
+ &GetData ; next PlotData ; }
+ }
+ $bar = $attrvalue2 ;
+ if ($bar ne $prevbar)
+ { $barndx = 0 ; }
+ $prevbar = $bar ;
+ }
+ }
+ elsif ($attribute =~ /^(?:At|From|Till)$/i)
+ {
+ if ($attrvalue =~ /^Start$/i)
+ { $attrvalue = @Period {"from"} ; }
+ if ($attrvalue =~ /^End$/i)
+ { $attrvalue = @Period {"till"} ; }
+
+ if (! &ValidDateFormat ($attrvalue))
+ {
+ &Error ("PlotData attribute '$attribute' invalid.\n" .
+ "Date '$attrvalue' does not conform to specified DateFormat $DateFormat.") ;
+ &GetData ; next PlotData ; }
+
+ if (! &ValidDateRange ($attrvalue))
+ { &Error ("Plotdata attribute '$attribute' invalid.\n" .
+ "Date '$attrvalue' not within range as specified by command Period.") ;
+
+ &GetData ; next PlotData ; }
+
+ if ($attribute =~ /^At$/i)
+ { $at = $attrvalue ; }
+ elsif ($attribute =~ /^From$/i)
+ { $from = $attrvalue ; }
+ else
+ { $till = $attrvalue ; }
+ }
+# elsif ($attribute =~ /^From$/i)
+# {
+# if ($attrvalue =~ /^Start$/i)
+# { $attrvalue = @Period {"from"} ; }
+
+# if (! &ValidDateFormat ($attrvalue))
+# { &Error ("PlotData invalid.\nDate '$attrvalue' does not conform to specified DateFormat $DateFormat.") ;
+# &GetData ; next PlotData ; }
+
+# if (! &ValidDateRange ($attrvalue))
+# { &Error ("Plotdata attribute 'from' invalid.\n" .
+# "Date '$attrvalue' not within range as specified by command Period.") ;
+# &GetData ; next PlotData ; }
+
+# $from = $attrvalue ;
+# }
+# elsif ($attribute =~ /^Till$/i)
+# {
+# if ($attrvalue =~ /^End$/i)
+# { $attrvalue = @Period {"till"} ; }
+
+# if (! &ValidDateFormat ($attrvalue))
+# { &Error ("PlotData invalid. Date '$attrvalue' does not conform to specified DateFormat $DateFormat.") ;
+# &GetData ; next PlotData ; }
+
+# if (! &ValidDateRange ($attrvalue))
+# { &Error ("Plotdata attribute 'till' invalid.\n" .
+# "Date '$attrvalue' not within range as specified by command Period.") ;
+# &GetData ; next PlotData ; }
+
+# $till = $attrvalue ;
+# }
+ elsif ($attribute =~ /^Color$/i)
+ {
+ if (! &ColorPredefined ($attrvalue))
+ {
+ if (! defined (@Colors {lc ($attrvalue)}))
+ { &Error ("PlotData invalid. Attribute '$attribute' has unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ;
+ &GetData ; next PlotData ; }
+ }
+ if (defined (@Colors {lc ($attrvalue)}))
+ { $color = @Colors { lc ($attrvalue) } ; }
+ else
+ { $color = lc ($attrvalue) ; }
+
+ $color = $attrvalue ;
+ }
+ elsif ($attribute =~ /^BgColor$/i)
+ {
+ if (! &ColorPredefined ($attrvalue))
+ {
+ if (! defined (@Colors {lc ($attrvalue)}))
+ { &Error ("PlotData invalid. Attribute '$attribute' has unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ;
+ &GetData ; next PlotData ; }
+ }
+ if (defined (@Colors {lc ($attrvalue)}))
+ { $bgcolor = @Colors { lc ($attrvalue) } ; }
+ else
+ { $bgcolor = lc ($attrvalue) ; }
+ }
+ elsif ($attribute =~ /^TextColor$/i)
+ {
+ if (! &ColorPredefined ($attrvalue))
+ {
+ if (! defined (@Colors {lc ($attrvalue)}))
+ { &Error ("PlotData invalid. Attribute '$attribute' contains unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ;
+ &GetData ; next PlotData ; }
+ }
+ if (defined (@Colors {lc ($attrvalue)}))
+ { $textcolor = @Colors { lc ($attrvalue) } ; }
+ else
+ { $textcolor = lc ($attrvalue) ; }
+ }
+ elsif ($attribute =~ /^Width$/i)
+ {
+ $width = &Normalize ($attrvalue) ;
+ if ($width > $MaxBarWidth)
+ { $MaxBarWidth = $width ; }
+ }
+ elsif ($attribute =~ /^FontSize$/i)
+ {
+ if (($attrvalue !~ /\d+(?:\.\d)?/) && ($attrvalue !~ /xs|s|m|l|xl/i))
+ { &Error ("PlotData invalid. Specify for attribute '$attribute' a number of XS,S,M,L,XL.") ;
+ &GetData ; next PlotData ; }
+
+ $fontsize = $attrvalue ;
+ if ($fontsize =~ /(?:XS|S|M|L|XL)/i)
+ {
+ if ($fontsize !~ /(?:xs|s|m|l|xl)/i)
+ {
+ if ($fontsize < 6)
+ { &Warning ("TextData attribute 'fontsize' value too low. Font size 6 assumed.\n") ;
+ $fontsize = 6 ; }
+ if ($fontsize > 30)
+ { &Warning ("TextData attribute 'fontsize' value too high. Font size 30 assumed.\n") ;
+ $fontsize = 30 ; }
+ }
+ }
+ }
+ elsif ($attribute =~ /^Anchor$/i)
+ {
+ if (! ($attrvalue =~ /^(?:from|till|middle)$/i))
+ { &Error ("PlotData value '$attribute' invalid. Specify 'from', 'till' or 'middle'.") ;
+ &GetData ; next PlotData ; }
+
+ $anchor = lc ($attrvalue) ;
+ }
+ elsif ($attribute =~ /^Align$/i)
+ {
+ if (! ($attrvalue =~ /^(?:left|right|center)$/i))
+ { &Error ("PlotData value '$attribute' invalid. Specify 'left', 'right' or 'center'.") ;
+ &GetData ; next PlotData ; }
+
+ $align = lc ($attrvalue) ;
+ }
+ elsif ($attribute =~ /^Shift$/i)
+ {
+ $shift = $attrvalue ;
+ $shift =~ s/$hBrO(.*?)$hBrC/$1/ ;
+ $shift =~ s/\s//g ;
+ ($shiftx2,$shifty2) = split (",", $shift) ;
+ if ($shiftx2 ne "")
+ { $shiftx = &Normalize ($shiftx2) ; }
+ if ($shifty2 ne "")
+ { $shifty = &Normalize ($shifty2) ; }
+
+ if (($shiftx < -10) || ($shiftx > 10) || ($shifty < -10) || ($shifty > 10))
+ { &Error ("PlotData invalid. Attribute '$shift', specify value(s) between -1000 and 1000 pixels = -10 and 10 inch.") ;
+ &GetData ; next PlotData ; }
+ }
+ elsif ($attribute =~ /^Text$/i)
+ {
+ $text = &ParseText ($attrvalue) ;
+ $text =~ s/\\n/\n/g ;
+ if ($text =~ /\^/)
+ { &Warning ("TextData attribute 'text' contains ^ (caret).\n" .
+ "Caret symbol will not be translated into tab character (use TextData when tabs are needed)") ; }
+
+# $text=~ s/(\[\[ [^\]]* \n [^\]]* \]\])/&NormalizeWikiLink($1)/gxe ;
+ $text=~ s/(\[\[? [^\]]* \n [^\]]* \]?\])/&NormalizeWikiLink($1)/gxe ;
+ }
+ elsif ($attribute =~ /^Link$/i)
+ {
+ $link = &ParseText ($attrvalue) ;
+ $link = &EncodeURL (&NormalizeURL ($link)) ;
+ }
+# elsif ($attribute =~ /^Hint$/i)
+# {
+# $hint = &ParseText ($attrvalue) ;
+# $hint =~ s/\\n/\n/g ;
+# }
+ elsif ($attribute =~ /^Mark$/i)
+ {
+ $attrvalue =~ s/$hBrO (.*) $hBrC/$1/x ;
+ (@suboptions) = split (",", $attrvalue) ;
+ $mark = @suboptions [0] ;
+ if (! ($mark =~ /^(?:Line|None)$/i))
+ { &Error ("PlotData invalid. Value '$mark' for attribute 'mark' unknown.") ;
+ &GetData ; next PlotData ; }
+
+ if (defined (@suboptions [1]))
+ {
+ $markcolor = @suboptions [1] ;
+
+ if (! &ColorPredefined ($markcolor))
+ {
+ if (! defined (@Colors {lc ($markcolor)}))
+ { &Error ("PlotData invalid. Attribute 'mark': unknown color '$markcolor'.\n" .
+ " Specify command 'Color' before this command.") ;
+ &GetData ; next PlotData ; }
+ }
+ $markcolor = lc ($markcolor) ;
+ }
+ else
+ { $markcolor = "black" ; }
+ }
+ else
+ { &Error ("PlotData invalid. Unknown attribute '$attribute' found.") ;
+ &GetData ; next PlotData ; }
+ }
+
+# if ($text =~ /\[\[.*\[\[/s)
+# { &Error ("PlotData invalid. Text segment '$text' contains more than one wiki link. Only one allowed.") ;
+# &GetData ; next PlotData ; }
+
+# if (($text ne "") || ($link ne ""))
+# { ($text, $link, $hint) = &ProcessWikiLink ($text, $link, $hint) ; }
+
+ $shift = $shiftx . "," . $shifty ;
+
+ if ($MaxBarWidth eq "")
+ { $MaxBarWidth = $width - 0.001 ; }
+
+ if ($bar ne "")
+ {
+ if (! defined (@BarLegend {lc($bar)}))
+ { @BarLegend {lc($bar)} = $bar ; }
+ if (! defined (@BarWidths {$bar}))
+ { @BarWidths {$bar} = $width ; } # was 0 ??
+ }
+
+ if (($at eq "") && ($from eq "") && ($till eq "")) # upd defaults
+ {
+ if ($bar ne "") { @PlotDefs {"bar"} = $bar ; }
+# if ($barset ne "") { @PlotDefs {"barset"} = $barset ; }
+ if ($color ne "") { @PlotDefs {"color"} = $color ; }
+ if ($bgcolor ne "") { @PlotDefs {"bgcolor"} = $bgcolor ; }
+ if ($textcolor ne "") { @PlotDefs {"textcolor"} = $textcolor ; }
+ if ($fontsize ne "") { @PlotDefs {"fontsize"} = $fontsize ; }
+ if ($width ne "") { @PlotDefs {"width"} = $width ; }
+ if ($anchor ne "") { @PlotDefs {"anchor"} = $anchor ; }
+ if ($align ne "") { @PlotDefs {"align"} = $align ; }
+ if ($shiftx ne "") { @PlotDefs {"shiftx"} = $shiftx ; }
+ if ($shifty ne "") { @PlotDefs {"shifty"} = $shifty ; }
+ if ($mark ne "") { @PlotDefs {"mark"} = $mark ; }
+ if ($markcolor ne "") { @PlotDefs {"markcolor"} = $markcolor ; }
+# if ($link ne "") { @PlotDefs {"link"} = $link ; }
+# if ($hint ne "") { @PlotDefs {"hint"} = $hint ; }
+ &GetData ; next PlotData ;
+ }
+
+ if ($bar eq "")
+ {
+ if ($prevbar ne "")
+ { $bar = $prevbar ; }
+ else
+ {
+# if ($BarsCommandFound)
+# {
+ if ($#Bars > 0)
+ { &Error ("PlotData invalid. Specify attribute 'bar'.") ;
+ &GetData ; next PlotData ; }
+ elsif ($#Bars == 0)
+ {
+ $bar = @Bars [0] ;
+ &Info ($data, "PlotData incomplete. Attribute 'bar' missing, value '" . @Bars [0] . "' assumed.") ;
+ }
+ else
+ { $bar = "1" ; }
+# }
+# else
+# {
+# if ($#Bars > 0)
+# { &Error ("PlotData invalid. Attribute 'bar' missing.") ;
+# &GetData ; next PlotData ; }
+# elsif ($#Bars == 0)
+# {
+# $bar = @Bars [0] ;
+# &Info ($data, "PlotData incomplete. Attribute 'bar' missing, value '" . @Bars [0] . "' assumed.") ;
+# }
+# else { $bar = "1" ; }
+# }
+ $prevbar = $bar ;
+ }
+ }
+
+ if (&BarDefined ($bar . "#1")) # bar is actually a bar set
+ {
+ if (($from ne "") || ($at ne "") || ($text eq " ")) # data line ?
+ {
+ $barndx++ ;
+ if (! &BarDefined ($bar . "#" . $barndx))
+ { $barndx = 1 ; }
+ $bar = $bar . "#" . $barndx ;
+ # $text = $bar ;
+ }
+ }
+
+ if (($at ne "") && (($from ne "") || ($till ne "")))
+ { &Error ("PlotData invalid. Attributes 'at' and 'from/till' are mutually exclusive.") ;
+ &GetData ; next PlotData ; }
+
+ if ((($from eq "") && ($till ne "")) || (($from ne "") && ($till eq "")))
+ { &Error ("PlotData invalid. Specify attribute 'at' or 'from' + 'till'.") ;
+ &GetData ; next PlotData ; }
+
+
+ if ($at ne "")
+ {
+ if ($text ne "")
+ {
+ if ($align eq "")
+ { &Error ("PlotData invalid. Attribute 'align' missing.") ;
+ &GetData ; next PlotData ; }
+ if ($fontsize eq "")
+ { &Error ("PlotData invalid. Attribute '[font]size' missing.") ;
+ &GetData ; next PlotData ; }
+ if ($text eq "")
+ { &Error ("PlotData invalid. Attribute 'text' missing.") ;
+ &GetData ; next PlotData ; }
+ }
+ }
+ else
+ {
+ if (($text ne "") && ($anchor eq ""))
+ { &Error ("PlotData invalid. Attribute 'anchor' missing.") ;
+ &GetData ; next PlotData ; }
+ if ($color eq "")
+ { &Error ("PlotData invalid. Attribute 'color' missing.") ;
+ &GetData ; next PlotData ; }
+ if ($width eq "")
+ { &Error ("PlotData invalid. Attribute 'width' missing.") ;
+ &GetData ; next PlotData ; }
+ }
+
+ if ($from ne "")
+ {
+ if (($link ne "") && ($hint eq ""))
+ { $hint = &ExternalLinkToHint ($link) ; }
+
+ if (($link ne "") || ($hint ne ""))
+ { $MapPNG = $true ; }
+ if ($link ne "")
+ { $MapSVG = $true ; }
+
+ push @PlotBars, sprintf ("%6.3f,%s,%s,%s,%s,%s,%s,\n", $width, $bar, $from, $till, lc ($color),$link,$hint) ;
+ if ($width > @BarWidths {$bar})
+ { @BarWidths {$bar} = $width ; }
+
+ if ($text ne "")
+ {
+ if ($anchor eq "from")
+ { $at = $from ; }
+ elsif ($anchor eq "till")
+ { $at = $till ; }
+ else
+ { $at = &DateMedium ($from, $till) ; }
+ }
+
+ if (($mark ne "") && ($mark !~ /none/i))
+ {
+ push @PlotLines, sprintf ("%s,%s,%s,%s,,,\n", $bar, $from, $from, lc ($markcolor)) ;
+ push @PlotLines, sprintf ("%s,%s,%s,%s,,,\n", $bar, $till, $till, lc ($markcolor)) ;
+ $mark = "" ;
+ }
+ }
+
+ if ($at ne "")
+ {
+ if (($mark ne "") && ($mark !~ /none/i))
+ { push @PlotLines, sprintf ("%s,%s,%s,%s,,,\n", $bar, $at, $at, lc ($markcolor)) ; }
+
+ if ($text ne "")
+ {
+ my $textdetails = "" ;
+
+ if ($link ne "")
+ {
+ if ($text =~ /\[.*\]/)
+ {
+ &Warning ("PlotData contains implicit link(s) in attribute 'text' and explicit attribute 'link'. " .
+ "Implicit link(s) ignored.") ;
+ $text =~ s/\[+ (?:[^\|]* \|)? ([^\]]*) \]+/$1/gx ;
+ }
+ if ($hint eq "")
+ { $hint = &ExternalLinkToHint ($link) ; }
+ }
+
+ if ($anchor eq "")
+ { $anchor = "middle" ; }
+ if ($align eq "")
+ { $align = "center" ; }
+ if ($color eq "")
+ { $color = "black" ; }
+ if ($fontsize eq "")
+ { $fontsize = "S" ; }
+ if ($adjust eq "")
+ { $adjust = "0,0" ; }
+
+# $textdetails = " textdetails: align=$align size=$size" ;
+# if ($textcolor eq "")
+# { $textcolor = "black" ; }
+# if ($color ne "")
+# { $textdetails .= " color=$textcolor" ; }
+
+# my ($xpos, $ypos) ;
+# my $barcnt = 0 ;
+# for ($b = 0 ; $b <= $#Bars ; $b++)
+# {
+# if (lc(@Bars [$b]) eq lc($bar))
+# { $barcnt = ($b + 1) ; last ; }
+# }
+
+# if (@Axis {"time"} eq "x")
+# { $xpos = "$at(s)" ; $ypos = "[$barcnt](s)" ; }
+# else
+# { $ypos = "$at(s)" ; $xpos = "[$barcnt](s)" ; }
+
+# if ($shift ne "")
+# {
+# my ($shiftx, $shifty) = split (",", $shift) ;
+# if ($shiftx > 0)
+# { $xpos .= "+$shiftx" ; }
+# if ($shiftx < 0)
+# { $xpos .= "$shiftx" ; }
+# if ($shifty > 0)
+# { $ypos .= "+$shifty" ; }
+# if ($shifty < 0)
+# { $ypos .= "$shifty" ; }
+# }
+
+ $text =~ s/\,/\#\%\$/g ;
+ $link =~ s/\,/\#\%\$/g ;
+ $hint =~ s/\,/\#\%\$/g ;
+ $shift =~ s/\,/\#\%\$/g ;
+ $textcolor =~ s/\,/\#\%\$/g ;
+ push @PlotText, sprintf ("%s,%s,%s,%s,%s,%s,%s,%s,%s", $at, $bar, $text, $textcolor, $fontsize, $align, $shift, $link, $hint) ;
+ }
+ }
+
+ &GetData ;
+ }
+
+ if ((! $BarsCommandFound) && ($#Bars > 1))
+  { &Info2 ("PlotBars definition: no (valid) command 'BarData' found in previous lines.\nBars will be presented in order of appearance in PlotData.") ; }
+
+ $maxwidth = 0 ;
+ foreach $key (keys %BarWidths)
+ {
+ if (@BarWidths {$key} == 0)
+    { &Warning ("PlotData incomplete. No bar width defined for bar '$key', assuming width of widest bar (used for line marks).") ; }
+ elsif (@BarWidths {$key} > $maxwidth)
+ { $maxwidth = @BarWidths {$key} ; }
+ }
+ foreach $key (keys %BarWidths)
+ {
+ if (@BarWidths {$key} == 0)
+ { @BarWidths {$key} = $maxwidth ; }
+ }
+}
+
+sub ParsePreset
+{
+ if (! $firstcmd)
+ { &Error ("Specify 'Preset' command before any other commands, if desired at all.\n") ; return ; }
+
+ $preset = @Attributes {"single"} ;
+ if ($preset !~ /^(?:TimeVertical_OneBar_UnitYear|TimeHorizontal_AutoPlaceBars_UnitYear)$/i)
+ { &Error ("Preset value invalid.\n" .
+ " At the moment two presets are available:\n" .
+ " TimeVertical_OneBar_UnitYear and TimeHorizontal_AutoPlaceBars_UnitYear\n" .
+ " See also meta.wikipedia.org/wiki/EasyTimeline/Presets") ; return ; }
+
+ $Preset = $preset ;
+
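+  # Each preset rule below is encoded as "Command|op|attribute|value" and is
+  # interpreted later by &CheckPreset (defined elsewhere in this file). Judging
+  # from the patterns, '=' appears to enforce a value, '+' to supply a default
+  # and '-' to disallow the attribute; that reading is an assumption, not
+  # something stated here.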
+ if ($Preset =~ /^TimeVertical_OneBar_UnitYear/i)
+ {
+ $DateFormat = "yyyy" ;
+ $AlignBars = "early" ;
+ @Axis {"format"} = "yyyy" ;
+ @Axis {"time"} = "y" ;
+ @PlotArea {"left"} = 45 ;
+ @PlotArea {"right"} = 10 ;
+ @PlotArea {"top"} = 10 ;
+ @PlotArea {"bottom"} = 10 ;
+ push @PresetList, "PlotArea|+|left|" . @PlotArea {"left"} ;
+ push @PresetList, "PlotArea|+|right|" . @PlotArea {"right"};
+ push @PresetList, "PlotArea|+|top|" . @PlotArea {"top"} ;
+ push @PresetList, "PlotArea|+|bottom|" . @PlotArea {"bottom"} ;
+ push @PresetList, "PlotArea|-|width" ;
+ push @PresetList, "PlotArea|-|height" ;
+ push @PresetList, "Dateformat|-||yyyy" ;
+ push @PresetList, "TimeAxis|=|format|" . @Axis {"format"} ;
+ push @PresetList, "TimeAxis|=|orientation|vertical" ;
+ push @PresetList, "ScaleMajor|=|unit|year" ;
+ push @PresetList, "ScaleMinor|=|unit|year" ;
+ push @PresetList, "AlignBars|=||early" ;
+ push @PresetList, "PlotData|+|mark|" . $hBrO . "line,white" . $hBrC ;
+ push @PresetList, "PlotData|+|align|left" ;
+ push @PresetList, "PlotData|+|fontsize|S" ;
+ push @PresetList, "PlotData|+|width|20" ;
+ push @PresetList, "PlotData|+|shift|" . $hBrO . "20,0" . $hBrC ;
+ }
+ elsif ($Preset =~ /TimeHorizontal_AutoPlaceBars_UnitYear/i)
+ {
+ $DateFormat = "yyyy" ;
+ $AlignBars = "justify" ;
+ @Axis {"format"} = "yyyy" ;
+ @Axis {"time"} = "x" ;
+ @PlotArea {"left"} = 25 ;
+ @PlotArea {"right"} = 25 ;
+ @PlotArea {"top"} = 15 ;
+ @PlotArea {"bottom"} = 30 ;
+ @Image {"height"} = "auto" ;
+ @Image {"barinc"} = 20 ;
+ @BackgroundColors {"canvas"} = "gray(0.7)" ;
+ @Legend {"orientation"} = "ver" ;
+ @Legend {"left"} = @PlotArea {"left"}+10 ;
+ @Legend {"top"} = @PlotArea {"bottom"}+100 ;
+ &StoreColor ("canvas", &EncodeInput ("gray(0.7)"), "") ;
+ &StoreColor ("grid1", &EncodeInput ("gray(0.4)"), "") ;
+ &StoreColor ("grid2", &EncodeInput ("gray(0.2)"), "") ;
+ push @PresetList, "ImageSize|=|height|auto" ;
+ push @PresetList, "ImageSize|+|barincrement|20" ;
+ push @PresetList, "PlotArea|+|left|" . @PlotArea {"left"} ;
+ push @PresetList, "PlotArea|+|right|" . @PlotArea {"right"};
+ push @PresetList, "PlotArea|+|top|" . @PlotArea {"top"} ;
+ push @PresetList, "PlotArea|+|bottom|" . @PlotArea {"bottom"} ;
+ push @PresetList, "PlotArea|-|width" ;
+ push @PresetList, "PlotArea|-|height" ;
+ push @PresetList, "Dateformat|-||yyyy" ;
+ push @PresetList, "TimeAxis|=|format|" . @Axis {"format"} ;
+ push @PresetList, "TimeAxis|=|orientation|horizontal" ;
+ push @PresetList, "ScaleMajor|=|unit|year" ;
+ push @PresetList, "ScaleMajor|+|grid|grid1" ;
+ push @PresetList, "ScaleMinor|=|unit|year" ;
+ push @PresetList, "AlignBars|=||justify" ;
+ push @PresetList, "Legend|+|orientation|" . @Legend {"orientation"} ;
+ push @PresetList, "Legend|+|left|" . @Legend {"left"} ;
+ push @PresetList, "Legend|+|top|" . @Legend {"top"} ;
+ push @PresetList, "PlotData|+|align|left" ;
+ push @PresetList, "PlotData|+|anchor|from" ;
+ push @PresetList, "PlotData|+|fontsize|M" ;
+ push @PresetList, "PlotData|+|width|15" ;
+ push @PresetList, "PlotData|+|textcolor|black" ;
+ push @PresetList, "PlotData|+|shift|" . $hBrO . "4,-6" . $hBrC ;
+ }
+}
+
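+# ParseScale handles both 'ScaleMajor' and 'ScaleMinor': attribute values are
+# validated and stored into %Scales under keys such as 'Major unit', 'Minor inc',
+# 'Major grid' and 'Major start', which PlotScale reads later.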
+sub ParseScale
+{
+ my ($scale) ;
+
+ if ($Command =~ /ScaleMajor/i)
+ { $scale .= 'Major' ; }
+ else
+ { $scale .= 'Minor' ; }
+
+ if (! ValidAttributes ("Scale" . $scale)) { return ; }
+
+  &CheckPreset ("Scale" . $scale) ;
+
+ @Scales {$scale} = $true ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+    if ($attribute =~ /Grid/i) # 'gridcolor' preferred over 'grid'; 'grid' allowed for compatibility
+ {
+ if ((! &ColorPredefined ($attrvalue)) && (! defined (@Colors {lc ($attrvalue)})))
+ { &Error ("Scale attribute '$attribute' invalid. Unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ; return ; }
+ @Attributes {$scale . " grid"} = $attrvalue ;
+ delete (@Attributes {"grid"}) ;
+ }
+ elsif ($attribute =~ /Text/i)
+ {
+ $attrvalue =~ s/\~/\\n/g ;
+ $attrvalue =~ s/^\"//g ;
+ $attrvalue =~ s/\"$//g ;
+ @Attributes {$scale . " stubs"} = $attrvalue ;
+ }
+ elsif ($attribute =~ /Unit/i)
+ {
+ if ($DateFormat eq "yyyy")
+ {
+ if (! ($attrvalue =~ /^(?:year|years)$/i))
+ { &Error ("Scale attribute '$attribute' invalid. DateFormat 'yyyy' implies 'unit:year'.") ; return ; }
+ }
+ else
+ {
+ if (! ($attrvalue =~ /^(?:year|month|day)s?$/i))
+ { &Error ("Scale attribute '$attribute' invalid. Specify year, month or day.") ; return ; }
+ }
+ $attrvalue =~ s/s$// ;
+ @Attributes {$scale . " unit"} = $attrvalue ;
+ delete (@Attributes {"unit"}) ;
+ }
+ elsif ($attribute =~ /Increment/i)
+ {
+ if ((! ($attrvalue =~ /^\d+$/i)) || ($attrvalue == 0))
+ { &Error ("Scale attribute '$attribute' invalid. Specify positive integer.") ; return ; }
+ @Attributes {$scale . " inc"} = $attrvalue ;
+ delete (@Attributes {"increment"}) ;
+ }
+ elsif ($attribute =~ /Start/i)
+ {
+ if (! (defined ($DateFormat)))
+ { &Error ("Scale attribute '$attribute' invalid.\n" .
+ "No (valid) command 'DateFormat' specified in previous lines.") ; return ; }
+
+ if (($DateFormat eq "dd/mm/yyyy") || ($DateFormat eq "mm/dd/yyyy"))
+ {
+ if (($attrvalue =~ /^\d+$/) && ($attrvalue >= 1800) && ($attrvalue <= 2030))
+ { $attrvalue = "01/01/" . $attrvalue ; }
+ }
+
+ if (! &ValidDateFormat ($attrvalue))
+ { &Error ("Scale attribute '$attribute' invalid.\n" .
+ "Date does not conform to specified DateFormat '$DateFormat'.") ; return ; }
+
+ if (($DateFormat =~ /\d\d\/\d\d\/\d\d\d\d/) && (substr ($attrvalue,6,4) < 1800))
+ { &Error ("Scale attribute '$attribute' invalid.\n" .
+ " Specify year >= 1800.") ; return ; }
+
+ if (! &ValidDateRange ($attrvalue))
+ { &Error ("Scale attribute '$attribute' invalid.\n" .
+ "Date '$attrvalue' not within range as specified by command Period.") ; return ; }
+
+ @Attributes {$scale . " start"} = $attrvalue ;
+ delete (@Attributes {"start"}) ;
+ }
+ if ($DateFormat eq "yyyy") { @Attributes {$scale . " unit"} = "year" ; }
+ }
+
+ foreach $attribute (keys %Attributes)
+ { @Scales {$attribute} = @Attributes {$attribute} ; }
+}
+
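+# ParseTextData places free text at absolute image positions. An optional 'tabs'
+# attribute of the form 'n-left|center|right,...' defines column stops; the text
+# itself is split into columns at '^' characters (see WriteText).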
+sub ParseTextData
+{
+ &GetData ;
+ if ($NoData)
+ { &Error ("Data expected for command 'TextData', but line is not indented.\n") ; return ; }
+
+ my ($pos, $tabs, $fontsize, $lineheight, $textcolor, $text, $link, $hint) ;
+
+ TextData:
+ while ((! $InputParsed) && (! $NoData))
+ {
+ if (! &ValidAttributes ("TextData"))
+ { &GetData ; next ;}
+
+ &CheckPreset ("TextData") ;
+
+ $pos = "" ; $tabs = "" ; $fontsize = "" ; $lineheight = "" ; $textcolor = "" ; $link = "" ; $hint = "" ;
+
+ if (defined (@TextDefs {"tabs"})) { $tabs = @TextDefs {"tabs"} ; }
+ if (defined (@TextDefs {"fontsize"})) { $fontsize = @TextDefs {"fontsize"} ; }
+ if (defined (@TextDefs {"lineheight"})) { $lineheight = @TextDefs {"lineheight"} ; }
+ if (defined (@TextDefs {"textcolor"})) { $textcolor = @TextDefs {"textcolor"} ; }
+
+ my $data2 = $data ;
+ ($data2, $text) = &ExtractText ($data2) ;
+ @Attributes = split (" ", $data2) ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+ if ($attribute =~ /^FontSize$/i)
+ {
+ if (($attrvalue !~ /\d+(?:\.\d)?/) && ($attrvalue !~ /^(?:xs|s|m|l|xl)$/i))
+        { &Error ("TextData invalid. Attribute '$attribute': specify number or XS,S,M,L,XL.") ;
+ &GetData ; next TextData ; }
+
+ $fontsize = $attrvalue ;
+
+ if ($fontsize !~ /^(?:xs|s|m|l|xl)$/i)
+ {
+ if ($fontsize < 6)
+ { &Warning ("TextData attribute 'fontsize' value too low. Font size 6 assumed.\n") ;
+ $fontsize = 6 ; }
+ if ($fontsize > 30)
+ { &Warning ("TextData attribute 'fontsize' value too high. Font size 30 assumed.\n") ;
+ $fontsize = 30 ; }
+ }
+ }
+ elsif ($attribute =~ /^LineHeight$/i)
+ {
+ $lineheight = &Normalize ($attrvalue) ;
+ if (($lineheight < -0.4) || ($lineheight > 0.4))
+ {
+ if (! $bypass)
+ { &Error ("TextData attribute 'lineheight' invalid.\n" .
+ "Specify value up to 40 pixels = 0.4 inch\n" .
+ "Run with option -b (bypass checks) when this is correct.\n") ; }
+ }
+ }
+ elsif ($attribute =~ /^Pos$/i)
+ {
+ $attrvalue =~ s/\s*$hBrO (.*) $hBrC\s*/$1/x ;
+ ($posx,$posy) = split (",", $attrvalue) ;
+ $posx = &Normalize ($posx) ;
+ $posy = &Normalize ($posy) ;
+ $pos = "$posx,$posy" ;
+ }
+ elsif ($attribute =~ /^Tabs$/i)
+ {
+ $tabs = $attrvalue ;
+ }
+ elsif ($attribute =~ /^(?:Color|TextColor)$/i)
+ {
+ if (! &ColorPredefined ($attrvalue))
+ {
+ if (! defined (@Colors {lc ($attrvalue)}))
+ { &Error ("TextData invalid. Attribute '$attribute' contains unknown color '$attrvalue'.\n" .
+ " Specify command 'Color' before this command.") ;
+ &GetData ; next TextData ; }
+ }
+ if (defined (@Colors {lc ($attrvalue)}))
+ { $textcolor = @Colors { lc ($attrvalue) } ; }
+ else
+ { $textcolor = lc ($attrvalue) ; }
+ }
+ elsif ($attribute =~ /^Text$/i)
+ {
+ $text = $attrvalue ;
+ $text =~ s/\\n/~/gs ;
+ if ($text =~ /\~/)
+ { &Warning ("TextData attribute 'text' contains ~ (tilde).\n" .
+ "Tilde will not be translated into newline character (only in PlotData)") ; }
+
+ }
+ elsif ($attribute =~ /^Link$/i)
+ {
+ $link = &ParseText ($attrvalue) ;
+ $link = &EncodeURL (&NormalizeURL ($link)) ;
+ }
+ }
+
+ if ($fontsize eq "")
+ { $fontsize = "S" ; }
+
+ if ($lineheight eq "")
+ {
+ if ($fontsize =~ /^(?:XS|S|M|L|XL)$/i)
+ {
+ if ($fontsize =~ /XS/i) { $lineheight = 0.11 ; }
+ elsif ($fontsize =~ /S/i) { $lineheight = 0.13 ; }
+ elsif ($fontsize =~ /M/i) { $lineheight = 0.155 ; }
+ elsif ($fontsize =~ /XL/i) { $lineheight = 0.24 ; }
+ else { $lineheight = 0.19 ; }
+ }
+ else
+ {
+ $lineheight = sprintf ("%.2f", (($fontsize * 1.2) / 100)) ;
+ if ($lineheight < $fontsize/100 + 0.02)
+ { $lineheight = $fontsize/100 + 0.02 ; }
+ }
+ }
+
+ if ($textcolor eq "")
+ { $textcolor = "black" ; }
+
+ if ($pos eq "")
+ {
+ $pos = @TextDefs {"pos"} ;
+ ($posx,$posy) = split (",", $pos) ;
+ $posy -= $lineheight ;
+ if ($posy < 0)
+ { $posy = 0 ; }
+ $pos = "$posx,$posy" ;
+ @TextDefs {"pos"} = $pos ;
+ }
+
+# if ($link ne "")
+# { ($text, $link, $hint) = &ProcessWikiLink ($text, $link, $hint) ; }
+
+ if ($text eq "") # upd defaults
+ {
+ if ($pos ne "") { @TextDefs {"pos"} = $pos ; }
+ if ($tabs ne "") { @TextDefs {"tabs"} = $tabs ; }
+ if ($fontsize ne "") { @TextDefs {"fontsize"} = $fontsize ; }
+ if ($textcolor ne "") { @TextDefs {"textcolor"} = $textcolor ; }
+ if ($lineheight ne "") { @TextDefs {"lineheight"} = $lineheight ; }
+ &GetData ; next TextData ;
+ }
+
+ if ($link ne "")
+ {
+ if ($text =~ /\[.*\]/)
+ {
+ &Warning ("TextData contains implicit link(s) in attribute 'text' and explicit attribute 'link'.\n" .
+ "Implicit link(s) ignored.") ;
+ $text =~ s/\[+ (?:[^\|]* \|)? ([^\]]*) \]+/$1/gx ;
+ }
+
+ if ($hint eq "")
+ { $hint = &ExternalLinkToHint ($link) ; }
+ }
+
+ if ($text =~ /\[ [^\]]* \^ [^\]]* \]/x)
+ {
+ &Warning ("TextData attribute 'text' contains tab character (^) inside implicit link ([[..]]). Tab ignored.") ;
+ $text =~ s/(\[+ [^\]]* \^ [^\]]* \]+)/($a = $1), ($a =~ s+\^+ +g), $a/gxe ;
+ }
+
+ if (defined ($tabs) && ($tabs ne ""))
+ {
+ $tabs =~ s/^\s*$hBrO (.*) $hBrC\s*$/$1/x ;
+ @Tabs = split (",", $tabs) ;
+ foreach $tab (@Tabs)
+ {
+ $tab =~ s/\s* (.*) \s*$/$1/x ;
+ if (! ($tab =~ /\d+\-(?:center|left|right)$/))
+        { &Error ("Specify attribute 'tabs' as 'n-a,n-a,n-a,..' where n = numeric value, a = left|right|center.") ;
+ while ((! $InputParsed) && (! $NoData)) { &GetData ; } return ; }
+ }
+
+ @Text = split ('\^', $text) ;
+ if ($#Text > $#Tabs + 1)
+ { &Error ("TextData invalid. " . $#Text . " tab characters ('^') in text, only " . ($#Tabs+1) . " tab(s) defined.") ;
+ &GetData ; next TextData ; }
+ }
+
+ &WriteText ("^", "", 0, $posx, $posy, $text, $textcolor, $fontsize, "left", $link, $hint, $tabs) ;
+
+ &GetData ;
+ }
+}
+
+sub ParseTimeAxis
+{
+ if (! &ValidAttributes ("TimeAxis")) { return ; }
+
+ &CheckPreset ("TimeAxis") ;
+
+ foreach $attribute (keys %Attributes)
+ {
+ my $attrvalue = @Attributes {$attribute} ;
+
+
+ if ($attribute =~ /Format/i)
+ {
+ if ($attrvalue =~ /^yy$/i)
+ { &Error ("TimeAxis attribute '$attribute' valid but not available, waiting for bug fix.\n" .
+ "Please specify 'format:yyyy' instead of 'format:yy'.") ; return ; }
+
+ if ($DateFormat eq "yyyy")
+ {
+ if (! ($attrvalue =~ /^(?:yy|yyyy)$/i))
+ { &Error ("TimeAxis attribute '$attribute' invalid.\n" .
+ "DateFormat 'yyyy' implies 'format:yy' or 'format:yyyy'.") ; return ; }
+ }
+ }
+
+ elsif ($attribute =~ /Order/i)
+ {
+ if ($attrvalue !~ /^(?:normal|reverse)$/i)
+ { &Error ("TimeAxis attribute '$attribute' invalid.\n" .
+ " Specify 'order:normal' (default) or 'order:reverse'\n" .
+ " normal =\n" .
+ " vertical axis: highest date on top,\n" .
+ " horizontal axis: highest date at right side\n" ) ; return ; }
+
+ if (($attrvalue =~ /reverse/i) && ($DateFormat ne "yyyy"))
+ { &Error ("TimeAxis attribute '$attribute' invalid.\n" .
+ " 'order:reverse' is only possible with DateFormat=yyyy (sorry)\n") ; return ; }
+
+ @Attributes {"order"} = lc ($attrvalue) ;
+ }
+
+ elsif ($attribute =~ /Orientation/i)
+ {
+ if ($attrvalue =~ /^hor(?:izontal)?$/i)
+ { @Attributes {"time"} = "x" ; }
+ elsif ($attrvalue =~ /^ver(?:tical)?$/i)
+ { @Attributes {"time"} = "y" ; }
+ else
+ { &Error ("TimeAxis attribute '$attribute' invalid.\n" .
+ "Specify hor[izontal] or ver[tical]") ; return ; }
+ delete (@Attributes {"orientation"}) ;
+ }
+ }
+
+ if (! defined (@Attributes {"format"}))
+ { @Attributes {"format"} = "yyyy" ; }
+
+ %Axis = %Attributes ;
+}
+
+sub ParseUnknownCommand
+{
+ $name = $Command ;
+ $name =~ s/[^a-zA-Z].*$// ;
+ &Error ("Command '$name' unknown.") ;
+}
+
+sub RemoveSpaces
+{
+ my $text = shift ;
+ $text =~ s/\s//g ;
+ return ($text) ;
+}
+
+sub DetectMissingCommands
+{
+ if (! defined (%Image)) { &Error2 ("Command ImageSize missing or invalid") ; }
+ if (! defined (%PlotArea)) { &Error2 ("Command PlotArea missing or invalid") ; }
+ if (! defined ($DateFormat)) { &Error2 ("Command DateFormat missing or invalid") ; }
+ if (! defined (@Axis {"time"})) { &Error2 ("Command TimeAxis missing or invalid") ; }
+
+ if ((@Image {"width"} =~ /auto/i) && (@Axis {"time"} =~ /x/i))
+ { &Error2 ("ImageSize value 'width:auto' only allowed with TimeAxis value 'orientation:vertical'") ; }
+ if ((@Image {"height"} =~ /auto/i) && (@Axis {"time"} =~ /y/i))
+ { &Error2 ("ImageSize value 'height:auto' only allowed with TimeAxis value 'orientation:horizontal'") ; }
+}
+
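+# Normalize converts a dimension to inches: 'in' is taken as is, 'cm' is divided
+# by 2.54, '%' is taken relative to the second argument, and bare numbers are
+# treated as pixels (100 px = 1 inch). Illustrative examples:
+#   &Normalize ("2.54cm")  -> "1.000"
+#   &Normalize ("50%", 8)  -> "4.000"
+#   &Normalize ("150")     -> "1.500"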
+sub Normalize
+{
+ my $number = shift ;
+ my $reference = shift ;
+ my ($val, $dim) ;
+
+ if (($number eq "") || ($number =~ /auto/i))
+ { return ($number) ; }
+
+ $val = $number ; $val =~ s/[^\d\.\-].*$//g ;
+ $dim = $number ; $dim =~ s/\d//g ;
+ if ($dim =~ /in/i) { $number = $val ; }
+ elsif ($dim =~ /cm/i) { $number = $val / 2.54 ; }
+ elsif ($dim =~ /%/) { $number = $reference * $val / 100 ; }
+ else { $number = $val / 100 ; }
+ return (sprintf ("%.3f", $number)) ;
+}
+
+sub ValidateAndNormalizeDimensions
+{
+ my ($val, $dim) ;
+
+ if (@Image {"width"} =~ /auto/i)
+ {
+ foreach $attribute ("width","left","right")
+ { if (@PlotArea {$attribute} =~ /\%/)
+ { &Error2 ("You specified 'ImageSize = width:auto'.\n" .
+ " This implies absolute values in PlotArea attributes 'left', 'right' and/or 'width' (no \%).\n") ; return ; }
+ }
+
+ if ((@PlotArea {"width"} ne "") || (@PlotArea {"left"} eq "") || (@PlotArea {"right"} eq ""))
+ { &Error2 ("You specified 'ImageSize = width:auto'.\n" .
+ " This implies 'PlotArea = width:auto'.\n" .
+ " Instead of 'width' specify plot margins with PlotArea attributes 'left' and 'right'.\n") ; return ; }
+ }
+
+
+ if (@Image {"height"} =~ /auto/i)
+ {
+ foreach $attribute ("height","top","bottom")
+ { if (@PlotArea {$attribute} =~ /\%/)
+ { &Error2 ("You specified 'ImageSize = height:auto'.\n" .
+ " This implies absolute values in PlotArea attributes 'top', 'bottom' and/or 'height' (no \%).\n") ; return ; }
+ }
+
+ if ((@PlotArea {"height"} ne "") || (@PlotArea {"top"} eq "") || (@PlotArea {"bottom"} eq ""))
+ { &Error2 ("You specified 'ImageSize = height:auto'.\n" .
+ " This implies 'PlotArea = height:auto'.\n" .
+ " Instead of 'height' specify plot margins with PlotArea attributes 'top' and 'bottom'.\n") ; return ; }
+ }
+
+ @Image {"width"} = &Normalize (@Image {"width"}) ;
+ @Image {"height"} = &Normalize (@Image {"height"}) ;
+ @Image {"barinc"} = &Normalize (@Image {"barinc"}) ;
+ @PlotArea {"width"} = &Normalize (@PlotArea {"width"}, @Image {"width"}) ;
+ @PlotArea {"height"} = &Normalize (@PlotArea {"height"}, @Image {"height"}) ;
+ @PlotArea {"left"} = &Normalize (@PlotArea {"left"}, @Image {"width"}) ;
+ @PlotArea {"right"} = &Normalize (@PlotArea {"right"}, @Image {"width"}) ;
+ @PlotArea {"bottom"} = &Normalize (@PlotArea {"bottom"}, @Image {"height"}) ;
+ @PlotArea {"top"} = &Normalize (@PlotArea {"top"}, @Image {"height"}) ;
+
+ if (@Image {"width"} =~ /auto/i)
+ {
+ @PlotArea {"width"} = $#Bars * @Image {"barinc"} ;
+ @Image {"width"} = @PlotArea {"left"} + @PlotArea {"width"} + @PlotArea {"right"} ;
+ }
+
+ elsif (@Image {"height"} =~ /auto/i)
+ {
+ @PlotArea {"height"} = $#Bars * @Image {"barinc"} ;
+ @Image {"height"} = @PlotArea {"top"} + @PlotArea {"height"} + @PlotArea {"bottom"} ;
+ }
+
+ if (@PlotArea {"right"} ne "")
+ { @PlotArea {"width"} = @Image {"width"} - @PlotArea {"left"} - @PlotArea {"right"} ; }
+
+ if (@PlotArea {"top"} ne "")
+ { @PlotArea {"height"} = @Image {"height"} - @PlotArea {"top"} - @PlotArea {"bottom"} ; }
+
+ if ((@Image {"width"} > 16) || (@Image {"height"} > 20))
+ {
+ if (! $bypass)
+ { &Error2 ("Maximum image size is 1600x2000 pixels = 16x20 inch\n" .
+ " Run with option -b (bypass checks) when this is correct.\n") ; return ; }
+ }
+
+ if ((@Image {"width"} < 0.25) || (@Image {"height"} < 0.25))
+ {
+ &Error2 ("Minimum image size is 25x25 pixels = 0.25x0.25 inch\n") ;
+ return ;
+ }
+
+ if (@PlotArea {"width"} > @Image {"width"})
+ { &Error2 ("Plot width larger than image width. Please adjust.\n") ; return ; }
+
+ if (@PlotArea {"width"} < 0.2)
+ { &Error2 ("Plot width less than 20 pixels = 0.2 inch. Please adjust.\n") ; return ; }
+
+ if (@PlotArea {"height"} > @Image {"height"})
+ { &Error2 ("Plot height larger than image height. Please adjust.\n") ; return ; }
+
+ if (@PlotArea {"height"} < 0.2)
+ { &Error2 ("Plot height less than 20 pixels = 0.2 inch. Please adjust.\n") ; return ; }
+
+ if (@PlotArea {"left"} + @PlotArea {"width"} > @Image {"width"})
+ { &Error2 ("Plot width + margins larger than image width. Please adjust.\n") ; return ; }
+# @PlotArea {"left"} = @Image {"width"} - @PlotArea {"width"} ; }
+
+ if (@PlotArea {"left"} < 0)
+ { @PlotArea {"left"} = 0 ; }
+
+ if (@PlotArea {"bottom"} + @PlotArea {"height"} > @Image {"height"})
+ { &Error2 ("Plot height + margins larger than image height. Please adjust.\n") ; return ; }
+# @PlotArea {"bottom"} = @Image {"height"} - @PlotArea {"height"} ; }
+
+ if (@PlotArea {"bottom"} < 0)
+ { @PlotArea {"bottom"} = 0 ; }
+
+ if ((defined (@Scales {"Major"})) ||
+ (defined (@Scales {"Minor"})))
+ {
+ if (defined (@Scales {"Major"}))
+ { $margin = 0.2 ; }
+ else
+ { $margin = 0.05 ; }
+
+ if (@Axis {"time"} eq "x")
+ {
+ if (@PlotArea {"bottom"} < $margin)
+ { &Error2 ("Not enough space below plot area for plotting time axis\n" .
+ " Specify 'PlotArea = bottom:x', where x is at least " . (100 * $margin) . " pixels = $margin inch\n") ; return ; }
+ }
+ else
+ {
+ if (@PlotArea {"left"} < $margin)
+ { &Error2 ("Not enough space outside plot area for plotting time axis\n" .
+ " Specify 'PlotArea = left:x', where x is at least " . (100 * $margin) . " pixels = $margin inch\n") ; return ; }
+ }
+ }
+
+ if (defined (@Legend {"orientation"}))
+ {
+ if (defined (@Legend {"left"}))
+ { @Legend {"left"} = &Normalize (@Legend {"left"}, @Image {"width"}) ; }
+ if (defined (@Legend {"top"}))
+ { @Legend {"top"} = &Normalize (@Legend {"top"}, @Image {"height"}) ; }
+ if (defined (@Legend {"columnwidth"}))
+ { @Legend {"columnwidth"} = &Normalize (@Legend {"columnwidth"}, @Image {"width"}) ; }
+
+ if (! defined (@Legend {"columns"}))
+ {
+ @Legend {"columns"} = 1 ;
+ if ((@Legend {"orientation"} =~ /ver/i) &&
+ (@Legend {"position"} =~ /^(?:top|bottom)$/i))
+ {
+ if ($#LegendData > 10)
+ {
+ @Legend {"columns"} = 3 ;
+ &Info2 ("Legend attribute 'columns' not defined. 3 columns assumed.") ;
+ }
+ elsif ($#LegendData > 5)
+ {
+ @Legend {"columns"} = 2 ;
+ &Info2 ("Legend attribute 'columns' not defined. 2 columns assumed.") ;
+ }
+ }
+ }
+
+ if (@Legend {"position"} =~ /top/i)
+ {
+ if (! defined (@Legend {"left"}))
+ { @Legend {"left"} = @PlotArea {"left"} ; }
+ if (! defined (@Legend {"top"}))
+ { @Legend {"top"} = (@Image {"height"} - 0.2) ; }
+ if ((! defined (@Legend {"columnwidth"})) && (@Legend {"columns"} > 1))
+ { @Legend {"columnwidth"} = sprintf ("%02f", ((@PlotArea {"left"} + @PlotArea {"width"} - 0.2) / @Legend {"columns"})) ; }
+ }
+ elsif (@Legend {"position"} =~ /bottom/i)
+ {
+ if (! defined (@Legend {"left"}))
+ { @Legend {"left"} = @PlotArea {"left"} ; }
+ if (! defined (@Legend {"top"}))
+ { @Legend {"top"} = (@PlotArea {"bottom"} - 0.4) ; }
+ if ((! defined (@Legend {"columnwidth"})) && (@Legend {"columns"} > 1))
+ { @Legend {"columnwidth"} = sprintf ("%02f", ((@PlotArea {"left"} + @PlotArea {"width"} - 0.2) / @Legend {"columns"})) ; }
+ }
+ elsif (@Legend {"position"} =~ /right/i)
+ {
+ if (! defined (@Legend {"left"}))
+ { @Legend {"left"} = (@PlotArea {"left"} + @PlotArea {"width"} + 0.2) ; }
+ if (! defined (@Legend {"top"}))
+ { @Legend {"top"} = (@PlotArea {"bottom"} + @PlotArea {"height"} - 0.2) ; }
+ }
+ }
+
+ if (! defined (@Axis {"order"}))
+ { @Axis {"order"} = "normal" ; }
+}
+
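+# WriteProcAnnotate emits one Ploticus '#proc annotate' block per text segment,
+# in two flavours: @PlotTextsPng gets plain text (plus an extra overlay block in
+# link color when a link is present), while @PlotTextsSvg gets placeholder
+# markers that are swapped for the real links and entities after the SVG has
+# been generated.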
+sub WriteProcAnnotate
+{
+ my $bar = shift ;
+ my $shiftx = shift ;
+ my $xpos = shift ;
+ my $ypos = shift ;
+ my $text = shift ;
+ my $textcolor = shift ;
+ my $fontsize = shift ;
+ my $align = shift ;
+ my $link = shift ;
+ my $hint = shift ;
+
+ if (length ($text) > 250)
+ { &Error ("Text segments can be up to 250 characters long. This segment is " . length ($text) . " chars.\n" .
+ " You can either shorten the text or\n" .
+ " - PlotData: insert line breaks (~)\n" .
+      " - TextData: insert tabs (^) to produce columns\n") ; return ; }
+
+ if ($textcolor eq "")
+ { $textcolor = "black" ; }
+
+ my $textdetails = " textdetails: align=$align size=$fontsize color=$textcolor" ;
+
+ push @PlotTextsPng, "#proc annotate\n" ;
+ push @PlotTextsSvg, "#proc annotate\n" ;
+
+ push @PlotTextsPng, " location: $xpos $ypos\n" ;
+ push @PlotTextsSvg, " location: $xpos $ypos\n" ;
+
+ push @PlotTextsPng, $textdetails . "\n" ;
+ push @PlotTextsSvg, $textdetails . "\n" ;
+
+ $text2 = $text ;
+ $text2 =~ s/\[\[//g ;
+ $text2 =~ s/\]\]//g ;
+ if ($text2 =~ /^\s/)
+ { push @PlotTextsPng, " text: \n\\$text2\n\n" ; }
+ else
+ { push @PlotTextsPng, " text: $text2\n\n" ; }
+
+ $text2 = $text ;
+ if ($link ne "")
+ {
+ # put placeholder in Ploticus input file
+ # will be replaced by real link after SVG generation
+ # this allows adding color info
+ push @linksSVG, &DecodeInput ($link) ;
+ my $lcnt = $#linksSVG ;
+ $text2 =~ s/\[\[ ([^\]]+) \]\]/\[$lcnt\[$1\]$lcnt\]/x ;
+ $text2 =~ s/\[\[ ([^\]]+) $/\[$lcnt\[$1\]$lcnt\]/x ;
+ $text2 =~ s/^ ([^\[]+) \]\]/\[$lcnt\[$1\]$lcnt\]/x ;
+ }
+
+ $text3 = &EncodeHtml ($text2) ;
+ if ($text2 ne $text3)
+ {
+ # put placeholder in Ploticus input file
+ # will be replaced by real text after SVG generation
+ # Ploticus would autoscale image improperly when text contains &#xxx; tags
+ # because this would count as 5 chars
+ push @textsSVG, &DecodeInput ($text3) ;
+ $text3 = "{{" . $#textsSVG . "}}" ;
+ while (length ($text3) < length ($text2)) { $text3 .= "x" ; }
+ }
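+  # Example of the placeholder scheme: with link index 3, '[[Lenin]]' becomes
+  # '[3[Lenin]3]' in the Ploticus input; HTML-encoded text is replaced by
+  # '{{0}}xxx...' padded with 'x' to the original length, so Ploticus still
+  # reserves the right amount of space when autoscaling.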
+
+ if ($text3 =~ /^\s/)
+ { push @PlotTextsSvg, " text: \n\\$text3\n\n" ; }
+ else
+ { push @PlotTextsSvg, " text: $text3\n\n" ; }
+
+ if ($link ne "")
+ {
+ $MapPNG = $true ;
+
+ push @PlotTextsPng, "#proc annotate\n" ;
+ push @PlotTextsPng, " location: $xpos $ypos\n" ;
+
+# push @PlotTextsPng, " boxmargin: 0.01\n" ;
+
+ if ($align ne "right")
+ {
+ push @PlotTextsPng, " clickmapurl: $link\n" ;
+ if ($hint ne "")
+ { push @PlotTextsPng, " clickmaplabel: $hint\n" ; }
+ }
+ else
+ {
+ if ($bar eq "")
+ {
+ if ($WarnOnRightAlignedText ++ == 0)
+ { &Warning2 ("Links on right aligned texts are only supported for svg output,\npending Ploticus bug fix.") ; }
+ return ;
+ }
+ else
+ {
+ push @PlotTextsPng, " clickmapurl: $link\&\&$shiftx\n" ;
+ if ($hint ne "")
+ { push @PlotTextsPng, " clickmaplabel: $hint\n" ; }
+ }
+ }
+
+ $textdetails =~ s/color=[^\s]+/color=$LinkColor/ ;
+ push @PlotTextsPng, $textdetails . "\n" ;
+
+ $text = &DecodeInput ($text) ;
+ if ($text =~ /^[^\[]+\]\]/)
+ { $text = "[[" . $text ; }
+ if ($text =~ /\[\[[^\]]+$/)
+ { $text .= "]]" ; }
+ my $pos1 = index ($text, "[[") ;
+ my $pos2 = index ($text, "]]") + 1 ;
+ if (($pos1 > -1) && ($pos2 > -1))
+ {
+ for (my $i = 0 ; $i < length ($text) ; $i++)
+ {
+ $c = substr ($text, $i, 1) ;
+ if ($c ne "\n")
+ {
+ if (($i < $pos1) || ($i > $pos2))
+ { substr ($text, $i, 1) = " " ; }
+ }
+ }
+ }
+
+ $text =~ s/\[\[(.*?)\]\]/$1/s ;
+
+ if ($text =~ /^\s/)
+ { push @PlotTextsPng, " text: \n\\$text\n\n" ; }
+ else
+ { push @PlotTextsPng, " text: $text\n\n" ; }
+
+# push @PlotTextsPng, "#proc rect\n" ;
+# push @PlotTextsPng, " color: green\n" ;
+# push @PlotTextsPng, " rectangle: 1(s)+0.25 1937.500(s)+0.06 1(s)+0.50 1937.500(s)+0.058\n" ;
+# push @PlotTextsPng, "\n\n" ;
+ }
+}
+
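+# WriteText splits a text into segments before handing each one to
+# &WriteProcAnnotate: mode '^' (TextData) splits at tab characters and steps
+# sideways along the tab stops, mode '~' (PlotData) splits at line breaks and
+# advances the position by a font-size dependent line height between lines.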
+sub WriteText
+{
+ my $mode = shift ;
+ my $bar = shift ;
+ my $shiftx = shift ;
+ my $posx = shift ;
+ my $posy = shift ;
+ my $text = shift ;
+ my $textcolor = shift ;
+ my $fontsize = shift ;
+ my $align = shift ;
+ my $link = shift ;
+ my $hint = shift ;
+ my $tabs = shift ;
+ my ($link2, $hint2, $tab) ;
+ my $outside = $false ;
+ if (@Axis {"order"} =~ /reverse/i)
+ {
+ if (@Axis {"time"} eq "y")
+ { $posy =~ s/(.*)(\(s\))/(-$1).$2/xe ; }
+ else
+ { $posx =~ s/(.*)(\(s\))/(-$1).$2/xe ; }
+ }
+
+ if ($posx !~ /\(s\)/)
+ {
+ if ($posx < 0)
+ { $outside = $true ; }
+ if (@Image {"width"} !~ /auto/i)
+ {
+ if ($posx > @Image {"width"}/100)
+ { $outside = $true ; }
+ }
+ }
+ if ($posy !~ /\(s\)/)
+ {
+ if ($posy < 0)
+ { $outside = $true ; }
+ if (@Image {"height"} !~ /auto/i)
+ {
+ if ($posy > @Image {"height"}/100)
+ { $outside = $true ; }
+ }
+ }
+ if ($outside)
+ {
+ if ($WarnTextOutsideArea++ < 5)
+ { $text =~ s/\n/~/g ;
+ &Error ("Text segment '$text' falls outside image area. Text ignored.") ; }
+ return ;
+ }
+
+ my @Tabs = split (",", $tabs) ;
+ foreach $tab (@Tabs)
+ { $tab =~ s/\s* (.*) \s*$/$1/x ; }
+
+ $posx0 = $posx ;
+ my @Text ;
+ my $dy = 0 ;
+
+ if ($text =~ /\[\[.*\]\]/)
+ {
+ $link = "" ; $hint = "" ;
+ }
+
+ my @Text ;
+ if ($mode eq "^")
+ { @Text = split ('\^', $text) ; }
+ elsif ($mode eq "~")
+ {
+ @Text = split ('\n', $text) ;
+
+ if ($fontsize =~ /^(?:XS|S|M|L|XL)$/i)
+ {
+ if ($fontsize =~ /XS/i) { $dy = 0.09 ; }
+ elsif ($fontsize =~ /S/i) { $dy = 0.11 ; }
+ elsif ($fontsize =~ /M/i) { $dy = 0.135 ; }
+ elsif ($fontsize =~ /XL/i) { $dy = 0.21 ; }
+ else { $dy = 0.16 ; }
+ }
+ else
+ {
+ $dy = sprintf ("%.2f", (($fontsize * 1.2) / 100)) ;
+ if ($dy < $fontsize/100 + 0.02)
+ { $dy = $fontsize/100 + 0.02 ; }
+ }
+ }
+ else
+ { push @Text, $text ; }
+
+
+ foreach $text (@Text)
+ {
+ if ($text !~ /^[\n\s]*$/)
+ {
+ $link2 = "" ;
+ $hint2 = "" ;
+ ($text, $link2, $hint2) = &ProcessWikiLink ($text, $link2, $hint2) ;
+
+ if ($link2 eq "")
+ {
+ $link2 = $link ;
+ if (($link ne "") && ($text !~ /\[\[.*\]\]/))
+ { $text = "[[" . $text . "]]" ;}
+ }
+ if ($hint2 eq "")
+ { $hint2 = $hint ; }
+
+ &WriteProcAnnotate ($bar, $shiftx, $posx, $posy, $text, $textcolor, $fontsize, $align, $link2, $hint2) ;
+ }
+
+ if ($#Tabs >= 0)
+ {
+ $tab = shift (@Tabs) ;
+ ($dx,$align) = split ("\-", $tab) ;
+ $posx = $posx0 + &Normalize ($dx) ;
+ }
+ if ($posy =~ /\+/)
+ { ($posy1, $posy2) = split ('\+', $posy) ; }
+ elsif ($posy =~ /.+\-/)
+ {
+ if ($posy =~ /^\-/)
+ {
+ ($sign, $posy1, $posy2) = split ('\-', $posy) ; $posy2 = -$posy2 ;
+ $posy1 = "-" . $posy1 ;
+ }
+ else
+ { ($posy1, $posy2) = split ('\-', $posy) ; $posy2 = -$posy2 ; }
+ }
+ else
+ { $posy1 = $posy ; $posy2 = 0 ; }
+
+ $posy2 -= $dy ;
+
+ if ($posy2 == 0)
+ { $posy = $posy1 ; }
+ elsif ($posy2 < 0)
+ { $posy = $posy1 . "$posy2" ; }
+ else
+ { $posy = $posy1 . "+" . $posy2 ; }
+ }
+}
+
+sub WriteProcDrawCommandsOld
+{
+ my $posx = shift ;
+ my $posy = shift ;
+ my $text = shift ;
+ my $textcolor = shift ;
+ my $fontsize = shift ;
+ my $link = shift ;
+ my $hint = shift ;
+
+ $posx0 = $posx ;
+ my @Text = split ('\^', $text) ;
+ my $align = "text" ;
+ foreach $text (@Text)
+ {
+ push @TextData, " mov $posx $posy\n" ;
+ push @TextData, " textsize $fontsize\n" ;
+ push @TextData, " color $textcolor\n" ;
+ push @TextData, " $align $text\n" ;
+
+
+ $tab = shift (@Tabs) ;
+ ($dx,$align) = split ("\-", $tab) ;
+ $posx = $posx0 + &Normalize ($dx) ;
+ if ($align =~ /left/i) { $align = "text" ; }
+ elsif ($align =~ /right/i) { $align = "rightjust" ; }
+ else { $align = "centext" ; }
+ }
+}
+
+sub WritePlotFile
+{
+ &WriteTexts ;
+
+ $script = "" ;
+ my ($color) ;
+ if (@Axis {"time"} eq "x")
+ { $AxisBars = "y" ; }
+ else
+ { $AxisBars = "x" ; }
+
+# if ((@Axis {"time"} eq "y") && ($#Bars > 0))
+# {
+# undef @BarsTmp ;
+# while ($#Bars >= 0)
+# { push @BarsTmp, pop @Bars ; }
+# @Bars = @BarsTmp ;
+# }
+
+ if ($tmpdir ne "")
+ { $file_script = $tmpdir.$pathseparator."EasyTimeline.txt.$$" ; }
+ else
+ { $file_script = "EasyTimeline.txt" ; }
+
+ print "Ploticus input file = ".$file_script."\n";
+
+ # $fmt = "gif" ;
+ open "FILE_OUT", ">", $file_script ;
+
+ #proc settings
+# $script .= "#proc settings\n" ;
+# $script .= " xml_encoding: utf-8\n" ;
+# $script .= "\n" ;
+
+ # proc page
+ $script .= "#proc page\n" ;
+ $script .= " dopagebox: no\n" ;
+ $script .= " pagesize: ". @Image {"width"} . " ". @Image {"height"} . "\n" ;
+ if (defined (@BackgroundColors {"canvas"}))
+ { $script .= " backgroundcolor: " . @BackgroundColors {"canvas"} . "\n" ; }
+ $script .= "\n" ;
+
+ $barcnt = $#Bars + 1 ;
+
+# if ($AlignBars eq "justify") && ($#Bars > 0)
+#
+# given P = plotwidth in pixels
+# given B = half bar width in pixels
+# get U = plotwidth in units
+# get x = half bar width in units
+#
+# first bar plotted at unit 1
+# last bar plotted at unit c
+# let C = c - 1 (units between centers of lowest and highest bar) -> x = (U-C) / 2
+#
+# Justify: calculate range for axis in units:
+# axis starts at 1-x and ends at c+x =
+# x/B = U/P -> x = BU/P (1)
+# U = c+x - (1-x) = (c-1) + 2x -> x = (U-(c-1))/2 (2)
+#
+# (1) & (2) -> BU/P = (U-(c-1))/2
+# -> 2BU/P = U-(c-1)
+# -> 2BU/P = U - C
+# -> 2BU = PU - PC
+# -> U (2B-P) = -PC
+# -> U = -PC/(2B-P)
+# P = @PlotArea {$extent}
+# C = c - 1 = $#Bars
+# 2B = $MaxBarWidth
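+# Illustrative numbers (not from the script): P = 4" plot extent, 2B = 0.5" bar
+# width, 5 bars so C = 4. Then U = -PC/(2B-P) = -16/-3.5 = 4.571 units,
+# x = (U-C)/2 = 0.286, and the bar axis runs from 1-x = 0.714 to 5+x = 5.286.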
+ if (! defined ($AlignBars))
+ {
+ &Info2 ("AlignBars not defined. Alignment 'early' assumed.") ;
+ $AlignBars = "early" ;
+ }
+
+ if (@Axis {"time"} eq "x")
+ { $extent = "height" ; }
+ else
+ { $extent = "width" ; }
+
+ if ($MaxBarWidth > @PlotArea {$extent})
+ { &Error2 ("Maximum bar width exceeds plotarea " . $extent . ".") ; return ; }
+
+ if ($MaxBarWidth == @PlotArea {$extent})
+ { @PlotArea {$extent} += 0.01 ; }
+
+ if ($MaxBarWidth == @PlotArea {$extent})
+ {
+ $till = 1 ;
+ $from = 1 ;
+ }
+ else
+ {
+ if ($AlignBars eq "justify")
+ {
+ if ($#Bars > 0)
+ {
+ $U = - (@PlotArea {$extent} * $#Bars) / ($MaxBarWidth - @PlotArea {$extent}) ;
+ $x = ($U - $#Bars) / 2 ;
+ $from = 1 - $x ;
+ $till = 1 + $#Bars + $x ;
+ }
+      else # one bar -> "justify" is a misnomer here; treat as "center"
+ {
+ # $x = ($MaxBarWidth /2) / @PlotArea {$extent} ;
+ # $from = 0.5 - $x ;
+ # $till = $from + 1 ;
+ $from = 0.5 ;
+ $till = 1.5 ;
+ }
+ }
+ elsif ($AlignBars eq "early")
+ {
+ $U = $#Bars + 1 ;
+ if ($U == 0)
+ { $U = 1 ; }
+ $x = (($MaxBarWidth /2) * $U) / @PlotArea {$extent} ;
+ $from = 1 - $x ;
+ $till = $from + $U ;
+ }
+ elsif ($AlignBars eq "late")
+ {
+ $U = $#Bars + 1 ;
+ $x = (($MaxBarWidth /2) * $U) / @PlotArea {$extent} ;
+ $till = $U + $x ;
+ $from = $till - $U ;
+ }
+ }
+
+# if ($#Bars == 0)
+# {
+# $from = 1 - $MaxBarWidth ;
+# $till = 1 + $MaxBarWidth ;
+# }
+ if ($from eq $till)
+ { $till = $from + 1 ; }
+
+ #proc areadef
+ $script .= "#proc areadef\n" ;
+ $script .= " rectangle: " . @PlotArea {"left"} . " " . @PlotArea {"bottom"} . " " .
+ sprintf ("%.2f", @PlotArea {"left"} + @PlotArea {"width"}). " " . sprintf ("%.2f", @PlotArea {"bottom"} + @PlotArea {"height"}) . "\n" ;
+ if (($DateFormat eq "yyyy") || ($DateFormat eq "x.y"))
+ { $script .= " " . @Axis {"time"} . "scaletype: linear\n" ; } # date yyyy
+ else
+ { $script .= " " . @Axis {"time"} . "scaletype: date $DateFormat\n" ; }
+
+ if (@Axis {"order"} !~ /reverse/i)
+ { $script .= " " . @Axis {"time"} . "range: " . @Period{"from"} . " " . @Period{"till"} . "\n" ; }
+ else
+ { $script .= " " . @Axis {"time"} . "range: " . (-@Period{"till"}) . " " . (-@Period{"from"}) . "\n" ; }
+
+ $script .= " " . $AxisBars . "scaletype: linear\n" ;
+ $script .= " " . $AxisBars . "range: " . sprintf ("%.3f", $from-0.001) . " " . sprintf ("%.3f", $till) . "\n" ;
+ $script .= " #saveas: A\n" ;
+ $script .= "\n" ;
+
+ #proc rect (test)
+# $script .= "#proc rect\n" ;
+# $script .= " rectangle 1.0 1.0 1.4 1.4\n" ;
+# $script .= " color gray(0.95)\n" ;
+# $script .= " clickmaplabel: Vladimir Ilyich Lenin\n" ;
+# $script .= " clickmapurl: http://www.wikipedia.org/wiki/Vladimir_Lenin\n" ;
+
+
+ #proc legendentry
+ foreach $color (sort keys %Colors)
+ {
+ $script .= "#proc legendentry\n" ;
+ $script .= " sampletype: color\n" ;
+
+ if ((defined (@ColorLabels {$color})) && (@ColorLabels {$color} ne ""))
+ { $script .= " label: " . @ColorLabels {$color} . "\n" ; }
+ $script .= " details: " . @Colors {$color} . "\n" ;
+ $script .= " tag: $color\n" ;
+ $script .= "\n" ;
+ }
+
+ if (defined (@BackgroundColors {"bars"}))
+ {
+ #proc getdata / #proc bars
+ $script .= "#proc getdata\n" ;
+ $script .= " delim: comma\n" ;
+ $script .= " data:\n" ;
+
+ $maxwidth = 0 ;
+ foreach $entry (@PlotBars)
+ {
+ ($width) = split (",", $entry) ;
+ if ($width > $maxwidth)
+ { $maxwidth = $width ; }
+ }
+
+ for ($b = 0 ; $b <= $#Bars ; $b++)
+ { $script .= ($b+1) . "," . @Period {"from"} . "," . @Period {"till"} . ",".
+ @BackgroundColors {"bars"} . "\n" ; }
+ $script .= "\n" ;
+
+ #proc bars
+ $script .= "#proc bars\n" ;
+ $script .= " axis: " . @Axis {"time"} . "\n" ;
+ $script .= " barwidth: $maxwidth\n" ;
+ $script .= " outline: no\n" ;
+ if (@Axis {"time"} eq "x")
+ { $script .= " horizontalbars: yes\n" ; }
+ $script .= " locfield: 1\n" ;
+ $script .= " segmentfields: 2 3\n" ;
+ $script .= " colorfield: 4\n" ;
+
+# $script .= " clickmaplabel: Vladimir Ilyich Lenin\n" ;
+# $script .= " clickmapurl: http://www.wikipedia.org/wiki/Vladimir_Lenin\n" ;
+
+ $script .= "\n" ;
+ }
+
+ #proc axis
+ if (defined (@Scales {"Minor grid"}))
+ { &PlotScale ("Minor", $true) ; }
+ if (defined (@Scales {"Major grid"}))
+ { &PlotScale ("Major", $true) ; }
+
+ &PlotLines ("back") ;
+
+ @PlotBarsNow = @PlotBars ;
+ &PlotBars ;
+
+  $script .= "\n([inc3])\n\n" ; # will be replaced by rects
+
+  %x = %BarWidths ;
+ foreach $entry (@PlotLines)
+ {
+ ($bar) = split (",", $entry) ;
+ $bar =~ s/\#.*// ;
+ $width = @BarWidths {$bar} ;
+ $entry = sprintf ("%6.3f",$width) . "," . $entry ;
+ }
+
+ @PlotBarsNow = @PlotLines ;
+ &PlotBars ;
+
+ #proc axis
+ if ($#Bars > 0)
+ {
+ $scriptPng2 = "#proc " . $AxisBars . "axis\n" ;
+ $scriptSvg2 = "#proc " . $AxisBars . "axis\n" ;
+ if ($AxisBars eq "x")
+ {
+ $scriptPng2 .= " stubdetails: adjust=0,0.09\n" ;
+ $scriptSvg2 .= " stubdetails: adjust=0,0.09\n" ;
+ }
+ else
+ {
+ $scriptPng2 .= " stubdetails: adjust=0.09,0\n" ;
+ $scriptSvg2 .= " stubdetails: adjust=0.09,0\n" ;
+ }
+ $scriptPng2 .= " tics: none\n" ;
+ $scriptSvg2 .= " tics: none\n" ;
+ $scriptPng2 .= " stubrange: 1\n" ;
+ $scriptSvg2 .= " stubrange: 1\n" ;
+ if ($AxisBars eq "y")
+ {
+ $scriptPng2 .= " stubslide: -" . sprintf ("%.2f", $MaxBarWidth / 2) . "\n" ;
+ $scriptSvg2 .= " stubslide: -" . sprintf ("%.2f", $MaxBarWidth / 2) . "\n" ;
+ }
+ $scriptPng2 .= " stubs: text\n" ;
+ $scriptSvg2 .= " stubs: text\n" ;
+
+ my ($text, $link, $hint) ;
+
+ undef (@Bars2) ;
+ foreach $bar (@Bars)
+ {
+ if ($AxisBars eq "y")
+ { push @Bars2, $bar ; }
+ else
+ { unshift @Bars2, $bar ; }
+ }
+
+ foreach $bar (@Bars2)
+ {
+ $hint = "" ;
+ $text = @BarLegend {lc ($bar)} ;
+ if ($text =~ /^\s*$/)
+ { $text = "\\" ; }
+
+ $link = @BarLink {lc ($bar)} ;
+ if (! defined ($link))
+ {
+ if ($text =~ /\[.*\]/)
+ { ($text, $link, $hint) = &ProcessWikiLink ($text, $link, $hint) ; }
+ }
+
+ $text =~ s/\[+([^\]]*)\]+/$1/ ;
+ $scriptPng2 .= "$text\n" ;
+ if (defined ($link))
+ {
+ push @linksSVG, $link ;
+ my $lcnt = $#linksSVG ;
+ $scriptSvg2 .= "[" . $lcnt . "[" . $text . "]" . $lcnt . "]\n" ;
+ }
+ else
+ { $scriptSvg2 .= "$text\n" ; }
+ }
+ $scriptPng2 .= "\n" ;
+ $scriptSvg2 .= "\n" ;
+
+ $scriptPng2 .= "#proc " . $AxisBars . "axis\n" ;
+ if ($AxisBars eq "x")
+ { $scriptPng2 .= " stubdetails: adjust=0,0.09 color=$LinkColor\n" ; }
+ else
+ { $scriptPng2 .= " stubdetails: adjust=0.09,0 color=$LinkColor\n" ; }
+ $scriptPng2 .= " tics: none\n" ;
+ $scriptPng2 .= " stubrange: 1\n" ;
+ if ($AxisBars eq "y")
+ { $scriptPng2 .= " stubslide: -" . sprintf ("%.2f", $MaxBarWidth / 2) . "\n" ; }
+ $scriptPng2 .= " stubs: text\n" ;
+
+ $barcnt = $#Bars + 1 ;
+ foreach $bar (@Bars2)
+ {
+ $hint = "" ;
+ $text = @BarLegend {lc ($bar)} ;
+ if ($text =~ /^\s*$/)
+ { $text = "\\" ; }
+
+ $link = @BarLink {lc ($bar)} ;
+ if (! defined ($link))
+ {
+ if ($text =~ /\[.*\]/)
+ { ($text, $link, $hint) = &ProcessWikiLink ($text, $link, $hint) ; }
+ }
+ if ((! defined ($link)) || ($link eq ""))
+ { $text = "\\" ; }
+ else
+ {
+ $scriptPng3 .= "#proc rect\n" ;
+ $scriptPng3 .= " rectangle: 0 $barcnt(s)+0.05 " . @PlotArea {"left"} . " $barcnt(s)-0.05\n" ;
+ $scriptPng3 .= " color: " . @BackgroundColors {"canvas"} . "\n" ;
+ $scriptPng3 .= " clickmapurl: " . $link . "\n" ;
+ if ((defined ($hint)) && ($hint ne ""))
+ { $scriptPng3 .= " clickmaplabel: " . $hint . "\n" ; }
+
+ $text =~ s/\[+([^\]]*)\]+/$1/ ;
+ }
+ $scriptPng2 .= "$text\n" ;
+
+ $barcnt-- ;
+ }
+ $scriptPng2 .= "\n" ;
+ }
+
+ &PlotLines ("front") ;
+
+ $script .= "\n([inc1])\n\n" ; # will be replaced by annotations
+ $script .= "\n([inc2])\n\n" ;
+
+
+ if ($#PlotTextsPng >= 0)
+ {
+ foreach $command (@PlotTextsPng)
+ {
+ if ($command =~ /^\s*location/)
+ { $command =~ s/(.*)\[(.*)\](.*)/$1 . ($#Bars - $2 + 2) . $3/xe ; }
+
+ $scriptPng1 .= $command ;
+ }
+ $scriptPng1 .= "\n" ;
+ }
+
+ if ($#PlotTextsSvg >= 0)
+ {
+ foreach $command (@PlotTextsSvg)
+ {
+ if ($command =~ /^\s*location/)
+ { $command =~ s/(.*)\[(.*)\](.*)/$1 . ($#Bars - $2 + 2) . $3/xe ; }
+
+ $scriptSvg1 .= $command ;
+ }
+ $scriptSvg1 .= "\n" ;
+ }
+
+# $script .= "#proc symbol\n" ;
+# $script .= " location: 01/01/1943(s) Korea \n" ;
+# $script .= " symbol: style=fill shape=downtriangle fillcolor=white radius=0.04\n" ;
+# $script .= "\n" ;
+
+ #proc axis
+ # repeat without grid to get axis on top of bar
+ # needed because axis may overlap bar slightly
+ if (defined (@Scales {"Minor"}))
+ { &PlotScale ("Minor", $false) ; }
+ if (defined (@Scales {"Major"}))
+ { &PlotScale ("Major", $false) ; }
+
+ #proc drawcommands
+ if ($#TextData >= 0)
+ {
+ $script .= "#proc drawcommands\n" ;
+ $script .= " commands:\n" ;
+ foreach $entry (@TextData)
+ { $script .= $entry ; }
+ $script .= "\n" ;
+ }
+
+ #proc legend
+ if (defined (@Legend {"orientation"}))
+ {
+ if (($#LegendData < 0) && ($Preset eq ""))
+ { &Error2 ("Command 'Legend' found, but no entries for the legend were specified.\n" .
+ " Please remove or disable command (disable = put \# before the command)\n" .
+ " or specify entries for the legend with command 'Colors', attribute 'legend'\n") ;
+ return ; }
+
+ $perColumn = 999 ;
+ if (@Legend {"orientation"} =~ /ver/i)
+ {
+ if (@Legend {"columns"} > 1)
+ {
+ $perColumn = 0 ;
+ while ((@Legend {"columns"} * $perColumn) < $#LegendData + 1)
+ { $perColumn ++ ; }
+ }
+ }
+
+ for ($l = 1 ; $l <= @Legend {"columns"} ; $l++)
+ {
+ $script .= "#proc legend\n" ;
+ $script .= " noclear: yes\n" ;
+ if (@Legend {"orientation"} =~ /ver/i)
+ { $script .= " format: multiline\n" ; }
+ else
+ { $script .= " format: singleline\n" ; }
+ $script .= " seglen: 0.2\n" ;
+ $script .= " swatchsize: 0.12\n" ;
+ $script .= " textdetails: size=S\n" ;
+ $script .= " location: " . (@Legend{"left"}+0.2) . " " . @Legend{"top"} . "\n" ;
+ $script .= " specifyorder:\n" ;
+ for ($l2 = 1 ; $l2 <= $perColumn ; $l2++)
+ {
+ $category = shift (@LegendData) ;
+ if (defined ($category))
+ { $script .= "$category\n" ; }
+ }
+ $script .= "\n" ;
+ @Legend {"left"} += @Legend {"columnwidth"} ;
+ }
+ }
+
+ $script .= "#endproc\n" ;
+
+ print "\nGenerating output:\n" ;
+ if ( $plcommand ne "" )
+ { $pl = $plcommand; }
+ else
+ {
+ $pl = "pl.exe" ;
+ if ($env eq "Linux")
+ { $pl = "pl" ; }
+ }
+
+ print "Using ploticus command \"".$pl."\" (".$plcommand.")\n";
+
+ $script_save = $script ;
+
+ $script =~ s/\(\[inc1\]\)/$scriptSvg1/ ;
+ $script =~ s/\(\[inc2\]\)/$scriptSvg2/ ;
+ $script =~ s/\(\[inc3\]\)// ;
+
+ $script =~ s/textsize XS/textsize 7/gi ;
+ $script =~ s/textsize S/textsize 8.9/gi ;
+
+ $script =~ s/textsize M/textsize 10.5/gi ;
+ $script =~ s/textsize L/textsize 13/gi ;
+ $script =~ s/textsize XL/textsize 17/gi ;
+ $script =~ s/size=XS/size=7/gi ;
+ $script =~ s/size=S/size=8.9/gi ;
+ $script =~ s/size=M/size=10.5/gi ;
+ $script =~ s/size=L/size=13/gi ;
+ $script =~ s/size=XL/size=17/gi ;
+
+
+ $script =~ s/(\n location:.*)/&ShiftOnePixelForSVG($1)/ge ;
+
+ open "FILE_OUT", ">", $file_script ;
+ print FILE_OUT &DecodeInput($script) ;
+ close "FILE_OUT" ;
+
+ $map = ($MapSVG) ? "-map" : "";
+
+ print "Running Ploticus to generate svg file\n" ;
+# my $cmd = "$pl $map -" . "svg" . " -o $file_vector $file_script -tightcrop -font \"Times\"" ;
+# my $cmd = "$pl $map -" . "svg" . " -o $file_vector $file_script -tightcrop" ;
+ my $cmd = EscapeShellArg($pl) . " $map -" . "svg" . " -o " .
+ EscapeShellArg($file_vector) . " " . EscapeShellArg($file_script) . " -tightcrop" ;
+ print "$cmd\n";
+ system ($cmd) ;
+
+ $script = $script_save ;
+ $script =~ s/dopagebox: no/dopagebox: yes/ ;
+
+ $script =~ s/\(\[inc1\]\)/$scriptPng1/ ;
+ $script =~ s/\(\[inc2\]\)/$scriptPng2/ ;
+ $script =~ s/\(\[inc3\]\)/$scriptPng3/ ;
+
+ $script =~ s/textsize XS/textsize 6/gi ;
+ $script =~ s/textsize S/textsize 8/gi ;
+ $script =~ s/textsize M/textsize 10/gi ;
+ $script =~ s/textsize L/textsize 14/gi ;
+ $script =~ s/textsize XL/textsize 18/gi ;
+ $script =~ s/size=XS/size=6/gi ;
+ $script =~ s/size=S/size=8/gi ;
+ $script =~ s/size=M/size=10/gi ;
+ $script =~ s/size=L/size=14/gi ;
+ $script =~ s/size=XL/size=18/gi ;
+
+ open "FILE_OUT", ">", $file_script ;
+ print FILE_OUT &DecodeInput($script) ;
+ close "FILE_OUT" ;
+
+ $map = ($MapPNG && $linkmap) ? "-csmap" : "";
+ if ($linkmap && $showmap)
+ { $map .= " -csmapdemo" ; }
+
+# $crop = "-crop 0,0," + @ImageSize {"width"} . "," . @ImageSize {"height"} ;
+ print "Running Ploticus to generate bitmap\n" ;
+# $cmd = "$pl $map -" . $fmt . " -o $file_bitmap $file_script -tightcrop" ; # -v $file_bitmap" ;
+# $cmd = "$pl $map -" . $fmt . " -o $file_bitmap $file_script -tightcrop -diagfile $file_pl_info -errfile $file_pl_err" ;
+ $cmd = EscapeShellArg($pl) . " $map -" . $fmt . " -o " .
+ EscapeShellArg($file_bitmap) . " " . EscapeShellArg($file_script) . " -tightcrop" .
+ " -mapfile " . EscapeShellArg($file_htmlmap) ;
+ print "$cmd\n";
+ system ($cmd) ;
+
+ if ((-e $file_bitmap) && (-s $file_bitmap > 500 * 1024))
+ {
+ &Error2 ("Output image size exceeds 500 K. Image deleted.\n" .
+ "Run with option -b (bypass checks) when this is correct.\n") ;
+ unlink $file_bitmap ;
+ } ;
+
+ # not for Wikipedia, only for offline use:
+ if ((-e $file_bitmap) && ($fmt eq "gif"))
+ {
+ print "Running nconvert to convert gif image to png format\n\n" ;
+ print "---------------------------------------------------------------------------\n" ;
+ $cmd = "nconvert.exe -out png " . EscapeShellArg($file_bitmap) ;
+ system ($cmd) ;
+ print "---------------------------------------------------------------------------\n" ;
+
+ if (! (-e $file_png))
+ { print "PNG file not created (is nconvert.exe missing?)\n\n" ; }
+ }
+
+ if (-e $file_htmlmap) # correct click coordinates of right aligned texts (Ploticus bug)
+ {
+ open "FILE_IN", "<", $file_htmlmap ;
+ @map = <FILE_IN> ;
+ close "FILE_IN" ;
+
+ foreach $line (@map)
+ {
+ chomp $line ;
+ if ($line =~ /\&\&/)
+ {
+ $coords = $line ;
+ $shift = $line ;
+ $coords =~ s/^.*coords\=\"([^\"]*)\".*$/$1/ ;
+ $shift =~ s/^.*\&\&([^\"]*)\".*$/$1/ ;
+ $line =~ s/\&\&[^\"]*// ;
+ (@updcoords) = split (",", $coords) ;
+ $maplength = @updcoords [2] - @updcoords [0] ;
+ @updcoords [0] = @updcoords [0] - 2 * ($maplength-25) ;
+ @updcoords [2] = @updcoords [0] + $maplength ;
+ $coordsnew = join (",", @updcoords) ;
+ $line =~ s/$coords/$coordsnew/ ;
+ push @map2, $line . "\n" ;
+ }
+ else
+ { push @map2, $line . "\n" ; }
+ }
+
+ open "FILE_OUT", ">", $file_htmlmap ;
+ print FILE_OUT @map2 ;
+ close "FILE_OUT" ;
+ }
+
+ if (-e $file_vector)
+ {
+ open "FILE_IN", "<", $file_vector ;
+ @svg = <FILE_IN> ;
+ close "FILE_IN" ;
+
+ foreach $line (@svg)
+ {
+ $line =~ s/\{\{(\d+)\}\}x+/@textsSVG[$1]/gxe ;
+ $line =~ s/\[(\d+)\[ (.*?) \]\d+\]/'<a style="fill:blue;" xlink:href="' . @linksSVG[$1] . '">' . $2 . '<\/a>'/gxe ;
+ }
+
+ open "FILE_OUT", ">", $file_vector ;
+ print FILE_OUT @svg ;
+ close "FILE_OUT" ;
+ }
+
+ # not for Wikipedia, for offline use:
+ if ($makehtml)
+ {
+ $map = "" ;
+ if ($linkmap)
+ {
+ open "FILE_IN", "<", $file_htmlmap ;
+ while ($line = <FILE_IN>)
+ { $map .= $line ; }
+ close "FILE_IN" ;
+ }
+ print "Generating html test file\n" ;
+ $width = sprintf ("%.0f", @Image {"width"} * 100) ;
+ $height = sprintf ("%.0f", @Image {"height"} * 100) ;
+ $html = <<__HTML__ ;
+
+<html>
+<head>
+<title>%FILENAME% - EasyTimeline test file</title>\n
+</head>
+
+<body>
+<h1><font color="green">EasyTimeline</font> - Test Page</h1>
+
+<b>Fixed size version (PNG): file $file_png</b><p>
+<map name="map1">
+$map</map>
+
+<!--
+If you want a border, the simplest way is to set <img .. border='1'>
+Here tables are used to draw similar borders around both images (border='1' does not seem to work for the embed tag)
+-->
+
+<table border='1' cellpadding='0' cellspacing='0'><tr><td>
+<img src=$file_png usemap='#map1' border='0'>
+</td></tr></table>
+
+<hr>
+<b>Scalable version (SVG): file $file_vector</b><p>
+<table border='1' cellpadding='0' cellspacing='0'><tr><td>
+<noembed>Your browser does not support embedded objects</noembed>
+<embed src='$file_vector' name='SVGEmbed' border='1'
+width='$width' height='$height' type='image/svg+xml' pluginspage='http://www.adobe.com/svg/viewer/install/'>
+</td></tr></table>
+
+<p>As you can see, the scalable version renders fonts more smoothly than the bitmap version.
+<br>Any SVG picture can also be rescaled or zoomed into, without annoying artefacts.
+
+<p>Windows users:<br>
+<small>&nbsp;&nbsp;Right mouse click on picture for zoom options or</small>
+<p><small>&nbsp;&nbsp;Ctrl+click for zoom in</small>
+<br><small>&nbsp;&nbsp;Ctrl+Shift+click for zoom out</small>
+<br><small>&nbsp;&nbsp;Alt+drag with mouse to move focus</small>
+
+</body>
+</html>
+
+__HTML__
+
+ $html =~ s/\%FILENAME\%/$file_name/ ;
+
+ open "FILE_OUT", ">", $file_html ;
+ print FILE_OUT $html ;
+ close "FILE_OUT" ;
+ }
+# my $cmd = "\"c:\\\\Program Files\\\\XnView\\\\xnview.exe\"" ;
+# system ("\"c:\\\\Program Files\\\\XnView\\\\xnview.exe\"", "d:\\\\Wikipedia\\Perl\\\\Wo2\\\\Test.png") ;
+}
+
+sub WriteTexts
+{
+ my ($line, $xpos, $ypos) ;
+ foreach $line (@PlotText)
+ {
+ my ($at, $bar, $text, $textcolor, $fontsize, $align, $shift, $link, $hint) = split (",", $line) ;
+ $text =~ s/\#\%\$/\,/g ;
+ $link =~ s/\#\%\$/\,/g ;
+ $hint =~ s/\#\%\$/\,/g ;
+ $shift =~ s/\#\%\$/\,/g ;
+ $textcolor =~ s/\#\%\$/\,/g ;
+
+ my $barcnt = 0 ;
+ for ($b = 0 ; $b <= $#Bars ; $b++)
+ {
+ if (lc(@Bars [$b]) eq lc($bar))
+ { $barcnt = ($b + 1) ; last ; }
+ }
+
+ if (@Axis {"time"} eq "x")
+ { $xpos = "$at(s)" ; $ypos = "[$barcnt](s)" ; }
+ else
+ { $ypos = "$at(s)" ; $xpos = "[$barcnt](s)" ; }
+
+ if ($shift ne "")
+ {
+ my ($shiftx, $shifty) = split (",", $shift) ;
+ if ($shiftx > 0)
+ { $xpos .= "+$shiftx" ; }
+ if ($shiftx < 0)
+ { $xpos .= "$shiftx" ; }
+ if ($shifty > 0)
+ { $ypos .= "+$shifty" ; }
+ if ($shifty < 0)
+ { $ypos .= "$shifty" ; }
+ }
+
+ &WriteText ("~", $bar, $shiftx, $xpos, $ypos, $text, $textcolor, $fontsize, $align, $link, $hint) ;
+ }
+}
+
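+# PlotBars emits one '#proc getdata' / '#proc bars' pair per distinct bar width,
+# widest first: entries narrower than the current maximum are deferred to the
+# next pass, so narrower segments are drawn later and end up on top.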
+sub PlotBars
+{
+ #proc getdata / #proc bars
+ while ($#PlotBarsNow >= 0)
+ {
+ undef @PlotBarsLater ;
+
+ $maxwidth = 0 ;
+ foreach $entry (@PlotBarsNow)
+ {
+ ($width) = split (",", $entry) ;
+ if ($width > $maxwidth)
+ { $maxwidth = $width ; }
+ }
+
+ $script .= "#proc getdata\n" ;
+ $script .= " delim: comma\n" ;
+ $script .= " data:\n" ;
+
+ foreach $entry (@PlotBarsNow)
+ {
+ my ($width, $bar, $from, $till, $color, $link, $hint) = split (",", $entry) ;
+ if ($width < $maxwidth)
+ {
+ push @PlotBarsLater, $entry ;
+ next ;
+ }
+ for ($b = 0 ; $b <= $#Bars ; $b++)
+ {
+ if (lc(@Bars [$b]) eq lc($bar))
+ { $bar = ($#Bars - ($b - 1)) ; last ; }
+ }
+ if (@Axis {"order"} !~ /reverse/i)
+ { $entry = "$bar,$from,$till,$color,$link,$hint,\n" ; }
+ else
+ { $entry = "$bar," . (-$till) . "," . (-$from) . ",$color,$link,$hint,\n" ; }
+
+ $script .= "$entry" ;
+ }
+ $script .= "\n" ;
+
+ #proc bars
+ $script .= "#proc bars\n" ;
+ $script .= " axis: " . @Axis {"time"} . "\n" ;
+ $script .= " barwidth: $maxwidth\n" ;
+ $script .= " outline: no\n" ;
+# $script .= " thinbarline: width=5\n" ;
+ if (@Axis {"time"} eq "x")
+ { $script .= " horizontalbars: yes\n" ; }
+ $script .= " locfield: 1\n" ;
+ $script .= " segmentfields: 2 3\n" ;
+ $script .= " colorfield: 4\n" ;
+# $script .= " outline: width=1\n" ;
+# $script .= " barwidthfield: 5\n" ;
+# if (@fields [4] ne "")
+# { $script .= " clickmapurl: " . &LinkToUrl ($text) . "\n" ; }
+# if (@fields [5] ne "")
+# { $script .= " clickmaplabel: $text\n" ; }
+ $script .= " clickmapurl: \@\@5\n" ;
+ $script .= " clickmaplabel: \@\@6\n" ;
+ $script .= "\n" ;
+
+ @PlotBarsNow = @PlotBarsLater ;
+ }
+}
+
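+# PlotScale draws a time axis for the major or minor scale. It can be called
+# twice per scale: once with $grid set, before the bars, to draw the grid lines,
+# and once without, after the bars, so the axis itself sits on top of them.
+# The area is cloned with a linear scale to work around the Ploticus date
+# autorange problem noted below.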
+sub PlotScale
+{
+ my $scale = shift ;
+ my $grid = shift ;
+ my ($color, $from, $till, $start) ;
+
+ %x = %Period ;
+# if (($DateFormat =~ /\//) && ($grid))
+# { return ; }
+
+# if (($DateFormat =~ /\//)
+# {
+# }
+
+# if (! $grid) # redefine area, scale linear for time axis, show whole years always, Ploticus bug
+# {
+ # $from = @Period {"from"} ;
+ # $till = @Period {"till"} ;
+ $from = &DateToFloat (@Period {"from"}) ;
+ $till = &DateToFloat (@Period {"till"}) ;
+ # $from =~ s/.*\///g ; # delete dd mm if present
+ # $till =~ s/.*\///g ;
+ #proc areadef
+ $script .= "#proc areadef\n" ;
+ $script .= " #clone: A\n" ;
+ $script .= " " . @Axis {"time"} . "scaletype: linear\n" ; # date yyyy
+
+ if (@Axis {"order"} !~ /reverse/i)
+ { $script .= " " . @Axis {"time"} . "range: $from $till\n" ; }
+ else
+ { $script .= " " . @Axis {"time"} . "range: " . (-$till) . " " . (-$from) . "\n" ; }
+
+ $script .= "\n" ;
+# }
+
+ $script .= "#proc " . @Axis {"time"} . "axis\n" ;
+
+ if (($scale eq "Major") && (! $grid))
+ {
+# $script .= " stubs: incremental " . @Scales {"Major inc"} . " " . @Scales {"Major unit"} . "\n" ;
+# if ($DateFormat =~ /\//)
+# { $script .= " stubformat: " . @Axis {"format"} . "\n" ; }
+# temp always show whole years (Ploticus autorange bug)
+ if (@Scales {"Major stubs"} eq "") # ($DateFormat !~ /\//)
+ { $script .= " stubs: incremental " . @Scales {"Major inc"} . "\n" ; }
+ else
+ { $script .= " stubs: list " . @Scales {"Major stubs"} . "\n" ; }
+ }
+ else
+ { $script .= " stubs: none\n" ; }
+
+ if ($DateFormat !~ /\//)
+# { $script .= " ticincrement: " . @Scales {"$scale inc"} . " " . @Scales {"$scale unit"} . "\n" ; }
+ { $script .= " ticincrement: " . @Scales {"$scale inc"} . "\n" ; }
+ else
+ {
+ my $unit = 1 ;
+ if (@Scales {"$scale unit"} =~ /month/i)
+ { $unit = 1/12 ; }
+ if (@Scales {"$scale unit"} =~ /day/i)
+ { $unit = 1/365 ; }
+ $script .= " ticincrement: " . @Scales {"$scale inc"} . " $unit\n" ;
+ }
+
+ if (defined (@Scales {"$scale start"}))
+ {
+ $start = @Scales {"$scale start"} ;
+ # $start =~ s/.*\///g ; # delete dd mm if present
+ $start = &DateToFloat ($start) ;
+ if (@Axis {"order"} =~ /reverse/i)
+ {
+ $loop = 0 ;
+ $start = -$start ;
+ while ($start - @Scales {"$scale inc"} >= - @Period {"till"})
+ {
+ $start -= @Scales {"$scale inc"} ;
+ if (++$loop > 1000) { last ; } # precaution
+ }
+ }
+ $script .= " stubrange: $start\n" ;
+ }
+
+ if ($scale eq "Major")
+ {
+ $script .= " ticlen: 0.05\n" ;
+ if (@Axis {"time"} eq "y")
+ { $script .= " stubdetails: adjust=0.05,0\n" ; }
+ if (@Axis {"order"} =~ /reverse/i)
+ { $script .= " signreverse: yes\n" ; }
+ }
+ else
+ { $script .= " ticlen: 0.02\n" ; }
+# $script .= " location: 4\n" ; test
+
+ $color .= @Scales {"$scale grid"} ;
+
+ if (defined (@Colors {$color}))
+ { $color = @Colors {$color} ; }
+
+ if ($grid)
+ { $script .= " grid: color=$color\n" ; }
+
+ $script .= "\n" ;
+
+ if ($grid) # restore areadef
+ {
+ #proc areadef
+ $script .= "#proc areadef\n" ;
+ $script .= " #clone: A\n" ;
+ $script .= "\n" ;
+ }
+}
+
+sub PlotLines
+{
+ my $layer = shift ;
+
+ if ($#DrawLines < 0)
+ { return ; }
+
+ undef (@DrawLinesNow) ;
+
+ foreach $line (@DrawLines)
+ {
+ if ($line =~ /\|$layer\n/)
+ { push @DrawLinesNow, $line ; }
+ }
+
+ if ($#DrawLinesNow < 0)
+ { return ; }
+
+ foreach $entry (@DrawLinesNow)
+ {
+ chomp ($entry) ;
+ $script .= "#proc line\n" ;
+# $script .= " notation: scaled\n" ;
+ if ($entry =~ /^[12]/)
+ { ($mode, $at, $from, $till, $color, $width) = split ('\|', $entry) ; }
+ else
+ { ($mode, $points, $color, $width) = split ('\|', $entry) ; }
+
+ $script .= " linedetails: width=$width color=$color style=0\n" ;
+
+ if ($mode == 1) # draw perpendicular to time axis
+ {
+ if (@Axis {"order"} =~ /reverse/i)
+ { $at = -$at ; }
+
+ if (@Axis {"time"} eq "x")
+ {
+ if ($from eq "")
+ { $from = @PlotArea {"bottom"} }
+ if ($till eq "")
+ { $till = @PlotArea {"bottom"} + @PlotArea {"height"} }
+ $from += ($width/200) ; # compensate for overstretching of thick lines
+ $till -= ($width/200) ;
+ if ($from > @Image {"height"})
+ { $from = @Image {"height"} ; }
+ if ($till > @Image {"height"})
+ { $till = @Image {"height"} ; }
+ $script .= " points: $at(s) $from $at(s) $till\n" ;
+ }
+ else
+ {
+ if ($from eq "")
+ { $from = @PlotArea {"left"} }
+ if ($till eq "")
+ { $till = @PlotArea {"left"} + @PlotArea {"width"} }
+ $from += ($width/200) ;
+ $till -= ($width/200) ;
+ if ($from > @Image {"width"})
+ { $from = @Image {"width"} ; }
+ if ($till > @Image {"width"})
+ { $till = @Image {"width"} ; }
+ $script .= " points: $from $at(s) $till $at(s)\n" ;
+ }
+ }
+
+ if ($mode == 2) # draw parallel to time axis
+ {
+ if (@Axis {"order"} =~ /reverse/i)
+ {
+ $from = -$from ;
+ $till = -$till ;
+ }
+
+ $from .= "(s)+" .($width/200) ;
+ $till .= "(s)-" .($width/200) ;
+ if (@Axis {"time"} eq "x")
+ {
+ if ($at eq "")
+ { $at = @PlotArea {"bottom"} ; }
+ if ($at > @Image {"height"})
+ { $at = @Image {"height"} ; }
+ $script .= " points: $from $at $till $at\n" ;
+ }
+ else
+ {
+ if ($at eq "")
+ { $at = @PlotArea {"left"} ; }
+ if ($at > @Image {"width"})
+ { $at = @Image {"width"} ; }
+ $script .= " points: $at $from $at $till\n" ;
+ }
+ }
+
+ if ($mode == 3) # draw free line
+ {
+ @Points = split (",", $points) ;
+ foreach $point (@Points)
+ { $point = &Normalize ($point) ; }
+ if ((@Points [0] > @Image {"width"}) ||
+ (@Points [1] > @Image {"height"}) ||
+ (@Points [2] > @Image {"width"}) ||
+ (@Points [3] > @Image {"height"}))
+ { &Error2 ("Linedata attribute 'points' invalid.\n" .
+ sprintf ("(%d,%d)(%d,%d)", @Points[0]*100, @Points[1]*100, @Points[2]*100, @Points[3]*100) . " does not fit in image\n") ;
+ return ; }
+ $script .= " points: @Points[0] @Points[1] @Points[2] @Points[3]\n" ;
+ }
+ }
+
+
+ $script .= "\n" ;
+}
+
+sub ColorPredefined
+{
+ my $color = shift ;
+ if ($color =~ /^(?:black|white|tan1|tan2|red|magenta|claret|coral|pink|orange|
+ redorange|lightorange|yellow|yellow2|dullyellow|yelloworange|
+ brightgreen|green|kelleygreen|teal|drabgreen|yellowgreen|
+ limegreen|brightblue|darkblue|blue|oceanblue|skyblue|
+ purple|lavender|lightpurple|powderblue|powderblue2)$/xi)
+ {
+ if (! defined (@Colors {lc ($color)}))
+ { &StoreColor ($color, $color, "", $command) ; }
+ return ($true) ;
+ }
+ else
+ { return ($false) ; }
+}
+
+sub ValidAbs
+{
+ $value = shift ;
+ if ($value =~ /^ \d+ \.? \d* (?:px|in|cm)? $/xi)
+ { return ($true) ; }
+ else
+ { return ($false) ; }
+}
+
+sub ValidAbsRel
+{
+ $value = shift ;
+ if ($value =~ /^ \d+ \.? \d* (?:px|in|cm|$hPerc)? $/xi)
+ { return ($true) ; }
+ else
+ { return ($false) ; }
+}
+
+sub ValidDateFormat
+{
+ my $date = shift ;
+ my ($day, $month, $year) ;
+
+# if ($date=~ /^\-?\d+$/) # for now full years are always allowed
+# { return ($true) ; }
+
+ if ($DateFormat eq "yyyy")
+ {
+ if (! ($date=~ /^\-?\d+$/))
+ { return ($false) ; }
+ return ($true) ;
+ }
+
+ if ($DateFormat eq "x.y")
+ {
+ if (! ($date=~ /^\-?\d+(?:\.\d+)?$/))
+ { return ($false) ; }
+ return ($true) ;
+ }
+
+ if (! ($date=~ /^\d\d\/\d\d\/\d\d\d\d$/))
+ { return ($false) ; }
+
+ if ($DateFormat eq "dd/mm/yyyy")
+ {
+ $day = substr ($date,0,2) ;
+ $month = substr ($date,3,2) ;
+ $year = substr ($date,6,4) ;
+ }
+ else
+ {
+ $day = substr ($date,3,2) ;
+ $month = substr ($date,0,2) ;
+ $year = substr ($date,6,4) ;
+ }
+
+ if ($month =~ /^(?:01|03|05|07|08|10|12)$/)
+ { if ($day > 31) { return ($false) ; }}
+ elsif ($month =~ /^(?:04|06|09|11)$/)
+ { if ($day > 30) { return ($false) ; }}
+ elsif ($month =~ /^02$/)
+ {
+ if (($year % 4 == 0) && ($year % 100 != 0))
+ { if ($day > 29) { return ($false) ; }}
+ else
+ { if ($day > 28) { return ($false) ; }}
+ }
+ else { return ($false) ; }
+ return ($true) ;
+}
+
+sub ValidDateRange
+{
+ my $date = shift ;
+ my ($day, $month, $year,
+ $dayf, $monthf, $yearf,
+ $dayt, $montht, $yeart) ;
+
+ my $from = @Period {"from"} ;
+ my $till = @Period {"till"} ;
+
+ if (($DateFormat eq "yyyy") || ($DateFormat eq "x.y"))
+ {
+ if (($date < $from) || ($date > $till))
+ { return ($false) ; }
+ return ($true) ;
+ }
+
+ if ($DateFormat eq "dd/mm/yyyy")
+ {
+ $day = substr ($date,0,2) ;
+ $month = substr ($date,3,2) ;
+ $year = substr ($date,6,4) ;
+ $dayf = substr ($from,0,2) ;
+ $monthf = substr ($from,3,2) ;
+ $yearf = substr ($from,6,4) ;
+ $dayt = substr ($till,0,2) ;
+ $montht = substr ($till,3,2) ;
+ $yeart = substr ($till,6,4) ;
+ }
+ if ($DateFormat eq "mm/dd/yyyy")
+ {
+ $day = substr ($date,3,2) ;
+ $month = substr ($date,0,2) ;
+ $year = substr ($date,6,4) ;
+ $dayf = substr ($from,3,2) ;
+ $monthf = substr ($from,0,2) ;
+ $yearf = substr ($from,6,4) ;
+ $dayt = substr ($till,3,2) ;
+ $montht = substr ($till,0,2) ;
+ $yeart = substr ($till,6,4) ;
+ }
+
+ if (($year < $yearf) ||
+ (($year == $yearf) &&
+ (($month < $monthf) ||
+ (($month == $monthf) && ($day < $dayf))
+ )))
+ { return ($false) }
+
+ if (($year > $yeart) ||
+ (($year == $yeart) &&
+ (($month > $montht) ||
+ (($month == $montht) && ($day > $dayt))
+ )))
+ { return ($false) }
+
+ return ($true) ;
+}
+
+sub DateMedium
+{
+ my $from = shift ;
+ my $till = shift ;
+
+ if (($DateFormat eq "yyyy") || ($DateFormat eq "x.y"))
+ { return (sprintf ("%.3f", ($from + $till) / 2)) ; }
+
+ $from2 = &DaysFrom1800 ($from) ;
+ $till2 = &DaysFrom1800 ($till) ;
+ my $date = &DateFrom1800 (int (($from2 + $till2) / 2)) ;
+ return ($date) ;
+}
+
+sub DaysFrom1800
+{
+ @mmm = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) ;
+ my $date = shift ;
+ if ($DateFormat eq "dd/mm/yyyy")
+ {
+ $day = substr ($date,0,2) ;
+ $month = substr ($date,3,2) ;
+ $year = substr ($date,6,4) ;
+ }
+ else
+ {
+ $day = substr ($date,3,2) ;
+ $month = substr ($date,0,2) ;
+ $year = substr ($date,6,4) ;
+ }
+ if ($year < 1800)
+ { &Error2 ("Function 'DaysFrom1800' expects year >= 1800, not '$year'.") ; return ; }
+
+ $days = ($year - 1800) * 365 ;
+ $days += int (($year -1 - 1800) / 4) ;
+ $days -= int (($year -1 - 1800) / 100) ;
+ if ($month > 1)
+ {
+ for ($m = $month - 2 ; $m >= 0 ; $m--)
+ {
+ $days += @mmm [$m] ;
+ if ($m == 1)
+ {
+ if ((($year % 4) == 0) && (($year % 100) != 0))
+ { $days ++ ; }
+ }
+ }
+ }
+ $days += $day ;
+
+ return ($days) ;
+}
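+
+# DaysFrom1800 sanity check (hand-worked illustrative values, assuming
+# DateFormat "dd/mm/yyyy"; they are not used by the code):
+#   DaysFrom1800 ("01/01/1800") -> 1
+#   DaysFrom1800 ("01/02/1800") -> 32   (31 days of January + 1)
+#   DaysFrom1800 ("01/03/1800") -> 60   (1800 is not a leap year: 31 + 28 + 1)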
+
+sub DateToFloat
+{
+ my $date = shift ;
+ if ($DateFormat !~ /\//)
+ { return ($date) ; }
+ my $year = $date ;
+ $year =~ s/.*\///g ; # delete dd mm/mm dd
+ my $fraction = (&DaysFrom1800 ($date) - &DaysFrom1800 ("01/01/" . $year)) / 365.25 ;
+ return ($year + $fraction) ;
+}
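+
+# DateToFloat illustration (hand-computed, dd/mm/yyyy):
+#   DateToFloat ("01/07/1999") = 1999 + 181 / 365.25  ~  1999.496
+# where 181 = DaysFrom1800 ("01/07/1999") - DaysFrom1800 ("01/01/1999")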
+
+sub DateFrom1800
+{
+ my $days = shift ;
+
+ @mmm = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) ;
+
+ $year = 1800 ;
+ while ($days > 365 + (($year % 4) == 0))
+ {
+ if ((($year % 4) == 0) && (($year % 100) != 0))
+ { $days -= 366 ; }
+ else
+ { $days -= 365 ; }
+ $year ++ ;
+ }
+
+ $month = 0 ;
+ while ($days > @mmm [$month])
+ {
+ $days -= @mmm [$month] ;
+ if ($month == 1)
+ {
+ if ((($year % 4) == 0) && (($year % 100) != 0))
+ { $days -- ; } ;
+ }
+ $month++ ;
+ }
+ $day = $days ;
+
+ $month ++ ;
+ if ($DateFormat eq "dd/mm/yyyy")
+ { $date = sprintf ("%02d/%02d/%04d", $day, $month, $year) ; }
+ else
+ { $date = sprintf ("%02d/%02d/%04d", $month, $day, $year) ; }
+
+ return ($date) ;
+}
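+
+# DateFrom1800 is the inverse of DaysFrom1800, e.g. (dd/mm/yyyy, hand-checked):
+#   DateFrom1800 (60) -> "01/03/1800"   and   DaysFrom1800 ("01/03/1800") -> 60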
+
+sub ExtractText
+{
+ my $data = shift ;
+ my $data2 = $data ;
+ my $text = "" ;
+
+ # special case: allow embedded spaces when 'text' is last attribute
+# $data2 =~ s/\:\:/\@\#\!/g ;
+ if ($data2 =~ /text\:[^\:]+$/)
+ {
+ $text = $data2 ;
+ $text =~ s/^.*?text\:// ;
+# $text =~ s/^\s(.*?)\s*$/$1/ ; ?? ->
+ $text =~ s/^(.*?)\s*$/$1/ ;
+ $text =~ s/\\n/\n/g ;
+ $text =~ s/\"\"/\@\#\$/g ;
+ $text =~ s/\"//g ;
+ $text =~ s/\@\#\$/"/g ;
+ $data2 =~ s/text\:.*$// ;
+ }
+
+ # extract text between double quotes
+ $data2 =~ s/\"\"/\@\#\$/g ;
+ if ($data2 =~ /text\:\s*\"/)
+ {
+ $text = $data2 ;
+ $text =~ s/^.*?text\:\s*\"// ;
+
+ if (! ($text =~ /\"/))
+ { &Error ("PlotData invalid. Attribute 'text': no closing \" found.") ;
+ return ("x", "x") ; }
+
+ $text =~ s/\".*$//;
+ $text =~ s/\@\#\$/"/g ;
+ $text =~ s/\\n/\n/g ;
+ }
+ $data2 =~ s/text\:\s*\"[^\"]*\"// ;
+ $data2 =~ s/\@\#\$/"/g ;
+ return ($data2, $text) ;
+}
+
+sub ParseText
+{
+ my $text = shift ;
+ $text =~ s/\_\_/\@\#\$/g ;
+ $text =~ s/\_/ /g ;
+ $text =~ s/\@\#\$/_/g ;
+
+ $text =~ s/\~\~/\@\#\$/g ;
+ $text =~ s/\~/\\n/g ;
+ $text =~ s/\@\#\$/~/g ;
+
+ return ($text) ;
+}
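+
+# ParseText examples (illustrative only; "\n" below is the literal two-character
+# escape that is rendered as a line break later on):
+#   "New~York"  -> "New\nYork"
+#   "foo_bar"   -> "foo bar"
+#   "foo__bar"  -> "foo_bar"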
+
+sub BarDefined
+{
+ my $bar = shift ;
+ foreach $bar2 (@Bars)
+ {
+ if (lc ($bar2) eq lc ($bar))
+ { return ($true) ; }
+ }
+
+# not part of a barset? return
+ if ($bar !~ /\#\d+$/)
+ { return ($false) ; }
+
+# find previous bar in barset
+ my $barcnt = $bar ;
+ my $barid = $bar ;
+ $barcnt =~ s/.*\#(\d+$)/$1/ ;
+ $barid =~ s/(.*\#)\d+$/$1/ ;
+ $barcnt -- ;
+ $a = $#Bars ;
+ for (my $b = 0 ; $b <= $#Bars ; $b++)
+ {
+ if (lc (@Bars [$b]) eq lc ($barid . $barcnt))
+ {
+ $b++ ;
+ for (my $b2 = $#Bars + 1 ; $b2 > $b ; $b2--)
+ { @Bars [$b2] = @Bars [$b2-1]; }
+ @Bars [$b] = lc ($bar) ;
+ @BarLegend {lc ($bar)} = " " ;
+ return ($true) ;
+ }
+ }
+ return ($false) ;
+}
+
+sub ValidAttributes
+{
+ my $command = shift ;
+
+ if ($command =~ /^BackgroundColors$/i)
+ { return (CheckAttributes ($command, "", "canvas,bars")) ; }
+
+ if ($command =~ /^BarData$/i)
+# { return (CheckAttributes ($command, "", "bar,barset,barcount,link,text")) ; }
+ { return (CheckAttributes ($command, "", "bar,barset,link,text")) ; }
+
+ if ($command =~ /^Colors$/i)
+ { return (CheckAttributes ($command, "id,value", "legend")) ; }
+
+ if ($command =~ /^ImageSize$/i)
+ { return (CheckAttributes ($command, "", "width,height,barincrement")) ; }
+
+ if ($command =~ /^Legend$/i)
+ { return (CheckAttributes ($command, "", "columns,columnwidth,orientation,position,left,top")) ; }
+
+ if ($command =~ /^LineData$/i)
+ { return (CheckAttributes ($command, "", "at,from,till,atpos,frompos,tillpos,points,color,layer,width")) ; }
+
+ if ($command =~ /^Period$/i)
+ { return (CheckAttributes ($command, "from,till", "")) ; }
+
+ if ($command =~ /^PlotArea$/i)
+ { return (CheckAttributes ($command, "", "left,bottom,width,height,right,top")) ; }
+
+ if ($command =~ /^PlotData$/i)
+ { return (CheckAttributes ($command, "", "align,anchor,at,bar,barset,color,fontsize,from,link,mark,shift,text,textcolor,till,width")) ; }
+
+ if ($command =~ /^Scale/i)
+ { return (CheckAttributes ($command, "increment,start", "unit,grid,gridcolor,text")) ; }
+
+ if ($command =~ /^TextData$/i)
+ { return (CheckAttributes ($command, "", "fontsize,lineheight,link,pos,tabs,text,textcolor")) ; }
+
+ if ($command =~ /^TimeAxis$/i)
+ { return (CheckAttributes ($command, "", "orientation,format,order")) ; }
+
+ return ($true) ;
+}
+
+sub CheckAttributes
+{
+ my $name = shift ;
+ my @Required = split (",", shift) ;
+ my @Allowed = split (",", shift) ;
+
+ my $attribute ;
+ my %Attributes2 = %Attributes ;
+
+ $hint = "\nSyntax: '$name =" ;
+ foreach $attribute (@Required)
+ { $hint .= " $attribute:.." ; }
+ foreach $attribute (@Allowed)
+ { $hint .= " [$attribute:..]" ; }
+ $hint .= "'" ;
+
+ foreach $attribute (@Required)
+ {
+ if ((! defined (@Attributes {$attribute})) || (@Attributes {$attribute} eq ""))
+ { &Error ("$name definition incomplete. $hint") ;
+ undef (@Attributes) ; return ($false) ; }
+ delete (@Attributes2 {$attribute}) ;
+ }
+ foreach $attribute (@Allowed)
+ { delete (@Attributes2 {$attribute}) ; }
+
+ @AttrKeys = keys %Attributes2 ;
+ if ($#AttrKeys >= 0)
+ {
+ if (@AttrKeys [0] eq "single")
+ { &Error ("$name definition invalid. Specify all attributes as name:value pairs.") ; }
+ else
+ { &Error ("$name definition invalid. Invalid attribute '" . @AttrKeys [0] . "' found. $hint") ; }
+ undef (@Attributes) ; return ($false) ; }
+
+ return ($true) ;
+}
+
+sub CheckPreset
+{
+ my $command = shift ;
+ my ($preset, $action, $attrname, $attrvalue) ;
+
+ my $newcommand = $true ;
+ my $addvalue = $true ;
+ if ($command =~ /^$prevcommand$/i)
+ { $newcommand = $false ; }
+ if ((! $newcommand) && ($command =~ /^(?:DrawLines|PlotData|TextData)$/i))
+ { $addvalue = $false ; }
+ $prevcommand = $command ;
+
+ foreach $preset (@PresetList)
+ {
+ if ($preset =~ /^$command\|/i)
+ {
+ ($command, $action, $attrname, $attrpreset) = split ('\|', $preset) ;
+ if ($attrname eq "")
+ { $attrname = "single" ; }
+
+ $attrvalue = @Attributes {$attrname} ;
+
+ if (($action eq "-") && ($attrvalue ne ""))
+ {
+ if ($attrname eq "single")
+ { &Error ("Chosen preset makes this command redundant.\n" .
+ " Please remove this command.") ; }
+ else
+ { &Error ("Chosen preset conflicts with '$attrname:...'.\n" .
+ " Please remove this attribute.") ; }
+ @Attributes {$attrname} = "" ;
+ }
+
+ if (($action eq "+") && ($attrvalue eq ""))
+ {
+ if ($addvalue)
+ { @Attributes {$attrname} = $attrpreset ; }
+ }
+
+ if (($action eq "=") && ($attrvalue eq ""))
+ { @Attributes {$attrname} = $attrpreset ; }
+
+ if (($action eq "=") && ($attrvalue ne "") &&
+ ($attrvalue !~ /$attrpreset/i))
+ {
+ if ($attrname eq "single")
+ { &Error ("Conflicting settings.\nPreset defines '$attrpreset'.") ; }
+ else
+ { &Error ("Conflicting settings.\nPreset defines '$attrname:$attrpreset'.") ; }
+ @Attributes {$attrname} = $attrpreset ;
+ }
+ }
+ }
+}
+
+sub ShiftOnePixelForSVG
+{
+ my $line = shift ;
+ $line =~ s/location:\s*// ;
+ my ($posx, $posy) = split (" ", $line) ;
+
+ if ($posy =~ /\+/)
+ { ($posy1, $posy2) = split ('\+', $posy) ; }
+ elsif ($posy =~ /.+\-/)
+ {
+ if ($posy =~ /^\-/)
+ {
+ ($sign, $posy1, $posy2) = split ('\-', $posy) ; $posy2 = - $posy2 ;
+ $posy1 = "-" . $posy1 ;
+ }
+ else
+ { ($posy1, $posy2) = split ('\-', $posy) ; $posy2 = - $posy2 }
+ }
+ else
+ { $posy1 = $posy ; $posy2 = 0 ; }
+
+ if ($posy1 !~ /(s)/)
+ { $posy += 0.01 ; }
+ else
+ {
+ $posy2 += 0.01 ;
+ if ($posy2 == 0)
+ { $posy = $posy1 ; }
+ elsif ($posy2 < 0)
+ { $posy = $posy1 . "$posy2" ; }
+ else
+ { $posy = $posy1 . "+" . $posy2 ; }
+ }
+
+ $line = "\n location: $posx $posy" ;
+ return ($line) ;
+}
+
+sub NormalizeURL
+{
+ my $url = shift ;
+ $url =~ s/(https?)\:?\/?\/?/$1:\/\// ; # add possibly missing special characters
+ $url =~ s/ /%20/g ;
+ return ($url) ;
+}
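+
+# NormalizeURL example (hand-checked):
+#   "http//en.wikipedia.org/wiki/Main Page" -> "http://en.wikipedia.org/wiki/Main%20Page"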
+
+# wiki style link may include linebreak characters -> split into several wiki links
+sub NormalizeWikiLink
+{
+ my $text = shift ;
+
+ my $brdouble = $false ;
+ if ($text =~ /\[\[.*\]\]/)
+ { $brdouble = $true ; }
+
+ $text =~ s/\[\[?// ;
+ $text =~ s/\]?\]// ;
+
+ my ($hide,$show) = split ('\|', $text) ;
+ if ($show eq "")
+ { $show = $hide ; }
+ $hide =~ s/\s*\n\s*/ /g ;
+
+ my @Show = split ("\n", $show) ;
+ $text = "" ;
+ foreach $part (@Show)
+ {
+ if ($brdouble)
+ { $part = "[[" . $hide . "|" . $part . "]]" ; }
+ else
+ { $part = "[" . $hide . "|" . $part . "]" ; }
+ }
+ $text = join ("\n", @Show) ;
+
+ return ($text) ;
+}
+
+sub ProcessWikiLink
+{
+ my $text = shift ;
+ my $link = shift ;
+ my $hint = shift ;
+ my $wikilink = $false ;
+
+ chomp ($text) ;
+ chomp ($link) ;
+ chomp ($hint) ;
+
+ my ($wiki, $title) ;
+ if ($link ne "") # ignore wiki brackets in text when explicit link is specified
+ {
+ $text =~ s/\[\[ [^\|]+ \| (.*) \]\]/$1/gx ;
+ $text =~ s/\[\[ [^\:]+ \: (.*) \]\]/$1/gx ;
+# $text =~ s/\[\[ (.*) \]\]/$1/gx ;
+ }
+ else
+ {
+ if ($text =~ /\[.+\]/) # keep first link in text segment, remove others
+ {
+ $link = $text ;
+ $link =~ s/\n//g ;
+ $link =~ s/^[^\[\]]*\[/[/x ;
+
+ if ($link =~ /^\[\[/)
+ { $wikilink = $true ; }
+
+ $link =~ s/^ [^\[]* \[+ ([^\[\]]*) \].*$/$1/x ;
+ $link =~ s/\|.*$// ;
+ if ($wikilink)
+ { $link = "[[" . $link . "]]" ; }
+
+ $text =~ s/(\[+) [^\|\]]+ \| ([^\]]*) (\]+)/$1$2$3/gx ;
+ $text =~ s/(https?)\:/$1colon/gx ;
+# $text =~ s/(\[+) [^\:\]]+ \: ([^\]]*) (\]+)/$1$2$3/gx ; #???
+
+ # remove interwiki link prefix
+ $text =~ s/(\[+) (?:.{2,3}|(?:zh\-.*)|simple|minnan|tokipona) \: ([^\]]*) (\]+)/$1$2$3/gxi ; #???
+
+ $text =~ s/\[+ ([^\]]+) \]+/{{{$1}}}/x ;
+ $text =~ s/\[+ ([^\]]+) \]+/$1/gx ;
+ $text =~ s/\{\{\{ ([^\}]*) \}\}\}/[[$1]]/x ;
+ }
+# if ($text =~ /\[\[.+\]\]/)
+# {
+# $wikilink = $true ;
+# $link = $text ;
+# $link =~ s/\n//g ;
+# $link =~ s/^.*?\[\[/[[/x ;
+# $link =~ s/\| .*? \]\].*$/]]/x ;
+# $link =~ s/\]\].*$/]]/x ;
+# $text =~ s/\[\[ [^\|\]]+ \| (.*?) \]\]/[[$1]]/x ;
+# $text =~ s/\[\[ [^\:\]]+ \: (.*?) \]\]/[[$1]]/x ;
+
+# # remove remaining links
+# $text =~ s/\[\[ ([^\]]+) \]\]/^%#$1#%^/x ;
+# $text =~ s/\[+ ([^\]]+) \]+/$1/gx ;
+# $text =~ s/\^$hPerc\# (.*?) \#$hPerc\^/[[$1]]/x ;
+# }
+# elsif ($text =~ /\[.+\]/)
+# {
+# $link = $text ;
+# $link =~ s/\n//g ;
+# $link =~ s/^.*?\[/[/x ;
+# $link =~ s/\| .*? \].*$/]/x ;
+# $link =~ s/\].*$/]/x ;
+# $link =~ s/\[ ([^\]]+) \]/$1/x ;
+# $text =~ s/\[ [^\|\]]+ \| (.*?) \]/[[$1]]/x ;
+
+# # remove remaining links
+# $text =~ s/\[\[ ([^\]]+) \]\]/^%#$1#%^/x ;
+# $text =~ s/\[+ ([^\]]+) \]+/$1/gx ;
+# $text =~ s/\^$hPerc\# (.*?) \#$hPerc\^/[[$1]]/x ;
+## $text =~ s/\[\[ (.*) \]\]/$1/gx ;
+# }
+
+ }
+
+ if ($wikilink)
+ {
+# if ($link =~ /^\[\[.+\:.+\]\]$/) # has a colon in its name
+ if ($link =~ /^\[\[ (?:.{2,3}|(?:zh\-.*)|simple|minnan|tokipona) \: .+\]\]$/xi) # has an interwiki link prefix
+ {
+ # This will fail for all interwiki links other than Wikipedia.
+ $wiki = lc ($link) ;
+ $title = $link ;
+ $wiki =~ s/\[\[([^\:]+)\:.*$/$1/x ;
+ $title =~ s/^[^\:]+\:(.*)\]\]$/$1/x ;
+ $title =~ s/ /_/g ;
+ $link = "http://$wiki.wikipedia.org/wiki/$title" ;
+ $link = &EncodeURL ($title) ;
+ if (($hint eq "") && ($title ne ""))
+ { $hint = "$wiki: $title" ; }
+ }
+ else
+ {
+ # $wiki = "en" ;
+ $title = $link ;
+ $title =~ s/^\[\[(.*)\]\]$/$1/x ;
+ $title =~ s/ /_/g ;
+ $link = $articlepath ;
+ $urlpart = &EncodeURL ($title) ;
+ $link =~ s/\$1/$urlpart/ ;
+ if (($hint eq "") && ($title ne ""))
+ { $hint = "$title" ; }
+ }
+ $hint =~ s/_/ /g ;
+ }
+ else
+ {
+ if ($link ne "")
+ { $hint = &ExternalLinkToHint ($link) ; }
+ }
+
+ if (($link ne "") && ($text !~ /\[\[/) && ($text !~ /\]\]/))
+ { $text = "[[" . $text . "]]" ; }
+
+ $hint = &EncodeHtml ($hint) ;
+ return ($text, $link, $hint) ;
+}
+
+sub ExternalLinkToHint
+{
+ my $hint = shift ;
+ $hint =~ s/^https?\:?\/?\/?// ;
+ $hint =~ s/\/.*$// ;
+ return (&EncodeHtml ($hint . "/..")) ;
+}
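+
+# ExternalLinkToHint example (hand-checked):
+#   "http://www.example.org/some/page" -> "www.example.org/.."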
+
+sub EncodeInput
+{
+ my $text = shift ;
+ # revert encoding of '<' & '>' by MediaWiki
+ $text =~ s/\&lt\;/\</g ;
+ $text =~ s/\&gt\;/\>/g ;
+ $text =~ s/([\`\{\}\%\&\@\$\(\)\;\=])/"%" . sprintf ("%X", ord($1)) . "%";/ge ;
+ return ($text) ;
+}
+
+sub DecodeInput
+{
+ my $text = shift ;
+ $text =~ s/\%([0-9A-F]{2})\%/chr(hex($1))/ge ;
+ return ($text) ;
+}
+
+sub EncodeHtml
+{
+ my $text = shift ;
+ $text =~ s/([\<\>\&\'\"])/"\&\#" . ord($1) . "\;"/ge ;
+ $text =~ s/\n/<br>/g ;
+ return ($text) ;
+}
+
+sub EncodeURL
+{
+ my $url = shift ;
+ # For some reason everything gets run through this weird internal
+ # encoding that's similar to URL-encoding. Armor against this as well,
+ # or else adjacent encoded bytes will be corrupted.
+ $url =~ s/([^0-9a-zA-Z\%\:\/\._])/"%25%".sprintf ("%02X",ord($1))/ge ;
+ return ($url) ;
+}
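+
+# EncodeURL example (hand-checked): each byte outside [0-9a-zA-Z%:/._] becomes
+# "%25%XX", e.g. "Tim Berners-Lee" -> "Tim%25%20Berners%25%2DLee"; running
+# DecodeInput over that yields the ordinary escapes "Tim%20Berners%2DLee".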
+
+sub Error
+{
+ my $msg = &DecodeInput(shift) ;
+ $msg =~ s/\n\s*/\n /g ; # indent consecutive lines
+
+ $CntErrors++ ;
+ if (! $listinput)
+ { push @Errors, "Line $LineNo: " . &DecodeInput($Line) . "\n" ; }
+ push @Errors, "- $msg\n\n" ;
+ if ($CntErrors > 10)
+ { &Abort ("More than 10 errors found") ; }
+}
+
+sub Error2
+{
+ my $msg = &DecodeInput(shift) ;
+ $msg =~ s/\n\s*/\n /g ; # indent consecutive lines
+ $CntErrors++ ;
+ push @Errors, "- $msg\n" ;
+}
+
+sub Warning
+{
+ my $msg = &DecodeInput(shift) ;
+ $msg =~ s/\n\s*/\n /g ; # indent consecutive lines
+ if (! $listinput)
+ { push @Warnings, "Line $LineNo: " . &DecodeInput ($Line) . "\n" ; }
+ push @Warnings, "- $msg\n\n" ;
+}
+
+sub Warning2
+{
+ my $msg = &DecodeInput(shift) ;
+ $msg =~ s/\n\s*/\n /g ; # indent consecutive lines
+ push @Warnings, "- $msg\n" ;
+}
+
+sub Info
+{
+ my $msg = &DecodeInput(shift) ;
+ $msg =~ s/\n\s*/\n /g ; # indent consecutive lines
+ if (! $listinput)
+ { push @Info, "Line $LineNo: " . &DecodeInput ($Line) . "\n" ; }
+ push @Info, "- $msg\n\n" ;
+}
+
+sub Info2
+{
+ my $msg = &DecodeInput(shift) ;
+ $msg =~ s/\n\s*/\n /g ; # indent consecutive lines
+ push @Info, "- $msg\n" ;
+}
+
+sub Abort
+{
+ my $msg = &DecodeInput(shift) ;
+
+ print "\n\n***** " . $msg . " *****\n\n" ;
+ print @Errors ;
+ print "Execution aborted.\n" ;
+
+ open "FILE_OUT", ">", $file_errors ;
+ print FILE_OUT "<p>EasyTimeline $version</p><p><b>Timeline generation failed: " . &EncodeHtml ($msg) ."</b></p>\n" ;
+ foreach $line (@Errors)
+ { print FILE_OUT &EncodeHtml ($line) . "\n" ; }
+ close "FILE_OUT" ;
+
+ if ($makehtml) # generate html test file, which would normally contain png + svg (+ image map)
+ {
+ open "FILE_IN", "<", $file_errors ;
+ open "FILE_OUT", ">", $file_html ;
+ print FILE_OUT "<html><head>\n<title>Graphical Timelines - HTML test file</title>\n</head>\n" .
+ "<body><h1><font color='green'>EasyTimeline</font> - Test Page</h1>\n\n" .
+ "<code>\n" ;
+ print FILE_OUT <FILE_IN> ;
+ print FILE_OUT "</code>\n\n</body>\n</html>" ;
+ close "FILE_IN" ;
+ close "FILE_OUT" ;
+ }
+ exit ;
+}
+
+sub EscapeShellArg
+{
+ my $arg = shift;
+ if ($env eq "Linux") {
+ $arg =~ s/'/\\'/;
+ $arg = "'$arg'";
+ } else {
+ $arg =~ s/"/\\"/;
+ $arg = "\"$arg\"";
+ }
+ return $arg;
+}
+
+# vim: set sts=2 ts=2 sw=2 et :
+
+sub UnicodeToAscii {
+ my $unicode = shift ;
+ my $char = substr ($unicode,0,1) ;
+ my $ord = ord ($char) ;
+
+ if ($ord < 128) # plain ascii character
+ { return ($unicode) ; } # (will not occur in this script)
+ else
+ {
+ # for completeness' sake the full routine, though only 2-byte unicodes are sent here
+ if ($ord >= 252)
+ { $value = $ord - 252 ; }
+ elsif ($ord >= 248)
+ { $value = $ord - 248 ; }
+ elsif ($ord >= 240)
+ { $value = $ord - 240 ; }
+ elsif ($ord >= 224)
+ { $value = $ord - 224 ; }
+ else
+ { $value = $ord - 192 ; }
+ for ($c = 1 ; $c < length ($unicode) ; $c++)
+ { $value = $value * 64 + ord (substr ($unicode, $c,1)) - 128 ; }
+
+# $html = "\&\#" . $value . ";" ; any unicode can be specified as html char
+
+ if (($value >= 128) && ($value <= 255))
+ { return (chr ($value)) ; }
+ else
+ { return "?" ; }
+ }
+}
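+
+# UnicodeToAscii example (hand-checked): the 2-byte UTF-8 sequence 0xC3 0xA9
+# ("e" with acute accent) gives value = (0xC3 - 192) * 64 + (0xA9 - 128) = 233,
+# which is returned as the matching Latin-1 character chr(233).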
+
diff --git a/mwlib/Makefile b/mwlib/Makefile
new file mode 100644
index 0000000..6f244ef
--- /dev/null
+++ b/mwlib/Makefile
@@ -0,0 +1,20 @@
+RE2C = re2c -w --no-generation-date
+
+all: _expander.cc _mwscan.cc _mwscan.so _expander.so
+
+_expander.so: _expander.cc
+ (cd .. && python ./setup.py build_ext --inplace build)
+
+_mwscan.so: _mwscan.cc
+ (cd .. && python ./setup.py build_ext --inplace build)
+
+_expander.cc: _expander.re
+ $(RE2C) -o _expander.cc _expander.re
+
+_mwscan.cc: _mwscan.re
+ $(RE2C) -o _mwscan.cc _mwscan.re
+
+clean::
+ rm -rf *.pyc *~ *.so build a.out
+
+
diff --git a/mwlib/__init__.py b/mwlib/__init__.py
new file mode 100755
index 0000000..8088807
--- /dev/null
+++ b/mwlib/__init__.py
@@ -0,0 +1,6 @@
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+#import pkg_resources
+#pkg_resources.declare_namespace("mwlib")
diff --git a/mwlib/_expander.cc b/mwlib/_expander.cc
new file mode 100644
index 0000000..9641ae1
--- /dev/null
+++ b/mwlib/_expander.cc
@@ -0,0 +1,826 @@
+/* Generated by re2c 0.13.4 */
+#line 1 "_expander.re"
+// -*- mode: c++ -*-
+// Copyright (c) 2007-2008 PediaPress GmbH
+// See README.txt for additional licensing information.
+
+#include <Python.h>
+
+#include <iostream>
+#include <assert.h>
+#include <vector>
+
+using namespace std;
+
+#define RET(x) {found(x); return x;}
+
+struct Token
+{
+ int type;
+ int start;
+ int len;
+};
+
+
+class MacroScanner
+{
+public:
+
+ MacroScanner(Py_UNICODE *_start, Py_UNICODE *_end) {
+ source = start = _start;
+ end = _end;
+ cursor = start;
+ }
+
+ int found(int val) {
+ if (val==5 && tokens.size()) {
+ Token &previous_token (tokens[tokens.size()-1]);
+ if (previous_token.type==val) {
+ previous_token.len += cursor-start;
+ return tokens.size()-1;
+ }
+ }
+ Token t;
+ t.type = val;
+ t.start = (start-source);
+ t.len = cursor-start;
+ tokens.push_back(t);
+ return tokens.size()-1;
+ }
+
+ inline int scan();
+
+ Py_UNICODE *source;
+
+ Py_UNICODE *start;
+ Py_UNICODE *cursor;
+ Py_UNICODE *end;
+ vector<Token> tokens;
+};
+
+
+int MacroScanner::scan()
+{
+
+std:
+
+ start=cursor;
+
+ Py_UNICODE *marker=cursor;
+
+ Py_UNICODE *save_cursor = cursor;
+
+
+#define YYCTYPE Py_UNICODE
+#define YYCURSOR cursor
+#define YYMARKER marker
+#define YYLIMIT (end)
+// #define YYFILL(n) return 0;
+
+#line 80 "_expander.re"
+
+
+
+
+
+#line 87 "_expander.cc"
+{
+ YYCTYPE yych;
+
+ yych = *YYCURSOR;
+ if (yych <= '\\') {
+ if (yych <= '<') {
+ if (yych <= 0x0000) goto yy10;
+ if (yych <= ';') goto yy12;
+ goto yy9;
+ } else {
+ if (yych == '[') goto yy5;
+ goto yy12;
+ }
+ } else {
+ if (yych <= '{') {
+ if (yych <= ']') goto yy6;
+ if (yych <= 'z') goto yy12;
+ } else {
+ if (yych <= '|') goto yy7;
+ if (yych <= '}') goto yy4;
+ goto yy12;
+ }
+ }
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '{') goto yy78;
+yy3:
+#line 99 "_expander.re"
+ {RET(5);}
+#line 116 "_expander.cc"
+yy4:
+ yych = *++YYCURSOR;
+ if (yych == '}') goto yy75;
+ goto yy3;
+yy5:
+ yych = *++YYCURSOR;
+ if (yych == '[') goto yy73;
+ goto yy3;
+yy6:
+ yych = *++YYCURSOR;
+ if (yych == ']') goto yy73;
+ goto yy3;
+yy7:
+ ++YYCURSOR;
+#line 88 "_expander.re"
+ {RET(6);}
+#line 133 "_expander.cc"
+yy9:
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych <= 'M') {
+ if (yych <= 'G') {
+ if (yych == '!') goto yy13;
+ if (yych <= 'F') goto yy3;
+ goto yy15;
+ } else {
+ if (yych == 'I') goto yy17;
+ if (yych <= 'L') goto yy3;
+ goto yy16;
+ }
+ } else {
+ if (yych <= 'h') {
+ if (yych <= 'N') goto yy18;
+ if (yych == 'g') goto yy15;
+ goto yy3;
+ } else {
+ if (yych <= 'l') {
+ if (yych <= 'i') goto yy17;
+ goto yy3;
+ } else {
+ if (yych <= 'm') goto yy16;
+ if (yych <= 'n') goto yy18;
+ goto yy3;
+ }
+ }
+ }
+yy10:
+ ++YYCURSOR;
+#line 98 "_expander.re"
+ {RET(0);}
+#line 166 "_expander.cc"
+yy12:
+ yych = *++YYCURSOR;
+ goto yy3;
+yy13:
+ yych = *++YYCURSOR;
+ if (yych == '-') goto yy60;
+yy14:
+ YYCURSOR = YYMARKER;
+ goto yy3;
+yy15:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy51;
+ if (yych == 'a') goto yy51;
+ goto yy14;
+yy16:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy45;
+ if (yych == 'a') goto yy45;
+ goto yy14;
+yy17:
+ yych = *++YYCURSOR;
+ if (yych == 'M') goto yy35;
+ if (yych == 'm') goto yy35;
+ goto yy14;
+yy18:
+ yych = *++YYCURSOR;
+ if (yych == 'O') goto yy19;
+ if (yych != 'o') goto yy14;
+yy19:
+ yych = *++YYCURSOR;
+ if (yych <= 'W') {
+ if (yych == 'I') goto yy21;
+ if (yych <= 'V') goto yy14;
+ } else {
+ if (yych <= 'i') {
+ if (yych <= 'h') goto yy14;
+ goto yy21;
+ } else {
+ if (yych != 'w') goto yy14;
+ }
+ }
+ yych = *++YYCURSOR;
+ if (yych == 'I') goto yy30;
+ if (yych == 'i') goto yy30;
+ goto yy14;
+yy21:
+ yych = *++YYCURSOR;
+ if (yych == 'N') goto yy22;
+ if (yych != 'n') goto yy14;
+yy22:
+ yych = *++YYCURSOR;
+ if (yych == 'C') goto yy23;
+ if (yych != 'c') goto yy14;
+yy23:
+ yych = *++YYCURSOR;
+ if (yych == 'L') goto yy24;
+ if (yych != 'l') goto yy14;
+yy24:
+ yych = *++YYCURSOR;
+ if (yych == 'U') goto yy25;
+ if (yych != 'u') goto yy14;
+yy25:
+ yych = *++YYCURSOR;
+ if (yych == 'D') goto yy26;
+ if (yych != 'd') goto yy14;
+yy26:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy27;
+ if (yych != 'e') goto yy14;
+yy27:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy14;
+ ++YYCURSOR;
+#line 90 "_expander.re"
+ {goto noinclude;}
+#line 242 "_expander.cc"
+yy30:
+ yych = *++YYCURSOR;
+ if (yych == 'K') goto yy31;
+ if (yych != 'k') goto yy14;
+yy31:
+ yych = *++YYCURSOR;
+ if (yych == 'I') goto yy32;
+ if (yych != 'i') goto yy14;
+yy32:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy14;
+ ++YYCURSOR;
+#line 91 "_expander.re"
+ {goto nowiki;}
+#line 257 "_expander.cc"
+yy35:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy36;
+ if (yych != 'a') goto yy14;
+yy36:
+ yych = *++YYCURSOR;
+ if (yych == 'G') goto yy37;
+ if (yych != 'g') goto yy14;
+yy37:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy38;
+ if (yych != 'e') goto yy14;
+yy38:
+ yych = *++YYCURSOR;
+ if (yych == 'M') goto yy39;
+ if (yych != 'm') goto yy14;
+yy39:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy40;
+ if (yych != 'a') goto yy14;
+yy40:
+ yych = *++YYCURSOR;
+ if (yych == 'P') goto yy41;
+ if (yych != 'p') goto yy14;
+yy41:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '<') {
+ if (yych <= 0x0000) goto yy14;
+ if (yych <= ';') goto yy41;
+ goto yy14;
+ } else {
+ if (yych != '>') goto yy41;
+ }
+ ++YYCURSOR;
+#line 92 "_expander.re"
+ {goto imagemap;}
+#line 295 "_expander.cc"
+yy45:
+ yych = *++YYCURSOR;
+ if (yych == 'T') goto yy46;
+ if (yych != 't') goto yy14;
+yy46:
+ yych = *++YYCURSOR;
+ if (yych == 'H') goto yy47;
+ if (yych != 'h') goto yy14;
+yy47:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '<') {
+ if (yych <= 0x0000) goto yy14;
+ if (yych <= ';') goto yy47;
+ goto yy14;
+ } else {
+ if (yych != '>') goto yy47;
+ }
+ ++YYCURSOR;
+#line 93 "_expander.re"
+ {goto math;}
+#line 317 "_expander.cc"
+yy51:
+ yych = *++YYCURSOR;
+ if (yych == 'L') goto yy52;
+ if (yych != 'l') goto yy14;
+yy52:
+ yych = *++YYCURSOR;
+ if (yych == 'L') goto yy53;
+ if (yych != 'l') goto yy14;
+yy53:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy54;
+ if (yych != 'e') goto yy14;
+yy54:
+ yych = *++YYCURSOR;
+ if (yych == 'R') goto yy55;
+ if (yych != 'r') goto yy14;
+yy55:
+ yych = *++YYCURSOR;
+ if (yych == 'Y') goto yy56;
+ if (yych != 'y') goto yy14;
+yy56:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '<') {
+ if (yych <= 0x0000) goto yy14;
+ if (yych <= ';') goto yy56;
+ goto yy14;
+ } else {
+ if (yych != '>') goto yy56;
+ }
+ ++YYCURSOR;
+#line 94 "_expander.re"
+ {goto gallery;}
+#line 351 "_expander.cc"
+yy60:
+ yych = *++YYCURSOR;
+ if (yych != '-') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '[') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '^') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych >= 0x0001) goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '<') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != ']') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '*') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '-') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '-') goto yy14;
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy14;
+ ++YYCURSOR;
+#line 96 "_expander.re"
+ {RET(5);}
+#line 378 "_expander.cc"
+yy73:
+ ++YYCURSOR;
+#line 87 "_expander.re"
+ {RET(3);}
+#line 383 "_expander.cc"
+yy75:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych == '}') goto yy75;
+#line 86 "_expander.re"
+ {RET(2);}
+#line 390 "_expander.cc"
+yy78:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych == '{') goto yy78;
+#line 85 "_expander.re"
+ {RET(1);}
+#line 397 "_expander.cc"
+}
+#line 101 "_expander.re"
+
+
+
+
+noinclude:
+
+#line 406 "_expander.cc"
+{
+ YYCTYPE yych;
+ yych = *YYCURSOR;
+ if (yych <= 0x0000) goto yy86;
+ if (yych != '<') goto yy85;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '/') goto yy88;
+yy84:
+#line 108 "_expander.re"
+ {goto noinclude;}
+#line 417 "_expander.cc"
+yy85:
+ yych = *++YYCURSOR;
+ goto yy84;
+yy86:
+ ++YYCURSOR;
+#line 109 "_expander.re"
+ {cursor=start+11; RET(5);}
+#line 425 "_expander.cc"
+yy88:
+ yych = *++YYCURSOR;
+ if (yych == 'N') goto yy90;
+ if (yych == 'n') goto yy90;
+yy89:
+ YYCURSOR = YYMARKER;
+ goto yy84;
+yy90:
+ yych = *++YYCURSOR;
+ if (yych == 'O') goto yy91;
+ if (yych != 'o') goto yy89;
+yy91:
+ yych = *++YYCURSOR;
+ if (yych == 'I') goto yy92;
+ if (yych != 'i') goto yy89;
+yy92:
+ yych = *++YYCURSOR;
+ if (yych == 'N') goto yy93;
+ if (yych != 'n') goto yy89;
+yy93:
+ yych = *++YYCURSOR;
+ if (yych == 'C') goto yy94;
+ if (yych != 'c') goto yy89;
+yy94:
+ yych = *++YYCURSOR;
+ if (yych == 'L') goto yy95;
+ if (yych != 'l') goto yy89;
+yy95:
+ yych = *++YYCURSOR;
+ if (yych == 'U') goto yy96;
+ if (yych != 'u') goto yy89;
+yy96:
+ yych = *++YYCURSOR;
+ if (yych == 'D') goto yy97;
+ if (yych != 'd') goto yy89;
+yy97:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy98;
+ if (yych != 'e') goto yy89;
+yy98:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy89;
+ ++YYCURSOR;
+#line 107 "_expander.re"
+ {goto std;}
+#line 471 "_expander.cc"
+}
+#line 110 "_expander.re"
+
+
+nowiki:
+
+#line 478 "_expander.cc"
+{
+ YYCTYPE yych;
+ yych = *YYCURSOR;
+ if (yych <= 0x0000) goto yy106;
+ if (yych != '<') goto yy105;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '/') goto yy108;
+yy104:
+#line 115 "_expander.re"
+ {goto nowiki;}
+#line 489 "_expander.cc"
+yy105:
+ yych = *++YYCURSOR;
+ goto yy104;
+yy106:
+ ++YYCURSOR;
+#line 116 "_expander.re"
+ {RET(0);}
+#line 497 "_expander.cc"
+yy108:
+ yych = *++YYCURSOR;
+ if (yych == 'N') goto yy110;
+ if (yych == 'n') goto yy110;
+yy109:
+ YYCURSOR = YYMARKER;
+ goto yy104;
+yy110:
+ yych = *++YYCURSOR;
+ if (yych == 'O') goto yy111;
+ if (yych != 'o') goto yy109;
+yy111:
+ yych = *++YYCURSOR;
+ if (yych == 'W') goto yy112;
+ if (yych != 'w') goto yy109;
+yy112:
+ yych = *++YYCURSOR;
+ if (yych == 'I') goto yy113;
+ if (yych != 'i') goto yy109;
+yy113:
+ yych = *++YYCURSOR;
+ if (yych == 'K') goto yy114;
+ if (yych != 'k') goto yy109;
+yy114:
+ yych = *++YYCURSOR;
+ if (yych == 'I') goto yy115;
+ if (yych != 'i') goto yy109;
+yy115:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy109;
+ ++YYCURSOR;
+#line 114 "_expander.re"
+ {RET(5);}
+#line 531 "_expander.cc"
+}
+#line 117 "_expander.re"
+
+
+math:
+
+#line 538 "_expander.cc"
+{
+ YYCTYPE yych;
+ yych = *YYCURSOR;
+ if (yych <= 0x0000) goto yy123;
+ if (yych != '<') goto yy122;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '/') goto yy125;
+yy121:
+#line 122 "_expander.re"
+ {goto math;}
+#line 549 "_expander.cc"
+yy122:
+ yych = *++YYCURSOR;
+ goto yy121;
+yy123:
+ ++YYCURSOR;
+#line 123 "_expander.re"
+ {RET(0);}
+#line 557 "_expander.cc"
+yy125:
+ yych = *++YYCURSOR;
+ if (yych == 'M') goto yy127;
+ if (yych == 'm') goto yy127;
+yy126:
+ YYCURSOR = YYMARKER;
+ goto yy121;
+yy127:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy128;
+ if (yych != 'a') goto yy126;
+yy128:
+ yych = *++YYCURSOR;
+ if (yych == 'T') goto yy129;
+ if (yych != 't') goto yy126;
+yy129:
+ yych = *++YYCURSOR;
+ if (yych == 'H') goto yy130;
+ if (yych != 'h') goto yy126;
+yy130:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy126;
+ ++YYCURSOR;
+#line 121 "_expander.re"
+ {RET(5);}
+#line 583 "_expander.cc"
+}
+#line 124 "_expander.re"
+
+
+gallery:
+
+#line 590 "_expander.cc"
+{
+ YYCTYPE yych;
+ yych = *YYCURSOR;
+ if (yych <= 0x0000) goto yy138;
+ if (yych != '<') goto yy137;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '/') goto yy140;
+yy136:
+#line 129 "_expander.re"
+ {goto gallery;}
+#line 601 "_expander.cc"
+yy137:
+ yych = *++YYCURSOR;
+ goto yy136;
+yy138:
+ ++YYCURSOR;
+#line 130 "_expander.re"
+ {RET(0);}
+#line 609 "_expander.cc"
+yy140:
+ yych = *++YYCURSOR;
+ if (yych == 'G') goto yy142;
+ if (yych == 'g') goto yy142;
+yy141:
+ YYCURSOR = YYMARKER;
+ goto yy136;
+yy142:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy143;
+ if (yych != 'a') goto yy141;
+yy143:
+ yych = *++YYCURSOR;
+ if (yych == 'L') goto yy144;
+ if (yych != 'l') goto yy141;
+yy144:
+ yych = *++YYCURSOR;
+ if (yych == 'L') goto yy145;
+ if (yych != 'l') goto yy141;
+yy145:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy146;
+ if (yych != 'e') goto yy141;
+yy146:
+ yych = *++YYCURSOR;
+ if (yych == 'R') goto yy147;
+ if (yych != 'r') goto yy141;
+yy147:
+ yych = *++YYCURSOR;
+ if (yych == 'Y') goto yy148;
+ if (yych != 'y') goto yy141;
+yy148:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy141;
+ ++YYCURSOR;
+#line 128 "_expander.re"
+ {RET(5);}
+#line 647 "_expander.cc"
+}
+#line 131 "_expander.re"
+
+
+imagemap:
+
+#line 654 "_expander.cc"
+{
+ YYCTYPE yych;
+ yych = *YYCURSOR;
+ if (yych <= 0x0000) goto yy156;
+ if (yych != '<') goto yy155;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '/') goto yy158;
+yy154:
+#line 136 "_expander.re"
+ {goto imagemap;}
+#line 665 "_expander.cc"
+yy155:
+ yych = *++YYCURSOR;
+ goto yy154;
+yy156:
+ ++YYCURSOR;
+#line 137 "_expander.re"
+ {RET(0);}
+#line 673 "_expander.cc"
+yy158:
+ yych = *++YYCURSOR;
+ if (yych == 'I') goto yy160;
+ if (yych == 'i') goto yy160;
+yy159:
+ YYCURSOR = YYMARKER;
+ goto yy154;
+yy160:
+ yych = *++YYCURSOR;
+ if (yych == 'M') goto yy161;
+ if (yych != 'm') goto yy159;
+yy161:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy162;
+ if (yych != 'a') goto yy159;
+yy162:
+ yych = *++YYCURSOR;
+ if (yych == 'G') goto yy163;
+ if (yych != 'g') goto yy159;
+yy163:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy164;
+ if (yych != 'e') goto yy159;
+yy164:
+ yych = *++YYCURSOR;
+ if (yych == 'M') goto yy165;
+ if (yych != 'm') goto yy159;
+yy165:
+ yych = *++YYCURSOR;
+ if (yych == 'A') goto yy166;
+ if (yych != 'a') goto yy159;
+yy166:
+ yych = *++YYCURSOR;
+ if (yych == 'P') goto yy167;
+ if (yych != 'p') goto yy159;
+yy167:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy159;
+ ++YYCURSOR;
+#line 135 "_expander.re"
+ {RET(5);}
+#line 715 "_expander.cc"
+}
+#line 138 "_expander.re"
+
+
+pre:
+
+#line 722 "_expander.cc"
+{
+ YYCTYPE yych;
+ yych = *YYCURSOR;
+ if (yych <= 0x0000) goto yy175;
+ if (yych != '<') goto yy174;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '/') goto yy177;
+yy173:
+#line 143 "_expander.re"
+ {goto pre;}
+#line 733 "_expander.cc"
+yy174:
+ yych = *++YYCURSOR;
+ goto yy173;
+yy175:
+ ++YYCURSOR;
+#line 144 "_expander.re"
+ {RET(0);}
+#line 741 "_expander.cc"
+yy177:
+ yych = *++YYCURSOR;
+ if (yych == 'P') goto yy179;
+ if (yych == 'p') goto yy179;
+yy178:
+ YYCURSOR = YYMARKER;
+ goto yy173;
+yy179:
+ yych = *++YYCURSOR;
+ if (yych == 'R') goto yy180;
+ if (yych != 'r') goto yy178;
+yy180:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy181;
+ if (yych != 'e') goto yy178;
+yy181:
+ yych = *++YYCURSOR;
+ if (yych != '>') goto yy178;
+ ++YYCURSOR;
+#line 142 "_expander.re"
+ {RET(5);}
+#line 763 "_expander.cc"
+}
+#line 145 "_expander.re"
+
+
+}
+
+
+PyObject *py_scan(PyObject *self, PyObject *args)
+{
+ PyObject *arg1;
+ if (!PyArg_ParseTuple(args, "O:_expander.scan", &arg1)) {
+ return 0;
+ }
+ PyUnicodeObject *unistr = (PyUnicodeObject*)PyUnicode_FromObject(arg1);
+ if (unistr == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "parameter cannot be converted to unicode in _expander.scan");
+ return 0;
+ }
+
+ Py_UNICODE *start = unistr->str;
+ Py_UNICODE *end = start+unistr->length;
+
+
+ MacroScanner scanner (start, end);
+ Py_BEGIN_ALLOW_THREADS
+ while (scanner.scan()) {
+ }
+ Py_END_ALLOW_THREADS
+ Py_XDECREF(unistr);
+
+ // return PyList_New(0); // uncomment to see timings for scanning
+
+ int size = scanner.tokens.size();
+ PyObject *result = PyList_New(size);
+ if (!result) {
+ return 0;
+ }
+
+ for (int i=0; i<size; i++) {
+ Token t = scanner.tokens[i];
+ PyList_SET_ITEM(result, i, Py_BuildValue("iii", t.type, t.start, t.len));
+ }
+
+ return result;
+}
+
+
+
+static PyMethodDef module_functions[] = {
+ {"scan", (PyCFunction)py_scan, METH_VARARGS, "scan(text)"},
+ {0, 0},
+};
+
+
+
+extern "C" {
+ DL_EXPORT(void) init_expander();
+}
+
+DL_EXPORT(void) init_expander()
+{
+ /*PyObject *m =*/ Py_InitModule("_expander", module_functions);
+}
diff --git a/mwlib/_expander.re b/mwlib/_expander.re
new file mode 100644
index 0000000..7abb2ac
--- /dev/null
+++ b/mwlib/_expander.re
@@ -0,0 +1,206 @@
+// -*- mode: c++ -*-
+// Copyright (c) 2007-2008 PediaPress GmbH
+// See README.txt for additional licensing information.
+
+#include <Python.h>
+
+#include <iostream>
+#include <assert.h>
+#include <vector>
+
+using namespace std;
+
+#define RET(x) {found(x); return x;}
+
+struct Token
+{
+ int type;
+ int start;
+ int len;
+};
+
+
+class MacroScanner
+{
+public:
+
+ MacroScanner(Py_UNICODE *_start, Py_UNICODE *_end) {
+ source = start = _start;
+ end = _end;
+ cursor = start;
+ }
+
+ int found(int val) {
+ if (val==5 && tokens.size()) {
+ Token &previous_token (tokens[tokens.size()-1]);
+ if (previous_token.type==val) {
+ previous_token.len += cursor-start;
+ return tokens.size()-1;
+ }
+ }
+ Token t;
+ t.type = val;
+ t.start = (start-source);
+ t.len = cursor-start;
+ tokens.push_back(t);
+ return tokens.size()-1;
+ }
+
+ inline int scan();
+
+ Py_UNICODE *source;
+
+ Py_UNICODE *start;
+ Py_UNICODE *cursor;
+ Py_UNICODE *end;
+ vector<Token> tokens;
+};
+
+
+int MacroScanner::scan()
+{
+
+std:
+
+ start=cursor;
+
+ Py_UNICODE *marker=cursor;
+
+ Py_UNICODE *save_cursor = cursor;
+
+
+#define YYCTYPE Py_UNICODE
+#define YYCURSOR cursor
+#define YYMARKER marker
+#define YYLIMIT (end)
+// #define YYFILL(n) return 0;
+
+/*!re2c
+re2c:yyfill:enable = 0 ;
+*/
+
+
+
+/*!re2c
+ "{"{2,} {RET(1);}
+ "}"{2,} {RET(2);}
+ "[[" | "]]" {RET(3);}
+ "|" {RET(6);}
+
+ '<noinclude>' {goto noinclude;}
+ '<nowiki>' {goto nowiki;}
+ '<imagemap' [^<>\000]* '>' {goto imagemap;}
+ '<math' [^<>\000]* '>' {goto math;}
+ '<gallery' [^<>\000]* '>' {goto gallery;}
+
+ "<!--[^\000<>]*-->" {RET(5);}
+
+ "\000" {RET(0);}
+ [^\000] {RET(5);}
+
+ */
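+
+// Token type codes emitted by the rules above, as read off the RET() actions:
+// 0 = end of input, 1 = two or more "{", 2 = two or more "}", 3 = "[[" or "]]",
+// 5 = plain text (consecutive text tokens are merged in found()), 6 = "|".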
+
+
+
+noinclude:
+/*!re2c
+ '</noinclude>' {goto std;}
+ [^\000] {goto noinclude;}
+ "\000" {cursor=start+11; RET(5);}
+ */
+
+nowiki:
+/*!re2c
+ '</nowiki>' {RET(5);}
+ [^\000] {goto nowiki;}
+ "\000" {RET(0);}
+ */
+
+math:
+/*!re2c
+ '</math>' {RET(5);}
+ [^\000] {goto math;}
+ "\000" {RET(0);}
+ */
+
+gallery:
+/*!re2c
+ '</gallery>' {RET(5);}
+ [^\000] {goto gallery;}
+ "\000" {RET(0);}
+ */
+
+imagemap:
+/*!re2c
+ '</imagemap>' {RET(5);}
+ [^\000] {goto imagemap;}
+ "\000" {RET(0);}
+ */
+
+pre:
+/*!re2c
+ '</pre>' {RET(5);}
+ [^\000] {goto pre;}
+ "\000" {RET(0);}
+ */
+
+}
+
+
+PyObject *py_scan(PyObject *self, PyObject *args)
+{
+ PyObject *arg1;
+ if (!PyArg_ParseTuple(args, "O:_expander.scan", &arg1)) {
+ return 0;
+ }
+ PyUnicodeObject *unistr = (PyUnicodeObject*)PyUnicode_FromObject(arg1);
+ if (unistr == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "parameter cannot be converted to unicode in _expander.scan");
+ return 0;
+ }
+
+ Py_UNICODE *start = unistr->str;
+ Py_UNICODE *end = start+unistr->length;
+
+
+ MacroScanner scanner (start, end);
+ Py_BEGIN_ALLOW_THREADS
+ while (scanner.scan()) {
+ }
+ Py_END_ALLOW_THREADS
+ Py_XDECREF(unistr);
+
+ // return PyList_New(0); // uncomment to see timings for scanning
+
+ int size = scanner.tokens.size();
+ PyObject *result = PyList_New(size);
+ if (!result) {
+ return 0;
+ }
+
+ for (int i=0; i<size; i++) {
+ Token t = scanner.tokens[i];
+ PyList_SET_ITEM(result, i, Py_BuildValue("iii", t.type, t.start, t.len));
+ }
+
+ return result;
+}
+
+
+
+static PyMethodDef module_functions[] = {
+ {"scan", (PyCFunction)py_scan, METH_VARARGS, "scan(text)"},
+ {0, 0},
+};
+
+
+
+extern "C" {
+ DL_EXPORT(void) init_expander();
+}
+
+DL_EXPORT(void) init_expander()
+{
+ /*PyObject *m =*/ Py_InitModule("_expander", module_functions);
+}
diff --git a/mwlib/_expander.so b/mwlib/_expander.so
new file mode 100755
index 0000000..be09917
--- /dev/null
+++ b/mwlib/_expander.so
Binary files differ
diff --git a/mwlib/_mwscan.cc b/mwlib/_mwscan.cc
new file mode 100644
index 0000000..f673880
--- /dev/null
+++ b/mwlib/_mwscan.cc
@@ -0,0 +1,1699 @@
+/* Generated by re2c 0.13.4 */
+#line 1 "_mwscan.re"
+// -*- mode: c++ -*-
+// Copyright (c) 2007-2008 PediaPress GmbH
+// See README.txt for additional licensing information.
+
+#include <Python.h>
+
+#include <iostream>
+#include <assert.h>
+#include <vector>
+using namespace std;
+
+#define RET(x) {found(x); return x;}
+
+typedef enum {
+ t_end,
+ t_text,
+ t_entity,
+ t_special,
+ t_magicword,
+ t_comment,
+ t_2box_open, // [[
+ t_2box_close, // ]]
+ t_http_url,
+ t_break,
+ t_begin_table,
+ t_end_table,
+ t_html_tag,
+ t_style,
+ t_pre,
+ t_section,
+ t_section_end,
+ t_item,
+ t_colon,
+ t_semicolon,
+ t_hrule,
+ t_newline,
+ t_column,
+ t_row,
+ t_tablecaption,
+ t_urllink,
+} mwtok;
+
+struct Token
+{
+ int type;
+ int start;
+ int len;
+};
+
+class Scanner
+{
+public:
+
+ Scanner(Py_UNICODE *_start, Py_UNICODE *_end) {
+ source = start = _start;
+ end = _end;
+ cursor = start;
+ line_startswith_section = -1;
+ tablemode=0;
+ }
+
+ int found(mwtok val) {
+ if (val==t_text && tokens.size()) {
+ Token &previous_token (tokens[tokens.size()-1]);
+ if (previous_token.type==val) {
+ previous_token.len += cursor-start;
+ return tokens.size()-1;
+ }
+ }
+ Token t;
+ t.type = val;
+ t.start = (start-source);
+ t.len = cursor-start;
+ tokens.push_back(t);
+ return tokens.size()-1;
+ }
+
+ bool bol() const {
+ return (start==source) || (start[-1]=='\n');
+ }
+
+ bool eol() const {
+ return *cursor=='\n' || *cursor==0;
+ }
+
+ void newline() {
+ if (line_startswith_section>=0) {
+ tokens[line_startswith_section].type = t_text;
+ }
+ line_startswith_section = -1;
+ }
+
+ inline int scan();
+
+ Py_UNICODE *source;
+
+ Py_UNICODE *start;
+ Py_UNICODE *cursor;
+ Py_UNICODE *end;
+ vector<Token> tokens;
+
+ int line_startswith_section;
+ int tablemode;
+};
+
+
+int Scanner::scan()
+{
+ start=cursor;
+
+ Py_UNICODE *marker=cursor;
+
+ Py_UNICODE *save_cursor = cursor;
+
+
+#define YYCTYPE Py_UNICODE
+#define YYCURSOR cursor
+#define YYMARKER marker
+#define YYLIMIT (end)
+// #define YYFILL(n) return 0;
+
+#line 124 "_mwscan.re"
+
+
+/*
+ the re2c manpage says:
+ "The user must arrange for a sentinel token to appear at the end of input"
+ \000 is our sentinel token.
+*/
+
+#line 157 "_mwscan.re"
+
+ if (!bol()) {
+ goto not_bol;
+ }
+
+#line 140 "_mwscan.cc"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept = 0;
+
+ yych = *YYCURSOR;
+ if (yych <= '-') {
+ if (yych <= '"') {
+ if (yych <= 0x001F) goto yy18;
+ if (yych <= ' ') goto yy2;
+ if (yych <= '!') goto yy8;
+ goto yy18;
+ } else {
+ if (yych <= ')') {
+ if (yych <= '#') goto yy13;
+ goto yy18;
+ } else {
+ if (yych <= '*') goto yy13;
+ if (yych <= ',') goto yy18;
+ goto yy17;
+ }
+ }
+ } else {
+ if (yych <= '<') {
+ if (yych <= '9') goto yy18;
+ if (yych <= ':') goto yy11;
+ if (yych <= ';') goto yy15;
+ goto yy18;
+ } else {
+ if (yych <= 'z') {
+ if (yych <= '=') goto yy9;
+ goto yy18;
+ } else {
+ if (yych <= '{') goto yy4;
+ if (yych <= '|') goto yy6;
+ goto yy18;
+ }
+ }
+ }
+yy2:
+ yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych <= 0x001F) goto yy3;
+ if (yych <= '!') goto yy47;
+ if (yych <= 'z') goto yy3;
+ if (yych <= '|') goto yy47;
+yy3:
+#line 199 "_mwscan.re"
+ {RET(t_pre);}
+#line 189 "_mwscan.cc"
+yy4:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '|') goto yy43;
+yy5:
+#line 209 "_mwscan.re"
+ {goto not_bol;}
+#line 196 "_mwscan.cc"
+yy6:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) <= ',') {
+ if (yych == '+') goto yy35;
+ } else {
+ if (yych <= '-') goto yy38;
+ if (yych == '}') goto yy41;
+ }
+yy7:
+#line 177 "_mwscan.re"
+ {
+ if (tablemode)
+ RET(t_column);
+
+ if (*start==' ') {
+ cursor = start+1;
+ RET(t_pre);
+ }
+ RET(t_text);
+ }
+#line 217 "_mwscan.cc"
+yy8:
+ yych = *++YYCURSOR;
+ goto yy7;
+yy9:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '=') goto yy33;
+ goto yy32;
+yy10:
+#line 200 "_mwscan.re"
+ {
+ line_startswith_section = found(t_section);
+ return t_section;
+ }
+#line 231 "_mwscan.cc"
+yy11:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) <= ')') {
+ if (yych == '#') goto yy27;
+ } else {
+ if (yych <= '*') goto yy27;
+ if (yych == ':') goto yy29;
+ }
+yy12:
+#line 205 "_mwscan.re"
+ {RET(t_colon);}
+#line 243 "_mwscan.cc"
+yy13:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy28;
+yy14:
+#line 204 "_mwscan.re"
+ {RET(t_item);}
+#line 251 "_mwscan.cc"
+yy15:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy26;
+yy16:
+#line 206 "_mwscan.re"
+ {RET(t_semicolon);}
+#line 259 "_mwscan.cc"
+yy17:
+ yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == '-') goto yy19;
+ goto yy5;
+yy18:
+ yych = *++YYCURSOR;
+ goto yy5;
+yy19:
+ yych = *++YYCURSOR;
+ if (yych == '-') goto yy21;
+yy20:
+ YYCURSOR = YYMARKER;
+ if (yyaccept <= 0) {
+ goto yy3;
+ } else {
+ goto yy5;
+ }
+yy21:
+ yych = *++YYCURSOR;
+ if (yych != '-') goto yy20;
+yy22:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych == '-') goto yy22;
+#line 207 "_mwscan.re"
+ {RET(t_hrule);}
+#line 287 "_mwscan.cc"
+yy25:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy26:
+ if (yych == ';') goto yy25;
+ goto yy16;
+yy27:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy28:
+ if (yych == '#') goto yy27;
+ if (yych == '*') goto yy27;
+ goto yy14;
+yy29:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= ')') {
+ if (yych == '#') goto yy27;
+ goto yy12;
+ } else {
+ if (yych <= '*') goto yy27;
+ if (yych == ':') goto yy29;
+ goto yy12;
+ }
+yy31:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy32:
+ if (yych == '\t') goto yy31;
+ if (yych == ' ') goto yy31;
+ goto yy10;
+yy33:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= 0x001F) {
+ if (yych == '\t') goto yy31;
+ goto yy10;
+ } else {
+ if (yych <= ' ') goto yy31;
+ if (yych == '=') goto yy33;
+ goto yy10;
+ }
+yy35:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych == '+') goto yy35;
+#line 189 "_mwscan.re"
+ {
+ if (tablemode)
+ RET(t_tablecaption);
+ if (*start==' ') {
+ cursor = start+1;
+ RET(t_pre);
+ }
+ RET(t_text);
+ }
+#line 344 "_mwscan.cc"
+yy38:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych == '-') goto yy38;
+#line 166 "_mwscan.re"
+ {
+ if (tablemode)
+ RET(t_row);
+ if (*start==' ') {
+ cursor = start+1;
+ RET(t_pre);
+ }
+ RET(t_text);
+ }
+#line 359 "_mwscan.cc"
+yy41:
+ ++YYCURSOR;
+#line 163 "_mwscan.re"
+ {--tablemode; RET(t_end_table);}
+#line 364 "_mwscan.cc"
+yy43:
+ ++YYCURSOR;
+#line 162 "_mwscan.re"
+ {++tablemode; RET(t_begin_table);}
+#line 369 "_mwscan.cc"
+yy45:
+ yych = *++YYCURSOR;
+ if (yych <= ',') {
+ if (yych == '+') goto yy35;
+ goto yy7;
+ } else {
+ if (yych <= '-') goto yy38;
+ if (yych == '}') goto yy41;
+ goto yy7;
+ }
+yy46:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy47:
+ if (yych <= '!') {
+ if (yych <= 0x001F) goto yy20;
+ if (yych <= ' ') goto yy46;
+ } else {
+ if (yych <= 'z') goto yy20;
+ if (yych <= '{') goto yy49;
+ if (yych <= '|') goto yy45;
+ goto yy20;
+ }
+ yych = *++YYCURSOR;
+ goto yy7;
+yy49:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '|') goto yy43;
+ goto yy20;
+}
+#line 210 "_mwscan.re"
+
+
+
+not_bol:
+ cursor = save_cursor;
+ marker = cursor;
+
+
+#line 409 "_mwscan.cc"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept = 0;
+ yych = *YYCURSOR;
+ if (yych <= 'Z') {
+ if (yych <= '\'') {
+ if (yych <= ' ') {
+ if (yych <= 0x0000) goto yy72;
+ if (yych == '\n') goto yy63;
+ goto yy74;
+ } else {
+ if (yych <= '!') goto yy66;
+ if (yych <= '%') goto yy74;
+ if (yych <= '&') goto yy71;
+ goto yy69;
+ }
+ } else {
+ if (yych <= ';') {
+ if (yych <= '/') goto yy74;
+ if (yych <= '9') goto yy59;
+ if (yych <= ':') goto yy68;
+ goto yy74;
+ } else {
+ if (yych <= '<') goto yy70;
+ if (yych <= '=') goto yy61;
+ if (yych <= '@') goto yy74;
+ goto yy59;
+ }
+ }
+ } else {
+ if (yych <= 'f') {
+ if (yych <= '^') {
+ if (yych <= '[') goto yy52;
+ if (yych == ']') goto yy60;
+ goto yy74;
+ } else {
+ if (yych <= '_') goto yy58;
+ if (yych <= '`') goto yy74;
+ if (yych <= 'e') goto yy59;
+ goto yy56;
+ }
+ } else {
+ if (yych <= 'm') {
+ if (yych == 'h') goto yy57;
+ if (yych <= 'l') goto yy59;
+ goto yy54;
+ } else {
+ if (yych <= 'z') goto yy59;
+ if (yych == '|') goto yy65;
+ goto yy74;
+ }
+ }
+ }
+yy52:
+ yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ switch (yych) {
+ case '[': goto yy249;
+ case 'f': goto yy252;
+ case 'h': goto yy251;
+ case 'm': goto yy253;
+ default: goto yy53;
+ }
+yy53:
+#line 256 "_mwscan.re"
+ {RET(t_special);}
+#line 476 "_mwscan.cc"
+yy54:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == 'a') goto yy237;
+ goto yy121;
+yy55:
+#line 225 "_mwscan.re"
+ {RET(t_text);}
+#line 484 "_mwscan.cc"
+yy56:
+ yych = *++YYCURSOR;
+ if (yych == 't') goto yy229;
+ goto yy121;
+yy57:
+ yych = *++YYCURSOR;
+ if (yych == 't') goto yy219;
+ goto yy121;
+yy58:
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy122;
+ goto yy121;
+yy59:
+ yych = *++YYCURSOR;
+ goto yy121;
+yy60:
+ yych = *++YYCURSOR;
+ if (yych == ']') goto yy118;
+ goto yy53;
+yy61:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '=') goto yy116;
+ goto yy115;
+yy62:
+#line 228 "_mwscan.re"
+ {
+ if (eol()) {
+ if (line_startswith_section>=0) {
+ line_startswith_section=-1;
+ RET(t_section_end);
+ } else {
+ RET(t_text);
+ }
+ } else {
+ RET(t_text);
+ }
+ }
+#line 522 "_mwscan.cc"
+yy63:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '\n') goto yy111;
+#line 241 "_mwscan.re"
+ {newline(); RET(t_newline);}
+#line 528 "_mwscan.cc"
+yy65:
+ yych = *++YYCURSOR;
+ if (yych <= '*') {
+ if (yych == '!') goto yy107;
+ goto yy53;
+ } else {
+ if (yych <= '+') goto yy109;
+ if (yych == '|') goto yy107;
+ goto yy53;
+ }
+yy66:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '!') goto yy107;
+yy67:
+#line 266 "_mwscan.re"
+ {RET(t_text);}
+#line 545 "_mwscan.cc"
+yy68:
+ yych = *++YYCURSOR;
+ goto yy53;
+yy69:
+ yych = *++YYCURSOR;
+ if (yych == '\'') goto yy102;
+ goto yy67;
+yy70:
+ yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych <= '/') {
+ if (yych == '!') goto yy86;
+ if (yych <= '.') goto yy67;
+ goto yy87;
+ } else {
+ if (yych <= 'Z') {
+ if (yych <= '@') goto yy67;
+ goto yy88;
+ } else {
+ if (yych <= '`') goto yy67;
+ if (yych <= 'z') goto yy88;
+ goto yy67;
+ }
+ }
+yy71:
+ yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych <= '9') {
+ if (yych == '#') goto yy75;
+ if (yych <= '/') goto yy67;
+ goto yy77;
+ } else {
+ if (yych <= 'Z') {
+ if (yych <= '@') goto yy67;
+ goto yy77;
+ } else {
+ if (yych <= '`') goto yy67;
+ if (yych <= 'z') goto yy77;
+ goto yy67;
+ }
+ }
+yy72:
+ ++YYCURSOR;
+#line 265 "_mwscan.re"
+ {newline(); return t_end;}
+#line 591 "_mwscan.cc"
+yy74:
+ yych = *++YYCURSOR;
+ goto yy67;
+yy75:
+ yych = *++YYCURSOR;
+ if (yych <= 'W') {
+ if (yych <= '/') goto yy76;
+ if (yych <= '9') goto yy82;
+ } else {
+ if (yych <= 'X') goto yy81;
+ if (yych == 'x') goto yy81;
+ }
+yy76:
+ YYCURSOR = YYMARKER;
+ if (yyaccept <= 1) {
+ if (yyaccept <= 0) {
+ goto yy53;
+ } else {
+ goto yy67;
+ }
+ } else {
+ if (yyaccept <= 2) {
+ goto yy103;
+ } else {
+ goto yy55;
+ }
+ }
+yy77:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= ';') {
+ if (yych <= '/') goto yy76;
+ if (yych <= '9') goto yy77;
+ if (yych <= ':') goto yy76;
+ } else {
+ if (yych <= 'Z') {
+ if (yych <= '@') goto yy76;
+ goto yy77;
+ } else {
+ if (yych <= '`') goto yy76;
+ if (yych <= 'z') goto yy77;
+ goto yy76;
+ }
+ }
+yy79:
+ ++YYCURSOR;
+#line 263 "_mwscan.re"
+ {RET(t_entity);}
+#line 640 "_mwscan.cc"
+yy81:
+ yych = *++YYCURSOR;
+ if (yych == ';') goto yy76;
+ goto yy85;
+yy82:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '/') goto yy76;
+ if (yych <= '9') goto yy82;
+ if (yych == ';') goto yy79;
+ goto yy76;
+yy84:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy85:
+ if (yych <= ';') {
+ if (yych <= '/') goto yy76;
+ if (yych <= '9') goto yy84;
+ if (yych <= ':') goto yy76;
+ goto yy79;
+ } else {
+ if (yych <= 'F') {
+ if (yych <= '@') goto yy76;
+ goto yy84;
+ } else {
+ if (yych <= '`') goto yy76;
+ if (yych <= 'f') goto yy84;
+ goto yy76;
+ }
+ }
+yy86:
+ yych = *++YYCURSOR;
+ if (yych == '-') goto yy94;
+ goto yy76;
+yy87:
+ yych = *++YYCURSOR;
+ if (yych <= '@') goto yy76;
+ if (yych <= 'Z') goto yy88;
+ if (yych <= '`') goto yy76;
+ if (yych >= '{') goto yy76;
+yy88:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '>') {
+ if (yych <= ';') {
+ if (yych <= 0x0000) goto yy76;
+ } else {
+ if (yych <= '<') goto yy76;
+ if (yych >= '>') goto yy92;
+ }
+ } else {
+ if (yych <= 'Z') {
+ if (yych >= 'A') goto yy88;
+ } else {
+ if (yych <= '`') goto yy90;
+ if (yych <= 'z') goto yy88;
+ }
+ }
+yy90:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '<') {
+ if (yych <= 0x0000) goto yy76;
+ if (yych <= ';') goto yy90;
+ goto yy76;
+ } else {
+ if (yych != '>') goto yy90;
+ }
+yy92:
+ ++YYCURSOR;
+#line 259 "_mwscan.re"
+ {RET(t_html_tag);}
+#line 713 "_mwscan.cc"
+yy94:
+ yych = *++YYCURSOR;
+ if (yych != '-') goto yy76;
+yy95:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= ';') {
+ if (yych <= 0x0000) goto yy76;
+ if (yych != '-') goto yy95;
+ } else {
+ if (yych == '=') goto yy95;
+ if (yych <= '>') goto yy76;
+ goto yy95;
+ }
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= ';') {
+ if (yych <= 0x0000) goto yy76;
+ if (yych != '-') goto yy95;
+ } else {
+ if (yych == '=') goto yy95;
+ if (yych <= '>') goto yy76;
+ goto yy95;
+ }
+yy98:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= ';') {
+ if (yych <= 0x0000) goto yy76;
+ if (yych == '-') goto yy98;
+ goto yy95;
+ } else {
+ if (yych <= '<') goto yy76;
+ if (yych != '>') goto yy95;
+ }
+ ++YYCURSOR;
+#line 262 "_mwscan.re"
+ {RET(t_comment);}
+#line 752 "_mwscan.cc"
+yy102:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) == '\'') goto yy104;
+yy103:
+#line 257 "_mwscan.re"
+ {RET(t_style);}
+#line 759 "_mwscan.cc"
+yy104:
+ yyaccept = 2;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych != '\'') goto yy103;
+ yych = *++YYCURSOR;
+ if (yych != '\'') goto yy76;
+ yych = *++YYCURSOR;
+ goto yy103;
+yy107:
+ ++YYCURSOR;
+#line 243 "_mwscan.re"
+ {
+ if (tablemode)
+ RET(t_column);
+ cursor = start+1;
+ RET(t_special);
+ }
+#line 777 "_mwscan.cc"
+yy109:
+ ++YYCURSOR;
+#line 250 "_mwscan.re"
+ {
+ if (tablemode)
+ RET(t_tablecaption);
+ cursor = start+1;
+ RET(t_special);
+ }
+#line 787 "_mwscan.cc"
+yy111:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych == '\n') goto yy111;
+#line 240 "_mwscan.re"
+ {newline(); RET(t_break);}
+#line 794 "_mwscan.cc"
+yy114:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy115:
+ if (yych == '\t') goto yy114;
+ if (yych == ' ') goto yy114;
+ goto yy62;
+yy116:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= 0x001F) {
+ if (yych == '\t') goto yy114;
+ goto yy62;
+ } else {
+ if (yych <= ' ') goto yy114;
+ if (yych == '=') goto yy116;
+ goto yy62;
+ }
+yy118:
+ ++YYCURSOR;
+#line 227 "_mwscan.re"
+ {RET(t_2box_close);}
+#line 817 "_mwscan.cc"
+yy120:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy121:
+ if (yych <= 'Z') {
+ if (yych <= '/') goto yy55;
+ if (yych <= '9') goto yy120;
+ if (yych <= '@') goto yy55;
+ goto yy120;
+ } else {
+ if (yych <= '_') {
+ if (yych <= '^') goto yy55;
+ goto yy120;
+ } else {
+ if (yych <= '`') goto yy55;
+ if (yych <= 'z') goto yy120;
+ goto yy55;
+ }
+ }
+yy122:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'E': goto yy126;
+ case 'F': goto yy125;
+ case 'N': goto yy124;
+ case 'S': goto yy127;
+ case 'T': goto yy123;
+ default: goto yy121;
+ }
+yy123:
+ yych = *++YYCURSOR;
+ if (yych == 'O') goto yy216;
+ goto yy121;
+yy124:
+ yych = *++YYCURSOR;
+ if (yych == 'E') goto yy146;
+ if (yych == 'O') goto yy147;
+ goto yy121;
+yy125:
+ yych = *++YYCURSOR;
+ if (yych == 'O') goto yy138;
+ goto yy121;
+yy126:
+ yych = *++YYCURSOR;
+ if (yych == 'N') goto yy135;
+ goto yy121;
+yy127:
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'A') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'R') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+yy133:
+ ++YYCURSOR;
+ if ((yych = *YYCURSOR) <= 'Z') {
+ if (yych <= '/') goto yy134;
+ if (yych <= '9') goto yy120;
+ if (yych >= 'A') goto yy120;
+ } else {
+ if (yych <= '_') {
+ if (yych >= '_') goto yy120;
+ } else {
+ if (yych <= '`') goto yy134;
+ if (yych <= 'z') goto yy120;
+ }
+ }
+yy134:
+#line 224 "_mwscan.re"
+ {RET(t_magicword);}
+#line 894 "_mwscan.cc"
+yy135:
+ yych = *++YYCURSOR;
+ if (yych != 'D') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy138:
+ yych = *++YYCURSOR;
+ if (yych != 'R') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'O') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy146:
+ yych = *++YYCURSOR;
+ if (yych == 'W') goto yy203;
+ goto yy121;
+yy147:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'C': goto yy150;
+ case 'E': goto yy149;
+ case 'G': goto yy151;
+ case 'T': goto yy148;
+ default: goto yy121;
+ }
+yy148:
+ yych = *++YYCURSOR;
+ if (yych <= 'H') {
+ if (yych == 'C') goto yy186;
+ goto yy121;
+ } else {
+ if (yych <= 'I') goto yy187;
+ if (yych == 'O') goto yy188;
+ goto yy121;
+ }
+yy149:
+ yych = *++YYCURSOR;
+ if (yych == 'D') goto yy175;
+ goto yy121;
+yy150:
+ yych = *++YYCURSOR;
+ if (yych == 'C') goto yy159;
+ if (yych == 'O') goto yy160;
+ goto yy121;
+yy151:
+ yych = *++YYCURSOR;
+ if (yych != 'A') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'L') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'L') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'R') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'Y') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy159:
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy174;
+ goto yy121;
+yy160:
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'O') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'V') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'R') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy174:
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy175:
+ yych = *++YYCURSOR;
+ if (yych != 'I') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'S') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'I') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'O') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy186:
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy202;
+ goto yy121;
+yy187:
+ yych = *++YYCURSOR;
+ if (yych == 'T') goto yy191;
+ goto yy121;
+yy188:
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy191:
+ yych = *++YYCURSOR;
+ if (yych != 'L') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'O') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'V') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'R') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy202:
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy203:
+ yych = *++YYCURSOR;
+ if (yych != 'S') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'E') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'T') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'I') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'O') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'L') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'I') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'N') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'K') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy216:
+ yych = *++YYCURSOR;
+ if (yych != 'C') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '_') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '_') goto yy133;
+ goto yy121;
+yy219:
+ yych = *++YYCURSOR;
+ if (yych != 't') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'p') goto yy121;
+ yyaccept = 3;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych == ':') goto yy223;
+ if (yych != 's') goto yy121;
+ yyaccept = 3;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych != ':') goto yy121;
+yy223:
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych <= '_') {
+ if (yych <= ':') {
+ if (yych <= '&') {
+ if (yych == '#') goto yy226;
+ if (yych <= '$') goto yy76;
+ } else {
+ if (yych <= '\'') goto yy76;
+ if (yych == '*') goto yy76;
+ }
+ } else {
+ if (yych <= '?') {
+ if (yych == '=') goto yy226;
+ if (yych <= '>') goto yy76;
+ } else {
+ if (yych <= '@') goto yy76;
+ if (yych <= 'Z') goto yy226;
+ if (yych <= '^') goto yy76;
+ }
+ }
+ } else {
+ if (yych <= 0x00D6) {
+ if (yych <= '~') {
+ if (yych <= '`') goto yy76;
+ if (yych <= 'z') goto yy226;
+ if (yych <= '}') goto yy76;
+ } else {
+ if (yych == 0x00C4) goto yy226;
+ if (yych <= 0x00D5) goto yy76;
+ }
+ } else {
+ if (yych <= 0x00E4) {
+ if (yych == 0x00DC) goto yy226;
+ if (yych <= 0x00E3) goto yy76;
+ } else {
+ if (yych <= 0x00F6) {
+ if (yych <= 0x00F5) goto yy76;
+ } else {
+ if (yych != 0x00FC) goto yy76;
+ }
+ }
+ }
+ }
+yy226:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '_') {
+ if (yych <= ':') {
+ if (yych <= '&') {
+ if (yych == '#') goto yy226;
+ if (yych >= '%') goto yy226;
+ } else {
+ if (yych <= '\'') goto yy228;
+ if (yych != '*') goto yy226;
+ }
+ } else {
+ if (yych <= '?') {
+ if (yych == '=') goto yy226;
+ if (yych >= '?') goto yy226;
+ } else {
+ if (yych <= '@') goto yy228;
+ if (yych <= 'Z') goto yy226;
+ if (yych >= '_') goto yy226;
+ }
+ }
+ } else {
+ if (yych <= 0x00D6) {
+ if (yych <= '~') {
+ if (yych <= '`') goto yy228;
+ if (yych <= 'z') goto yy226;
+ if (yych >= '~') goto yy226;
+ } else {
+ if (yych == 0x00C4) goto yy226;
+ if (yych >= 0x00D6) goto yy226;
+ }
+ } else {
+ if (yych <= 0x00E4) {
+ if (yych == 0x00DC) goto yy226;
+ if (yych >= 0x00E4) goto yy226;
+ } else {
+ if (yych <= 0x00F6) {
+ if (yych >= 0x00F6) goto yy226;
+ } else {
+ if (yych == 0x00FC) goto yy226;
+ }
+ }
+ }
+ }
+yy228:
+#line 223 "_mwscan.re"
+ {RET(t_http_url);}
+#line 1221 "_mwscan.cc"
+yy229:
+ yych = *++YYCURSOR;
+ if (yych != 'p') goto yy121;
+ yyaccept = 3;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych != ':') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych <= '=') {
+ if (yych <= '&') {
+ if (yych <= '"') goto yy76;
+ if (yych == '%') goto yy76;
+ } else {
+ if (yych <= '\'') goto yy76;
+ if (yych <= ':') goto yy234;
+ if (yych <= '<') goto yy76;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= '>') goto yy76;
+ if (yych <= 'Z') goto yy234;
+ if (yych <= '^') goto yy76;
+ } else {
+ if (yych <= '{') {
+ if (yych <= '`') goto yy76;
+ } else {
+ if (yych <= '|') goto yy76;
+ if (yych >= 0x007F) goto yy76;
+ }
+ }
+ }
+yy234:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '=') {
+ if (yych <= '&') {
+ if (yych <= '"') goto yy236;
+ if (yych != '%') goto yy234;
+ } else {
+ if (yych <= '\'') goto yy236;
+ if (yych <= ':') goto yy234;
+ if (yych >= '=') goto yy234;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= '>') goto yy236;
+ if (yych <= 'Z') goto yy234;
+ if (yych >= '_') goto yy234;
+ } else {
+ if (yych <= '{') {
+ if (yych >= 'a') goto yy234;
+ } else {
+ if (yych <= '|') goto yy236;
+ if (yych <= '~') goto yy234;
+ }
+ }
+ }
+yy236:
+#line 221 "_mwscan.re"
+ {RET(t_http_url);}
+#line 1285 "_mwscan.cc"
+yy237:
+ yych = *++YYCURSOR;
+ if (yych != 'i') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'l') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 't') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych != 'o') goto yy121;
+ yyaccept = 3;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if (yych != ':') goto yy121;
+ yych = *++YYCURSOR;
+ if (yych == '@') goto yy76;
+ goto yy244;
+yy243:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy244:
+ if (yych <= '9') {
+ if (yych <= '\'') {
+ if (yych == '!') goto yy243;
+ if (yych <= '"') goto yy76;
+ goto yy243;
+ } else {
+ if (yych <= ')') goto yy76;
+ if (yych == ',') goto yy76;
+ goto yy243;
+ }
+ } else {
+ if (yych <= '?') {
+ if (yych == '=') goto yy243;
+ if (yych <= '>') goto yy76;
+ goto yy243;
+ } else {
+ if (yych <= 'Z') {
+ if (yych >= 'A') goto yy243;
+ } else {
+ if (yych <= ']') goto yy76;
+ if (yych <= '~') goto yy243;
+ goto yy76;
+ }
+ }
+ }
+ yych = *++YYCURSOR;
+ if (yych <= '@') {
+ if (yych <= '.') {
+ if (yych <= ',') goto yy76;
+ } else {
+ if (yych <= '/') goto yy76;
+ if (yych >= ':') goto yy76;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= 'Z') goto yy246;
+ if (yych <= '^') goto yy76;
+ } else {
+ if (yych <= '`') goto yy76;
+ if (yych >= '{') goto yy76;
+ }
+ }
+yy246:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '@') {
+ if (yych <= '.') {
+ if (yych >= '-') goto yy246;
+ } else {
+ if (yych <= '/') goto yy248;
+ if (yych <= '9') goto yy246;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= 'Z') goto yy246;
+ if (yych >= '_') goto yy246;
+ } else {
+ if (yych <= '`') goto yy248;
+ if (yych <= 'z') goto yy246;
+ }
+ }
+yy248:
+#line 219 "_mwscan.re"
+ {RET(t_http_url);}
+#line 1369 "_mwscan.cc"
+yy249:
+ ++YYCURSOR;
+#line 226 "_mwscan.re"
+ {RET(t_2box_open);}
+#line 1374 "_mwscan.cc"
+yy251:
+ yych = *++YYCURSOR;
+ if (yych == 't') goto yy274;
+ goto yy76;
+yy252:
+ yych = *++YYCURSOR;
+ if (yych == 't') goto yy266;
+ goto yy76;
+yy253:
+ yych = *++YYCURSOR;
+ if (yych != 'a') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != 'i') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != 'l') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != 't') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != 'o') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != ':') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych == '@') goto yy76;
+ goto yy261;
+yy260:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+yy261:
+ if (yych <= '9') {
+ if (yych <= '\'') {
+ if (yych == '!') goto yy260;
+ if (yych <= '"') goto yy76;
+ goto yy260;
+ } else {
+ if (yych <= ')') goto yy76;
+ if (yych == ',') goto yy76;
+ goto yy260;
+ }
+ } else {
+ if (yych <= '?') {
+ if (yych == '=') goto yy260;
+ if (yych <= '>') goto yy76;
+ goto yy260;
+ } else {
+ if (yych <= 'Z') {
+ if (yych >= 'A') goto yy260;
+ } else {
+ if (yych <= ']') goto yy76;
+ if (yych <= '~') goto yy260;
+ goto yy76;
+ }
+ }
+ }
+ yych = *++YYCURSOR;
+ if (yych <= '@') {
+ if (yych <= '.') {
+ if (yych <= ',') goto yy76;
+ } else {
+ if (yych <= '/') goto yy76;
+ if (yych >= ':') goto yy76;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= 'Z') goto yy263;
+ if (yych <= '^') goto yy76;
+ } else {
+ if (yych <= '`') goto yy76;
+ if (yych >= '{') goto yy76;
+ }
+ }
+yy263:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '@') {
+ if (yych <= '.') {
+ if (yych >= '-') goto yy263;
+ } else {
+ if (yych <= '/') goto yy265;
+ if (yych <= '9') goto yy263;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= 'Z') goto yy263;
+ if (yych >= '_') goto yy263;
+ } else {
+ if (yych <= '`') goto yy265;
+ if (yych <= 'z') goto yy263;
+ }
+ }
+yy265:
+#line 218 "_mwscan.re"
+ {RET(t_urllink);}
+#line 1467 "_mwscan.cc"
+yy266:
+ yych = *++YYCURSOR;
+ if (yych != 'p') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != ':') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych <= '=') {
+ if (yych <= '&') {
+ if (yych <= '"') goto yy76;
+ if (yych == '%') goto yy76;
+ } else {
+ if (yych <= '\'') goto yy76;
+ if (yych <= ':') goto yy271;
+ if (yych <= '<') goto yy76;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= '>') goto yy76;
+ if (yych <= 'Z') goto yy271;
+ if (yych <= '^') goto yy76;
+ } else {
+ if (yych <= '{') {
+ if (yych <= '`') goto yy76;
+ } else {
+ if (yych <= '|') goto yy76;
+ if (yych >= 0x007F) goto yy76;
+ }
+ }
+ }
+yy271:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '=') {
+ if (yych <= '&') {
+ if (yych <= '"') goto yy273;
+ if (yych != '%') goto yy271;
+ } else {
+ if (yych <= '\'') goto yy273;
+ if (yych <= ':') goto yy271;
+ if (yych >= '=') goto yy271;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= '>') goto yy273;
+ if (yych <= 'Z') goto yy271;
+ if (yych >= '_') goto yy271;
+ } else {
+ if (yych <= '{') {
+ if (yych >= 'a') goto yy271;
+ } else {
+ if (yych <= '|') goto yy273;
+ if (yych <= '~') goto yy271;
+ }
+ }
+ }
+yy273:
+#line 220 "_mwscan.re"
+ {RET(t_urllink);}
+#line 1530 "_mwscan.cc"
+yy274:
+ yych = *++YYCURSOR;
+ if (yych != 't') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != 'p') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych == ':') goto yy278;
+ if (yych != 's') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != ':') goto yy76;
+yy278:
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych != '/') goto yy76;
+ yych = *++YYCURSOR;
+ if (yych <= '_') {
+ if (yych <= ':') {
+ if (yych <= '&') {
+ if (yych == '#') goto yy281;
+ if (yych <= '$') goto yy76;
+ } else {
+ if (yych <= '\'') goto yy76;
+ if (yych == '*') goto yy76;
+ }
+ } else {
+ if (yych <= '?') {
+ if (yych == '=') goto yy281;
+ if (yych <= '>') goto yy76;
+ } else {
+ if (yych <= '@') goto yy76;
+ if (yych <= 'Z') goto yy281;
+ if (yych <= '^') goto yy76;
+ }
+ }
+ } else {
+ if (yych <= 0x00D6) {
+ if (yych <= '~') {
+ if (yych <= '`') goto yy76;
+ if (yych <= 'z') goto yy281;
+ if (yych <= '}') goto yy76;
+ } else {
+ if (yych == 0x00C4) goto yy281;
+ if (yych <= 0x00D5) goto yy76;
+ }
+ } else {
+ if (yych <= 0x00E4) {
+ if (yych == 0x00DC) goto yy281;
+ if (yych <= 0x00E3) goto yy76;
+ } else {
+ if (yych <= 0x00F6) {
+ if (yych <= 0x00F5) goto yy76;
+ } else {
+ if (yych != 0x00FC) goto yy76;
+ }
+ }
+ }
+ }
+yy281:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ if (yych <= '_') {
+ if (yych <= ':') {
+ if (yych <= '&') {
+ if (yych == '#') goto yy281;
+ if (yych >= '%') goto yy281;
+ } else {
+ if (yych <= '\'') goto yy283;
+ if (yych != '*') goto yy281;
+ }
+ } else {
+ if (yych <= '?') {
+ if (yych == '=') goto yy281;
+ if (yych >= '?') goto yy281;
+ } else {
+ if (yych <= '@') goto yy283;
+ if (yych <= 'Z') goto yy281;
+ if (yych >= '_') goto yy281;
+ }
+ }
+ } else {
+ if (yych <= 0x00D6) {
+ if (yych <= '~') {
+ if (yych <= '`') goto yy283;
+ if (yych <= 'z') goto yy281;
+ if (yych >= '~') goto yy281;
+ } else {
+ if (yych == 0x00C4) goto yy281;
+ if (yych >= 0x00D6) goto yy281;
+ }
+ } else {
+ if (yych <= 0x00E4) {
+ if (yych == 0x00DC) goto yy281;
+ if (yych >= 0x00E4) goto yy281;
+ } else {
+ if (yych <= 0x00F6) {
+ if (yych >= 0x00F6) goto yy281;
+ } else {
+ if (yych == 0x00FC) goto yy281;
+ }
+ }
+ }
+ }
+yy283:
+#line 222 "_mwscan.re"
+ {RET(t_urllink);}
+#line 1637 "_mwscan.cc"
+}
+#line 267 "_mwscan.re"
+
+}
+
+
+PyObject *py_scan(PyObject *self, PyObject *args)
+{
+ PyObject *arg1;
+ if (!PyArg_ParseTuple(args, "O:mwscan.scan", &arg1)) {
+ return 0;
+ }
+ PyUnicodeObject *unistr = (PyUnicodeObject*)PyUnicode_FromObject(arg1);
+ if (unistr == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "parameter cannot be converted to unicode in mwscan.scan");
+ return 0;
+ }
+
+ Py_UNICODE *start = unistr->str;
+ Py_UNICODE *end = start+unistr->length;
+
+
+ Scanner scanner (start, end);
+ Py_BEGIN_ALLOW_THREADS
+ while (scanner.scan()) {
+ }
+ Py_END_ALLOW_THREADS
+ Py_XDECREF(unistr);
+
+ // return PyList_New(0); // uncomment to see timings for scanning
+
+ int size = scanner.tokens.size();
+ PyObject *result = PyList_New(size);
+ if (!result) {
+ return 0;
+ }
+
+ for (int i=0; i<size; i++) {
+ Token t = scanner.tokens[i];
+ PyList_SET_ITEM(result, i, Py_BuildValue("iii", t.type, t.start, t.len));
+ }
+
+ return result;
+}
+
+
+
+static PyMethodDef module_functions[] = {
+ {"scan", (PyCFunction)py_scan, METH_VARARGS, "scan(text)"},
+ {0, 0},
+};
+
+
+
+extern "C" {
+ DL_EXPORT(void) init_mwscan();
+}
+
+DL_EXPORT(void) init_mwscan()
+{
+ /*PyObject *m =*/ Py_InitModule("_mwscan", module_functions);
+}
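For orientation (not part of this commit): py_scan() above returns a flat list of (type, start, len) tuples that index back into the scanned unicode string. A minimal consumer sketch, assuming the compiled `_mwscan` extension is importable and using a made-up input string:

    # Sketch: consume the (type, start, len) tuples produced by _mwscan.scan().
    import _mwscan

    text = u"== heading ==\nsome ''styled'' text\n"
    for ttype, start, length in _mwscan.scan(text):
        # each tuple slices back into the original unicode input
        print ttype, repr(text[start:start + length])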
diff --git a/mwlib/_mwscan.re b/mwlib/_mwscan.re
new file mode 100644
index 0000000..99ffe95
--- /dev/null
+++ b/mwlib/_mwscan.re
@@ -0,0 +1,327 @@
+// -*- mode: c++ -*-
+// Copyright (c) 2007-2008 PediaPress GmbH
+// See README.txt for additional licensing information.
+
+#include <Python.h>
+
+#include <iostream>
+#include <assert.h>
+#include <vector>
+using namespace std;
+
+#define RET(x) {found(x); return x;}
+
+typedef enum {
+ t_end,
+ t_text,
+ t_entity,
+ t_special,
+ t_magicword,
+ t_comment,
+ t_2box_open, // [[
+ t_2box_close, // ]]
+ t_http_url,
+ t_break,
+ t_begin_table,
+ t_end_table,
+ t_html_tag,
+ t_style,
+ t_pre,
+ t_section,
+ t_section_end,
+ t_item,
+ t_colon,
+ t_semicolon,
+ t_hrule,
+ t_newline,
+ t_column,
+ t_row,
+ t_tablecaption,
+ t_urllink,
+} mwtok;
+
+struct Token
+{
+ int type;
+ int start;
+ int len;
+};
+
+class Scanner
+{
+public:
+
+ Scanner(Py_UNICODE *_start, Py_UNICODE *_end) {
+ source = start = _start;
+ end = _end;
+ cursor = start;
+ line_startswith_section = -1;
+ tablemode=0;
+ }
+
+ int found(mwtok val) {
+ if (val==t_text && tokens.size()) {
+ Token &previous_token (tokens[tokens.size()-1]);
+ if (previous_token.type==val) {
+ previous_token.len += cursor-start;
+ return tokens.size()-1;
+ }
+ }
+ Token t;
+ t.type = val;
+ t.start = (start-source);
+ t.len = cursor-start;
+ tokens.push_back(t);
+ return tokens.size()-1;
+ }
+
+ bool bol() const {
+ return (start==source) || (start[-1]=='\n');
+ }
+
+ bool eol() const {
+ return *cursor=='\n' || *cursor==0;
+ }
+
+ void newline() {
+ if (line_startswith_section>=0) {
+ tokens[line_startswith_section].type = t_text;
+ }
+ line_startswith_section = -1;
+ }
+
+ inline int scan();
+
+ Py_UNICODE *source;
+
+ Py_UNICODE *start;
+ Py_UNICODE *cursor;
+ Py_UNICODE *end;
+ vector<Token> tokens;
+
+ int line_startswith_section;
+ int tablemode;
+};
+
+
+int Scanner::scan()
+{
+ start=cursor;
+
+ Py_UNICODE *marker=cursor;
+
+ Py_UNICODE *save_cursor = cursor;
+
+
+#define YYCTYPE Py_UNICODE
+#define YYCURSOR cursor
+#define YYMARKER marker
+#define YYLIMIT (end)
+// #define YYFILL(n) return 0;
+
+/*!re2c
+re2c:yyfill:enable = 0 ;
+*/
+
+/*
+ the re2c manpage says:
+ "The user must arrange for a sentinel token to appear at the end of input"
+ \000 is our sentinel token.
+*/
+
+/*!re2c
+ any = [^\000];
+ ftp = "ftp://" [-a-zA-Z0-9_+${}~?=/@#&*(),:.]+ ;
+ mailto = "mailto:" [-a-zA-Z0-9_!#$%*./?|^{}`~&'+=]+ "@" [-a-zA-Z0-9_.]+ ;
+ url = "http" "s"? "://" [-\xe4\xc4\xf6\xd6\xfc\xdca-zA-Z_0-9./?=&:%:~()#+,]+ ;
+ entity_name = "&" [a-zA-Z0-9]+ ";";
+ entity_hex = "&#" 'x' [a-fA-F0-9]+ ";";
+ entity_dec = "&#" [0-9]+ ";";
+
+ entity = (entity_name | entity_hex | entity_dec);
+
+
+ magicword = ( "__TOC__"
+ | "__NOTOC__"
+ | "__FORCETOC__"
+ | "__NOEDITSECTION__"
+ | "__NEWSECTIONLINK__"
+ | "__NOCONTENTCONVERT__"
+ | "__NOCC__"
+ | "__NOGALLERY__"
+ | "__NOTITLECONVERT__"
+ | "__NOTC__"
+ | "__END__"
+ | "__START__"
+ );
+*/
+ if (!bol()) {
+ goto not_bol;
+ }
+/*!re2c
+ " "* "{|" {++tablemode; RET(t_begin_table);}
+ " "* "|}" {--tablemode; RET(t_end_table);}
+
+ " "* "|" "-"+
+ {
+ if (tablemode)
+ RET(t_row);
+ if (*start==' ') {
+ cursor = start+1;
+ RET(t_pre);
+ }
+ RET(t_text);
+ }
+
+ " "* ("|" | "!")
+ {
+ if (tablemode)
+ RET(t_column);
+
+ if (*start==' ') {
+ cursor = start+1;
+ RET(t_pre);
+ }
+ RET(t_text);
+ }
+
+ " "* "|" "+"+
+ {
+ if (tablemode)
+ RET(t_tablecaption);
+ if (*start==' ') {
+ cursor = start+1;
+ RET(t_pre);
+ }
+ RET(t_text);
+ }
+
+ " " {RET(t_pre);}
+ "="+ [ \t]* {
+ line_startswith_section = found(t_section);
+ return t_section;
+ }
+ ":"* [#*]+ {RET(t_item);}
+ ":"+ {RET(t_colon);}
+ ";"+ {RET(t_semicolon);}
+ "-"{4,} {RET(t_hrule);}
+
+ [^] {goto not_bol;}
+ */
+
+
+not_bol:
+ cursor = save_cursor;
+ marker = cursor;
+
+/*!re2c
+ "[" mailto {RET(t_urllink);}
+ mailto {RET(t_http_url);}
+ "[" ftp {RET(t_urllink);}
+ ftp {RET(t_http_url);}
+ "[" url {RET(t_urllink);}
+ url {RET(t_http_url);}
+ magicword {RET(t_magicword);}
+ [a-zA-Z0-9_]+ {RET(t_text);}
+ "[[" {RET(t_2box_open);}
+ "]]" {RET(t_2box_close);}
+ "="+ [ \t]* {
+ if (eol()) {
+ if (line_startswith_section>=0) {
+ line_startswith_section=-1;
+ RET(t_section_end);
+ } else {
+ RET(t_text);
+ }
+ } else {
+ RET(t_text);
+ }
+ }
+ "\n"{2,} {newline(); RET(t_break);}
+ "\n" {newline(); RET(t_newline);}
+ "||" | "|!" | "!!"
+ {
+ if (tablemode)
+ RET(t_column);
+ cursor = start+1;
+ RET(t_special);
+ }
+ "|+"
+ {
+ if (tablemode)
+ RET(t_tablecaption);
+ cursor = start+1;
+ RET(t_special);
+ }
+ [:|\[\]] {RET(t_special);}
+ "'''''" | "'''" | "''" {RET(t_style);}
+ "<" "/"? [a-zA-Z]+ [^\000<>]* "/"? ">"
+ {RET(t_html_tag);}
+
+ "<!--"[^\000<>]*"-->"
+ {RET(t_comment);}
+ entity {RET(t_entity);}
+
+ "\000" {newline(); return t_end;}
+ . {RET(t_text);}
+*/
+}
+
+
+PyObject *py_scan(PyObject *self, PyObject *args)
+{
+ PyObject *arg1;
+ if (!PyArg_ParseTuple(args, "O:mwscan.scan", &arg1)) {
+ return 0;
+ }
+ PyUnicodeObject *unistr = (PyUnicodeObject*)PyUnicode_FromObject(arg1);
+ if (unistr == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "parameter cannot be converted to unicode in mwscan.scan");
+ return 0;
+ }
+
+ Py_UNICODE *start = unistr->str;
+ Py_UNICODE *end = start+unistr->length;
+
+
+ Scanner scanner (start, end);
+ Py_BEGIN_ALLOW_THREADS
+ while (scanner.scan()) {
+ }
+ Py_END_ALLOW_THREADS
+ Py_XDECREF(unistr);
+
+ // return PyList_New(0); // uncomment to see timings for scanning
+
+ int size = scanner.tokens.size();
+ PyObject *result = PyList_New(size);
+ if (!result) {
+ return 0;
+ }
+
+ for (int i=0; i<size; i++) {
+ Token t = scanner.tokens[i];
+ PyList_SET_ITEM(result, i, Py_BuildValue("iii", t.type, t.start, t.len));
+ }
+
+ return result;
+}
+
+
+
+static PyMethodDef module_functions[] = {
+ {"scan", (PyCFunction)py_scan, METH_VARARGS, "scan(text)"},
+ {0, 0},
+};
+
+
+
+extern "C" {
+ DL_EXPORT(void) init_mwscan();
+}
+
+DL_EXPORT(void) init_mwscan()
+{
+ /*PyObject *m =*/ Py_InitModule("_mwscan", module_functions);
+}
diff --git a/mwlib/_mwscan.so b/mwlib/_mwscan.so
new file mode 100755
index 0000000..50feae0
--- /dev/null
+++ b/mwlib/_mwscan.so
Binary files differ
diff --git a/mwlib/_version.py b/mwlib/_version.py
new file mode 100644
index 0000000..9e73d4f
--- /dev/null
+++ b/mwlib/_version.py
@@ -0,0 +1,9 @@
+class _Version(tuple):
+ """internal version object, subclass of C{tuple},
+ but implements a fancier __str__ representation
+ """
+ def __str__(self):
+ return '.'.join([str(x) for x in self])
+
+version = _Version((0,6,1))
+del _Version
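A brief illustration (not in the commit) of how the _Version tuple subclass behaves: it prints dotted but still compares like an ordinary tuple.

    from mwlib._version import version

    print version            # prints "0.6.1" via the custom __str__
    print version >= (0, 6)  # plain tuple comparison still works -> True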
diff --git a/mwlib/advtree.py b/mwlib/advtree.py
new file mode 100644
index 0000000..52d57e5
--- /dev/null
+++ b/mwlib/advtree.py
@@ -0,0 +1,545 @@
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+"""
+The parse tree generated by the parser is a 1:1 representation of the mw-markup.
+Unfortunately these trees have some flaws if used to generate derived documents.
+
+This module seeks to rebuild the parse tree
+to be:
+ * more logical markup
+ * clean up the parse tree
+ * make it more accessible
+ * allow for validity checks
+ * implement rebuilding strategies
+
+Useful documentation:
+http://en.wikipedia.org/wiki/Wikipedia:Don%27t_use_line_breaks
+http://meta.wikimedia.org/wiki/Help:Advanced_editing
+"""
+
+import weakref
+from mwlib.parser import Magic, Math, _VListNode, Ref, Link, URL, NamedURL # not used but imported
+from mwlib.parser import CategoryLink, SpecialLink, Caption, LangLink # not used but imported
+from mwlib.parser import Item, ItemList, Node, Table, Row, Cell, Paragraph, PreFormatted
+from mwlib.parser import Section, Style, TagNode, Text, Timeline
+from mwlib.parser import ImageLink, Article, Book, Chapter
+import copy
+from mwlib.log import Log
+
+log = Log("advtree")
+
+
+def _idIndex(lst, el):
+    # return index of the first appearance of element in list
+ for i, e in enumerate(lst):
+ if e is el:
+ return i
+ return -1
+
+class AdvancedNode:
+ """
+    MixIn class that extends Nodes so they become easier to access
+
+    allows traversing the tree in any direction and
+    provides derived convenience functions
+ """
+    _parentref = None # weak reference to parent element
+ isblocknode = False
+
+ def copy(self):
+ "return a copy of this node and all its children"
+ n = copy.copy(self)
+ n.children = []
+ n._parentref = None
+ for c in self:
+ n.appendChild(c.copy())
+ return n
+
+
+ def moveto(self, targetnode, prefix=False):
+ """
+ moves this node after target node
+ if prefix is true, move in front of target node
+ """
+ if self.parent:
+ self.parent.removeChild(self)
+ tp = targetnode.parent
+ idx = _idIndex(tp.children, targetnode)
+ if not prefix:
+ idx+=1
+ tp.children = tp.children[:idx] + [self] + tp.children[idx:]
+ self._parentref = weakref.ref(tp)
+
+ def appendChild(self, c):
+ self.children.append(c)
+ c._parentref = weakref.ref(self)
+
+ def remove(self):
+ if self.parent:
+ for (idx, n) in enumerate(self.parent.children):
+ if n is self:
+ self.parent.children = self.parent.children[:idx] + self.parent.children[idx+1:]
+ return 0
+ else:
+ return 1
+
+ def removeChild(self, c):
+ self.replaceChild(c, [])
+
+ def replaceChild(self, c, newchildren = []):
+ idx = _idIndex(self.children, c)
+ self.children.remove(c)
+ c._parentref = None
+ if newchildren:
+ self.children = self.children[:idx] + newchildren + self.children[idx:]
+ for nc in newchildren:
+ nc._parentref = weakref.ref(self)
+
+ def getParents(self):
+ if self.parent:
+ return self.parent.getParents() + [self.parent]
+ else:
+ return []
+
+ def getParent(self):
+ if not self._parentref:
+ return None
+ x = self._parentref()
+ if not x:
+ raise weakref.ReferenceError
+ return x
+
+ def getLevel(self):
+ "returns the number of nodes of same class in parents"
+ return [p.__class__ for p in self.getParents()].count(self.__class__)
+
+
+ def getParentNodesByClass(self, klass):
+ "returns parents w/ klass"
+ return [p for p in self.parents if p.__class__ == klass]
+
+ def getChildNodesByClass(self, klass):
+ "returns all children w/ klass"
+ return [p for p in self.getAllChildren() if p.__class__ == klass]
+
+ def getAllChildren(self):
+ "don't confuse w/ Node.allchildren() which returns allchildren + self"
+ for c in self.children:
+ yield c
+ for x in c.getAllChildren():
+ yield x
+
+ def getSiblings(self):
+ return [c for c in self.getAllSiblings() if c is not self]
+
+ def getAllSiblings(self):
+ "all siblings plus me my self and i"
+ if self.parent:
+ return self.parent.children
+ return []
+
+ def getPrevious(self):
+ "return previous sibling"
+ s = self.getAllSiblings()
+ try:
+ idx = _idIndex(s,self)
+ except ValueError:
+ return None
+ if idx -1 <0:
+ return None
+ else:
+ return s[idx-1]
+
+ def getNext(self):
+ "return next sibling"
+ s = self.getAllSiblings()
+ try:
+ idx = _idIndex(s,self)
+ except ValueError:
+ return None
+ if idx+1 >= len(s):
+ return None
+ else:
+ return s[idx+1]
+
+ def getLast(self):
+ "return last sibling"
+ s = self.getAllSiblings()
+ if s:
+ return s[-1]
+
+ def getFirst(self):
+ "return first sibling"
+ s = self.getAllSiblings()
+ if s:
+ return s[0]
+
+ def getLastChild(self):
+ "return last child of this node"
+ if self.children:
+ return self.children[-1]
+
+ def getFirstChild(self):
+ "return first child of this node"
+ if self.children:
+ return self.children[0]
+
+ def getAllDisplayText(self, amap = None):
+ "return all text that is intended for display"
+ text = []
+ if not amap:
+ amap = {Text:"caption", Link:"target", URL:"caption", Math:"caption", ImageLink:"caption" }
+ for n in self.allchildren():
+ access = amap.get(n.__class__, "")
+ if access:
+ text.append( getattr(n, access) )
+ alltext = [t for t in text if t]
+ if alltext:
+ return u''.join(alltext)
+ else:
+ return ''
+
+ parent = property(getParent)
+ parents = property(getParents)
+ next = property(getNext)
+ previous = property(getPrevious)
+ siblings = property(getSiblings)
+ last = property(getLast)
+ first = property(getFirst)
+ lastchild = property(getLastChild)
+ firstchild = property(getFirstChild)
+
+
+
+# --------------------------------------------------------------------------
+# MixinClasses w/ special behaviour
+# -------------------------------------------------------------------------
+
+class AdvancedTable(AdvancedNode):
+ @property
+ def rows(self):
+ return [r for r in self if r.__class__ == Row]
+
+ @property
+ def numcols(self):
+ cols = [[n.__class__ for n in row].count(Cell) for row in self.rows]
+ if cols:
+ return max(cols)
+ else:
+ return 0
+
+class AdvancedRow(AdvancedNode):
+ @property
+ def cells(self):
+ return [c for c in self if c.__class__ == Cell]
+
+
+class AdvancedSection(AdvancedNode):
+ h_level = 0 # this is set if it originates from an H1, H2, ... TagNode
+ def getSectionLevel(self):
+ return 1 + self.getLevel()
+
+class AdvancedImageLink(AdvancedNode):
+ isblocknode = property ( lambda s: not s.isInline() )
+
+class AdvancedMath(AdvancedNode):
+ @property
+ def isblocknode(self):
+ if self.caption.strip().startswith("\\begin{align}") or \
+ self.caption.strip().startswith("\\begin{alignat}"):
+ return True
+ return False
+
+
+
+# --------------------------------------------------------------------------
+# Missing as Classes derived from parser.Style
+# -------------------------------------------------------------------------
+
+
+class Emphasized(Style, AdvancedNode):
+ "EM"
+ pass
+
+class Strong(Style, AdvancedNode):
+ pass
+
+class DefinitionList(Style, AdvancedNode):
+ "DL"
+ pass
+
+class DefinitionTerm(Style, AdvancedNode):
+ "DT"
+ pass
+
+class DefinitionDescription(Style, AdvancedNode):
+ "DD"
+ pass
+
+class Blockquote(Style, AdvancedNode):
+ "margins to left & right"
+ pass
+
+class Indented(Style, AdvancedNode):
+ "margin to the left"
+
+class Overline(Style, AdvancedNode):
+ _style = "overline"
+
+class Underline(Style, AdvancedNode):
+ _style = "u"
+
+class Sub(Style, AdvancedNode):
+ _style = "sub"
+
+class Sup(Style, AdvancedNode):
+ _style = "sup"
+
+class Small(Style, AdvancedNode):
+ _style = "small"
+
+class Big(Style, AdvancedNode):
+ _style = "big"
+
+class Cite(Style, AdvancedNode):
+ _style = "cite"
+
+
+_styleNodeMap = dict( (k._style,k) for k in [Overline, Underline, Sub, Sup, Small, Big, Cite] )
+
+# --------------------------------------------------------------------------
+# Missing as Classes derived from parser.TagNode
+# -------------------------------------------------------------------------
+
+class Source(TagNode, AdvancedNode):
+ _tag = "source"
+
+class Code(TagNode, AdvancedNode):
+ _tag = "code"
+
+class BreakingReturn(TagNode, AdvancedNode):
+ _tag = "br"
+
+class HorizontalRule(TagNode, AdvancedNode):
+ _tag = "hr"
+
+class Index(TagNode, AdvancedNode):
+ _tag = "index"
+
+class Teletyped(TagNode, AdvancedNode):
+ _tag = "tt"
+
+class Reference(TagNode, AdvancedNode):
+ _tag = "ref"
+
+class ReferenceList(TagNode, AdvancedNode):
+ _tag = "references"
+
+class Gallery(TagNode, AdvancedNode):
+ _tag = "gallery"
+
+class Center(TagNode, AdvancedNode):
+ _tag = "center"
+
+class Div(TagNode, AdvancedNode):
+ _tag = "div"
+
+class Span(TagNode, AdvancedNode): # span is defined as inline node which is in theory correct.
+ _tag = "span"
+
+class Strike(TagNode,AdvancedNode):
+ _tag = "strike"
+
+class ImageMap(TagNode, AdvancedNode): # defined as block node, maybe incorrect
+ _tag = "imagemap"
+
+_tagNodeMap = dict( (k._tag,k) for k in [Source, Code, BreakingReturn, HorizontalRule, Index, Teletyped, Reference, ReferenceList, Gallery, Center, Div, Span, Strike, ImageMap] )
+_styleNodeMap["s"] = Strike # Special Handling for deprecated s style
+
+
+# --------------------------------------------------------------------------
+# BlockNode separation for AdvancedNode.isblocknode
+# -------------------------------------------------------------------------
+
+"""
+For writers it is useful to know whether elements are inline (within a paragraph) or not.
+We define a list of block nodes, which is used in AdvancedNode as:
+
+AdvancedNode.isblocknode
+
+Image depends on the result of Image.isInline(), see above.
+
+Open Issues: Math, Magic, (unknown) TagNode
+
+"""
+_blockNodesMap = (Book, Chapter, Article, Section, Paragraph, Div,
+ PreFormatted, Cell, Row, Table, Item, BreakingReturn,
+ ItemList, Timeline, Cite, HorizontalRule, Gallery, Indented,
+ DefinitionList, DefinitionTerm, DefinitionDescription, ReferenceList, Source)
+
+for k in _blockNodesMap:
+ k.isblocknode = True
+
+
+
+# --------------------------------------------------------------------------
+# funcs for extending the nodes
+# -------------------------------------------------------------------------
+
+def MixIn(pyClass, mixInClass, makeFirst=False):
+ if mixInClass not in pyClass.__bases__:
+ if makeFirst:
+ pyClass.__bases__ = (mixInClass,) + pyClass.__bases__
+ else:
+ pyClass.__bases__ += (mixInClass,)
+
+def extendClasses(node):
+ for c in node.children[:]:
+ extendClasses(c)
+ c._parentref = weakref.ref(node)
+
+# Nodes we defined above that are handled separately in extendClasses
+_advancedNodesMap = {Section: AdvancedSection, ImageLink:AdvancedImageLink,
+ Math:AdvancedMath, Row:AdvancedRow, Table:AdvancedTable}
+MixIn(Node, AdvancedNode)
+for k, v in _advancedNodesMap.items():
+ MixIn(k,v)
+
+# --------------------------------------------------------------------------
+# funcs for repairing the tree
+# -------------------------------------------------------------------------
+
+
+def fixTagNodes(node):
+ """
+ detect known TagNode(s) and associate appropriate Nodes
+ """
+ for c in node.children:
+ if c.__class__ == TagNode:
+ if c.caption in _tagNodeMap:
+ c.__class__ = _tagNodeMap[c.caption]
+ elif c.caption in ("h1", "h2", "h3", "h4", "h5", "h6"): # FIXME
+ # NEED TO MOVE NODE IF IT REALLY STARTS A SECTION
+ c.__class__ = Section
+ MixIn(c.__class__, AdvancedSection)
+ c.level = int(c.caption[1])
+ c.caption = ""
+ else:
+ log.warn("fixTagNodes, unknowntagnode %r" % c)
+ #raise Exception, "unknown tag %s" % c.caption # FIXME
+ fixTagNodes(c)
+
+
+def fixStyle(node):
+ """
+ parser.Style Nodes are mapped to logical markup
+ detection of DefinitionList depends on removeNodes
+ and removeNewlines
+ """
+ if not node.__class__ == Style:
+ return
+    # replace this node by a more appropriate one
+ if node.caption == "''":
+ node.__class__ = Emphasized
+ node.caption = ""
+ elif node.caption=="'''''":
+ node.__class__ = Strong
+ node.caption = ""
+ em = Emphasized("''")
+ for c in node.children:
+ em.appendChild(c)
+ node.children = []
+ node.appendChild(em)
+ elif node.caption == "'''":
+ node.__class__ = Strong
+ node.caption = ""
+ elif node.caption == ";":
+ # this starts a definition list ? DL [DT->DD, ...]
+ # check if previous node is DefinitionList, if not create one
+ if node.previous.__class__ == DefinitionList:
+ node.__class__ = DefinitionTerm
+ node.moveto(node.previous.lastchild)
+ else:
+ node.__class__ = DefinitionList
+ dt = DefinitionTerm()
+ for c in node.children:
+ dt.appendChild(c)
+ node.children = []
+ node.appendChild(dt)
+ elif node.caption.startswith(":"):
+ if node.previous.__class__ == DefinitionList:
+ node.__class__ = DefinitionDescription
+ node.moveto(node.previous.lastchild)
+ node.caption = ""
+ else:
+ node.__class__ = Indented
+ elif node.caption in _styleNodeMap:
+ node.__class__ = _styleNodeMap[node.caption]
+ node.caption = ""
+ else:
+ log.warn("fixStyle, unknownstyle %r" % node)
+ #raise Exception, "unknown style %s" % node.caption # FIXME
+ pass
+ return node
+
+def fixStyles(node):
+ if node.__class__ == Style:
+ fixStyle(node)
+ for c in node.children[:]:
+ fixStyles(c)
+
+
+def removeNodes(node):
+ """
+ the parser generates empty Node elements that do
+ nothing but group other nodes. we remove them here
+ """
+ if node.__class__ == Node:
+ # first child of section groups heading text - grouping Node must not be removed
+ if not (node.previous == None and node.parent.__class__ == Section):
+ node.parent.replaceChild(node, node.children)
+ for c in node.children[:]:
+ removeNodes(c)
+
+def removeNewlines(node):
+ """
+ remove newlines, tabs, spaces if we are next to a blockNode
+ """
+ if node.__class__ == Text and not node.getParentNodesByClass(PreFormatted) and not node.getParentNodesByClass(Source):
+ if node.caption.strip() == u"":
+ prev = node.previous or node.parent # previous sibling node or parentnode
+ next = node.next or node.parent.next
+ if not next or next.isblocknode or not prev or prev.isblocknode:
+ node.parent.removeChild(node)
+ node.caption = node.caption.replace("\n", " ")
+
+ for c in node.children[:]:
+ removeNewlines(c)
+
+
+
+
+def buildAdvancedTree(root): # USE WITH CARE
+ """
+ extends and cleans parse trees
+    do not use these functions without knowing whether these
+ Node modifications fit your problem
+ """
+ extendClasses(root)
+ fixTagNodes(root)
+ removeNodes(root)
+ removeNewlines(root)
+ fixStyles(root)
+
+def getAdvTree(fn):
+ from mwlib.dummydb import DummyDB
+ from mwlib.uparser import parseString
+ db = DummyDB()
+ input = unicode(open(fn).read(), 'utf8')
+ r = parseString(title=fn, raw=input, wikidb=db)
+ buildAdvancedTree(r)
+ return r
+
+
+
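To sketch how the pieces above fit together (illustrative only; the file name is hypothetical): getAdvTree() parses a local file and runs buildAdvancedTree(), after which the AdvancedNode navigation helpers are available on every node.

    from mwlib.advtree import getAdvTree, Section

    tree = getAdvTree("article.txt")   # parse + buildAdvancedTree()
    for section in tree.getChildNodesByClass(Section):
        # AdvancedNode mixes in parent/siblings/next/previous and text helpers
        print section.getSectionLevel(), repr(section.getAllDisplayText()[:40])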
diff --git a/mwlib/allnodes.py b/mwlib/allnodes.py
new file mode 100644
index 0000000..ebd6d3b
--- /dev/null
+++ b/mwlib/allnodes.py
@@ -0,0 +1,27 @@
+import mwlib.parser
+import mwlib.advtree
+
+import types
+
+def allnodes():
+ all = set()
+ names = set()
+ for m in (mwlib.parser, mwlib.advtree):
+ for x in dir(m):
+ if x in names:
+ continue
+ k = getattr(m, x)
+ if type(k) == types.TypeType:
+ if issubclass(k, mwlib.parser.Node):
+ all.add(k)
+ names.add(x)
+ return all
+
+
+if __name__ == '__main__':
+ # EXAMPLE THAT SHOWS HOW TO IDENTIFY MISSING NODES
+ from mwlib.parser import Control, Chapter
+ my = set((Control, Chapter))
+ missing = allnodes() - my
+ assert len(missing) == len(allnodes()) -2
+ #print missing
diff --git a/mwlib/apps.py b/mwlib/apps.py
new file mode 100644
index 0000000..55a427e
--- /dev/null
+++ b/mwlib/apps.py
@@ -0,0 +1,378 @@
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+"""main programs - installed via setuptools' entry_points"""
+
+import optparse
+
+def buildcdb():
+ parser = optparse.OptionParser(usage="%prog --input XMLDUMP --output OUTPUT")
+ parser.add_option("-i", "--input", help="input file")
+ parser.add_option("-o", "--output", help="write output to OUTPUT")
+ options, args = parser.parse_args()
+
+ if args:
+ parser.error("too many arguments.")
+
+
+ input = options.input
+ output = options.output
+
+ if not (input and output):
+ parser.error("missing argument.")
+
+ import os
+ from mwlib import cdbwiki
+
+ cdbwiki.BuildWiki(input, output)()
+ open(os.path.join(output, "wikiconf.txt"), "w").write("""
+[wiki]
+type = cdb
+path = %s
+
+[images]
+type = download
+url = http://upload.wikimedia.org/wikipedia/commons/
+localpath = ~/images
+""" % (os.path.abspath(output),))
+
+def show():
+ parser = optparse.OptionParser(usage="%prog [-e|--expand] --conf CONF ARTICLE [...]")
+ parser.add_option("-c", "--conf", help="config file")
+ parser.add_option("-e", "--expand", action="store_true", help="expand templates")
+ parser.add_option("-t", "--template", action="store_true", help="show template")
+
+ options, args = parser.parse_args()
+
+ if not args:
+ parser.error("missing ARTICLE argument")
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ conf = options.conf
+ if not conf:
+ parser.error("missing --conf argument")
+
+ from mwlib import wiki, expander
+
+ db = wiki.makewiki(conf)['wiki']
+
+ for a in articles:
+ if options.template:
+ raw=db.getTemplate(a)
+ else:
+ raw=db.getRawArticle(a)
+
+ if raw:
+ if options.expand:
+ te = expander.Expander(raw, pagename=a, wikidb=db)
+ raw = te.expandTemplates()
+
+ print raw.encode("utf-8")
+
+
+def buildzip():
+ parser = optparse.OptionParser(usage="%prog [OPTIONS] [ARTICLE ...]")
+ parser.add_option("-c", "--conf", help="config file (required unless --baseurl is given)")
+ parser.add_option("-b", "--baseurl", help="base URL for mwapidb backend")
+ parser.add_option("-s", "--shared-baseurl", help="base URL for shared images for mwapidb backend")
+ parser.add_option("-m", "--metabook", help="JSON encoded text file with book structure")
+ parser.add_option('--collectionpage', help='Title of a collection page')
+ parser.add_option("-x", "--noimages", action="store_true", help="exclude images")
+ parser.add_option("-o", "--output", help="write output to OUTPUT")
+ parser.add_option("-p", "--posturl", help="http post to POSTURL")
+ parser.add_option("-i", "--imagesize",
+ help="max. pixel size (width or height) for images (default: 800)")
+ parser.add_option("-d", "--daemonize", action="store_true",
+                      help='become a daemon after collecting articles (before POST request)')
+ parser.add_option("-l", "--logfile", help="log to logfile")
+ parser.add_option("--license", help="Title of article containing full license text")
+ parser.add_option("--template-blacklist", help="Title of article containing blacklisted templates")
+ options, args = parser.parse_args()
+
+ import tempfile
+ import os
+ import zipfile
+
+ from mwlib import utils
+ from mwlib.utils import daemonize
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ baseurl = options.baseurl
+ conf = options.conf
+ if not baseurl and not options.conf:
+ parser.error("neither --conf nor --baseurl specified\nuse --help for all options")
+
+ posturl = None
+ def post_status(status):
+ print 'status:', status
+ if not posturl:
+ return
+ try:
+ return urllib2.urlopen(posturl, urllib.urlencode({'status': status})).read()
+ except Exception, e:
+ print 'ERROR posting status %r to %r' % (status, posturl)
+
+ def post_progress(progress):
+ print 'progress', progress
+ if not posturl:
+ return
+ try:
+ return urllib2.urlopen(posturl, urllib.urlencode({'progress': int(progress)})).read()
+ except Exception, e:
+ print 'ERROR posting progress %r to %r' % (progress, posturl)
+
+ try:
+ if options.logfile:
+ utils.start_logging(options.logfile)
+
+ output = options.output
+
+ from mwlib import wiki, recorddb, metabook
+
+ mb = metabook.MetaBook()
+ if conf:
+ from ConfigParser import ConfigParser
+
+ w = wiki.makewiki(conf)
+ cp = ConfigParser()
+ cp.read(conf)
+ license = {
+ 'name': cp.get('wiki', 'defaultarticlelicense')
+ }
+ if license['name'] is not None:
+ license['wikitext'] = w['wiki'].getRawArticle(license['name'])
+ mb.source = {
+ 'name': cp.get('wiki', 'name'),
+ 'url': cp.get('wiki', 'url'),
+ 'defaultarticlelicense': license,
+ }
+ else:
+ w = {
+ 'wiki': wiki.wiki_mwapi(baseurl, options.license, options.template_blacklist),
+ 'images': wiki.image_mwapi(baseurl, shared_base_url=options.shared_baseurl)
+ }
+ metadata = w['wiki'].getMetaData()
+ mb.source = {
+ 'name': metadata['name'],
+ 'url': metadata['url'],
+ 'defaultarticlelicense': metadata['license'],
+ }
+
+ if options.noimages:
+ w['images'] = None
+ else:
+ if options.imagesize:
+ imagesize = int(options.imagesize)
+ else:
+ imagesize = 800
+
+ if output:
+ zipfilename = output
+ else:
+ fd, zipfilename = tempfile.mkstemp()
+ os.close(fd)
+
+ if options.collectionpage:
+ mwcollection = w['wiki'].getRawArticle(options.collectionpage)
+ mb.loadCollectionPage(mwcollection)
+ elif options.metabook:
+ mb.readJsonFile(options.metabook)
+
+ # do not daemonize earlier: Collection extension deletes input metabook file!
+ if options.daemonize:
+ daemonize()
+
+ posturl = options.posturl
+ if posturl:
+ posturl = posturl.encode('utf-8')
+
+ from mwlib.utils import get_multipart
+ import urllib
+ import urllib2
+
+ zf = zipfile.ZipFile(zipfilename, 'w')
+ z = recorddb.ZipfileCreator(zf, w['wiki'], w['images'])
+
+ post_status('parsing')
+
+ for x in articles:
+ z.addArticle(x)
+ mb.addArticles(articles)
+
+ z.addObject('metabook.json', mb.dumpJson())
+ articles = list(mb.getArticles())
+ if articles:
+ inc = 70/len(articles)
+ else:
+ inc = 0
+ p = 0
+ for title, revision in articles:
+ post_progress(p)
+ z.addArticle(title, revision=revision)
+ p += inc
+
+ post_status('packaging')
+
+ if not options.noimages:
+ z.writeImages(size=imagesize)
+
+ post_progress(80)
+
+ z.writeContent()
+ zf.close()
+
+ post_progress(90)
+
+ if posturl:
+ post_status('uploading')
+ zf = open(zipfilename, "rb")
+ ct, data = get_multipart('collection.zip', zf.read(), 'collection')
+ zf.close()
+ req = urllib2.Request(posturl, data=data, headers={"Content-Type": ct})
+ result = urllib2.urlopen(req).read()
+
+ if w['images']:
+ w['images'].clear()
+
+ if not output:
+ os.unlink(zipfilename)
+
+ post_status('finished')
+ post_progress(100)
+ except Exception, e:
+ post_status('error')
+ raise
+
+
+def parse():
+ parser = optparse.OptionParser(usage="%prog [-a|--all] --conf CONF [ARTICLE1 ...]")
+ parser.add_option("-a", "--all", action="store_true", help="parse all articles")
+ parser.add_option("--tb", action="store_true", help="show traceback on error")
+
+ parser.add_option("-c", "--conf", help="config file")
+
+ options, args = parser.parse_args()
+
+ if not args and not options.all:
+ parser.error("missing option.")
+
+ if not options.conf:
+ parser.error("missing --conf argument")
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ conf = options.conf
+
+ import traceback
+ from mwlib import wiki, uparser
+
+ w = wiki.makewiki(conf)
+
+ db = w['wiki']
+
+ if options.all:
+ if not hasattr(db, "articles"):
+ raise RuntimeError("%s does not support iterating over all articles" % (db, ))
+ articles = db.articles()
+
+
+ import time
+ for x in articles:
+ try:
+ raw = db.getRawArticle(x)
+ # yes, raw can be None, when we have a redirect to a non-existing article.
+ if raw is None:
+ continue
+ stime=time.time()
+ a=uparser.parseString(x, raw=raw, wikidb=db)
+ except Exception, err:
+ print "F", repr(x), err
+ if options.tb:
+ traceback.print_exc()
+ else:
+ print "G", time.time()-stime, repr(x)
+
+def serve():
+ parser = optparse.OptionParser(usage="%prog --conf CONF ARTICLE [...]")
+ parser.add_option("-c", "--conf", help="config file")
+
+ options, args = parser.parse_args()
+
+
+ conf = options.conf
+ if not options.conf:
+ parser.error("missing --conf argument")
+
+ from mwlib import wiki, web
+
+ res = wiki.makewiki(conf)
+ db = res['wiki']
+ images = res['images']
+ from wsgiref.simple_server import make_server, WSGIServer
+
+ from SocketServer import ForkingMixIn
+ class MyServer(ForkingMixIn, WSGIServer):
+ pass
+
+ iface, port = '0.0.0.0', 8080
+ print "serving on %s:%s" % (iface, port)
+ http = make_server(iface, port, web.Serve(db, res['images']), server_class=MyServer)
+ http.serve_forever()
+
+
+
+def html():
+ parser = optparse.OptionParser(usage="%prog --conf CONF ARTICLE [...]")
+ parser.add_option("-c", "--conf", help="config file")
+
+ options, args = parser.parse_args()
+
+ if not args:
+ parser.error("missing ARTICLE argument")
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ conf = options.conf
+ if not options.conf:
+ parser.error("missing --conf argument")
+
+ import StringIO
+ import tempfile
+ import os
+ import webbrowser
+ from mwlib import wiki, uparser, htmlwriter
+
+ res = wiki.makewiki(conf)
+ db = res['wiki']
+ images = res['images']
+
+ for a in articles:
+ raw=db.getRawArticle(a)
+ if not raw:
+ continue
+
+ out=StringIO.StringIO()
+ out.write("""<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8"></meta>
+<link rel="stylesheet" href="pedia.css" />
+</head>
+<body>
+
+""")
+
+    a=uparser.parseString(a, raw=raw, wikidb=db)
+ w=htmlwriter.HTMLWriter(out, images)
+ w.write(a)
+
+ fd, htmlfile = tempfile.mkstemp(".html")
+ os.close(fd)
+ open(htmlfile, "wb").write(out.getvalue().encode('utf-8'))
+ webbrowser.open("file://"+htmlfile)
+
+
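As a rough illustration of what show() does under the hood (a sketch, with a hypothetical config path and article title):

    from mwlib import wiki, expander

    db = wiki.makewiki("wikiconf.txt")['wiki']
    raw = db.getRawArticle(u"Physics")
    if raw:
        te = expander.Expander(raw, pagename=u"Physics", wikidb=db)
        print te.expandTemplates().encode("utf-8")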
diff --git a/mwlib/caller.py b/mwlib/caller.py
new file mode 100755
index 0000000..583a123
--- /dev/null
+++ b/mwlib/caller.py
@@ -0,0 +1,20 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import sys
+import os
+
+def caller(n=2):
+ """return caller as string"""
+ f = sys._getframe(n)
+ return "%s:%s" % (f.f_code.co_filename, f.f_lineno)
+
+def short(n=2):
+ """return caller as string"""
+ f = sys._getframe(n)
+ return "%s:%s" % (os.path.basename(f.f_code.co_filename), f.f_lineno)
+
+def callerframe(n=2):
+ return sys._getframe(n)
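A small usage sketch (not part of the commit) for the helpers above:

    from mwlib import caller

    def warn(msg):
        # caller.short() reports file:lineno of whoever called warn()
        print "%s: %s" % (caller.short(), msg)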
diff --git a/mwlib/cdb.py b/mwlib/cdb.py
new file mode 100755
index 0000000..9aa2a65
--- /dev/null
+++ b/mwlib/cdb.py
@@ -0,0 +1,262 @@
+#! /usr/bin/env python
+"""
+Dan Bernstein's CDB implemented in Python
+
+see http://cr.yp.to/cdb.html
+
+"""
+
+from __future__ import generators
+
+import os
+import struct
+import mmap
+
+def uint32_unpack(buf):
+ return struct.unpack('<L', buf)[0]
+
+def uint32_pack(n):
+ return struct.pack('<L', n)
+
+CDB_HASHSTART = 5381
+
+def cdb_hash(buf):
+ h = CDB_HASHSTART
+ for c in buf:
+ h = (h + (h << 5)) & 0xffffffffL
+ h ^= ord(c)
+ return h
+
+class Cdb(object):
+
+ def __init__(self, fp):
+ self.fp = fp
+ fd = fp.fileno()
+ self.size = os.fstat(fd).st_size
+ self.map = mmap.mmap(fd, self.size, access=mmap.ACCESS_READ)
+ self.eod = uint32_unpack(self.map[:4])
+ self.findstart()
+ self.loop = 0 # number of hash slots searched under this key
+ # initialized if loop is nonzero
+ self.khash = 0
+ self.hpos = 0
+ self.hslots = 0
+ # initialized if findnext() returns 1
+ self.dpos = 0
+ self.dlen = 0
+
+ def close(self):
+ self.map.close()
+
+ def __iter__(self, fn=None):
+ len = 2048
+ while len < self.eod:
+ klen, vlen = struct.unpack("<LL", self.map[len:len+8])
+ len += 8
+ key = self.map[len:len+klen]
+ len += klen
+ val = self.map[len:len+vlen]
+ len += vlen
+ if fn:
+ yield fn(key, val)
+ else:
+ yield (key, val)
+
+ def iteritems(self):
+ return self.__iter__()
+
+ def iterkeys(self):
+ return self.__iter__(lambda k,v: k)
+
+ def itervalues(self):
+ return self.__iter__(lambda k,v: v)
+
+ def items(self):
+ ret = []
+ for i in self.iteritems():
+ ret.append(i)
+ return ret
+
+ def keys(self):
+ ret = []
+ for i in self.iterkeys():
+ ret.append(i)
+ return ret
+
+ def values(self):
+ ret = []
+ for i in self.itervalues():
+ ret.append(i)
+ return ret
+
+ def findstart(self):
+ self.loop = 0
+
+ def read(self, n, pos):
+ # XXX add code for platforms without mmap
+ return self.map[pos:pos+n]
+
+ def match(self, key, pos):
+ if key == self.read(len(key), pos):
+ return 1
+ else:
+ return 0
+
+ def findnext(self, key):
+ if not self.loop:
+ u = cdb_hash(key)
+ buf = self.read(8, u << 3 & 2047)
+ self.hslots = uint32_unpack(buf[4:])
+ if not self.hslots:
+ raise KeyError
+ self.hpos = uint32_unpack(buf[:4])
+ self.khash = u
+ u >>= 8
+ u %= self.hslots
+ u <<= 3
+ self.kpos = self.hpos + u
+
+ while self.loop < self.hslots:
+ buf = self.read(8, self.kpos)
+ pos = uint32_unpack(buf[4:])
+ if not pos:
+ raise KeyError
+ self.loop += 1
+ self.kpos += 8
+ if self.kpos == self.hpos + (self.hslots << 3):
+ self.kpos = self.hpos
+ u = uint32_unpack(buf[:4])
+ if u == self.khash:
+ buf = self.read(8, pos)
+ u = uint32_unpack(buf[:4])
+ if u == len(key):
+ if self.match(key, pos + 8):
+ dlen = uint32_unpack(buf[4:])
+ dpos = pos + 8 + len(key)
+ return self.read(dlen, dpos)
+ raise KeyError
+
+ def __getitem__(self, key):
+ self.findstart()
+ return self.findnext(key)
+
+ def get(self, key, default=None):
+ self.findstart()
+ try:
+ return self.findnext(key)
+ except KeyError:
+ return default
+
+def cdb_dump(infile):
+ """dump a database in djb's cdbdump format"""
+ db = Cdb(infile)
+ for key,value in db.iteritems():
+ print "+%d,%d:%s->%s" % (len(key), len(value), key, value)
+ print
+
+def cdb_make(outfile, items):
+ pos = 2048
+ tables = {} # { h & 255 : [(h, p)] }
+
+ # write keys and data
+ outfile.seek(pos)
+ for key, value in items:
+ outfile.write(uint32_pack(len(key)) + uint32_pack(len(value)))
+ h = cdb_hash(key)
+ outfile.write(key)
+ outfile.write(value)
+ tables.setdefault(h & 255, []).append((h, pos))
+ pos += 8 + len(key) + len(value)
+
+ final = ''
+ # write hash tables
+ for i in range(256):
+ entries = tables.get(i, [])
+ nslots = 2*len(entries)
+ final += uint32_pack(pos) + uint32_pack(nslots)
+ null = (0, 0)
+ table = [null] * nslots
+ for h, p in entries:
+ n = (h >> 8) % nslots
+ while table[n] is not null:
+ n = (n + 1) % nslots
+ table[n] = (h, p)
+ for h, p in table:
+ outfile.write(uint32_pack(h) + uint32_pack(p))
+ pos += 8
+
+ # write header (pointers to tables and their lengths)
+ outfile.flush()
+ outfile.seek(0)
+ outfile.write(final)
+
+class CdbMake(object):
+ def __init__(self, outfile):
+ self.pos = 2048
+ self.outfile = outfile
+ self.outfile.seek(self.pos)
+ self.tables = {}
+
+ def add(self, key, value):
+ outfile = self.outfile
+ outfile.write(uint32_pack(len(key)) + uint32_pack(len(value)))
+ h = cdb_hash(key)
+ outfile.write(key)
+ outfile.write(value)
+ self.tables.setdefault(h & 255, []).append((h, self.pos))
+ self.pos += 8 + len(key) + len(value)
+
+ def finish(self):
+ final = ''
+ tables = self.tables
+ pos = self.pos
+ outfile = self.outfile
+
+ # write hash tables
+ for i in range(256):
+ entries = tables.get(i, [])
+ nslots = 2*len(entries)
+ final += uint32_pack(pos) + uint32_pack(nslots)
+ null = (0, 0)
+ table = [null] * nslots
+ for h, p in entries:
+ n = (h >> 8) % nslots
+ while table[n] is not null:
+ n = (n + 1) % nslots
+ table[n] = (h, p)
+ for h, p in table:
+ outfile.write(uint32_pack(h) + uint32_pack(p))
+ pos += 8
+
+ # write header (pointers to tables and their lengths)
+ outfile.flush()
+ outfile.seek(0)
+ outfile.write(final)
+
+
+def test():
+ #db = Cdb(open("t"))
+ #print db['one']
+ #print db['two']
+ #print db['foo']
+ #print db['us']
+ #print db.get('ec')
+ #print db.get('notthere')
+ db = open('test.cdb', 'wb')
+ cdb_make(db,
+ [('one', 'Hello'),
+ ('two', 'Goodbye'),
+ ('foo', 'Bar'),
+ ('us', 'United States'),
+ ])
+ db.close()
+ db = Cdb(open("test.cdb", 'rb'))
+ print db['one']
+ print db['two']
+ print db['foo']
+ print db['us']
+ print db.get('ec')
+ print db.get('notthere')
+
+if __name__ == '__main__':
+ test()
diff --git a/mwlib/cdbwiki.py b/mwlib/cdbwiki.py
new file mode 100755
index 0000000..98bb6a7
--- /dev/null
+++ b/mwlib/cdbwiki.py
@@ -0,0 +1,243 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import sys
+import os
+import zlib
+import re
+
+from mwlib import cdb
+
+try:
+ from xml.etree import cElementTree
+except ImportError:
+ import cElementTree
+
+ns = '{http://www.mediawiki.org/xml/export-0.3/}'
+
+wikiindex = "wikiidx"
+wikidata = "wikidata.bin"
+
+
+
+def normname(name):
+ name = name.strip().replace("_", " ")
+ name = name[:1].upper()+name[1:]
+ return name
+
+class Tags:
+ page = ns + 'page'
+
+ # <title> inside <page>
+ title = ns + 'title'
+
+ # <revision> inside <page>
+ revision = ns + 'revision'
+
+ # <id> inside <revision>
+ revid = ns + 'id'
+
+ # <contributor><username> inside <revision>
+ username = ns + 'contributor/' + ns + 'username'
+
+ # <text> inside <revision>
+ text = ns + 'text'
+
+ # <timestamp> inside <revision>
+ timestamp = ns + 'timestamp'
+
+ # <revision><text> inside <page>
+ revision_text = ns + 'revision/' + ns + 'text'
+
+ siteinfo = ns + "siteinfo"
+
+class DumpParser(object):
+ category_ns = set(['category', 'kategorie'])
+ image_ns = set(['image', 'bild'])
+ template_ns = set(['template', 'vorlage'])
+ wikipedia_ns = set(['wikipedia'])
+
+ tags = Tags()
+
+
+ def __init__(self, xmlfilename):
+ self.xmlfilename = xmlfilename
+
+ def _write(self, msg):
+ sys.stdout.write(msg)
+ sys.stdout.flush()
+
+ def openInputStream(self):
+ if self.xmlfilename.lower().endswith(".bz2"):
+ f = os.popen("bunzip2 -c %s" % self.xmlfilename, "r")
+ elif self.xmlfilename.lower().endswith(".7z"):
+ f = os.popen("7z -so x %s" % self.xmlfilename, "r")
+ else:
+ f = open(self.xmlfilename, "r")
+
+ return f
+
+ def __call__(self):
+ f = self.openInputStream()
+
+ count = 0
+ for event, elem in cElementTree.iterparse(f):
+ if elem.tag != self.tags.page:
+ continue
+ self.handlePageElement(elem)
+ elem.clear()
+ count += 1
+
+ if count % 5000 == 0:
+ self._write(" %s\n" % count)
+ elif count % 100 == 0:
+ self._write(".")
+
+
+ def handlePageElement(self, page):
+ title = page.find(self.tags.title).text
+ revisions = page.findall(self.tags.revision)
+ if not revisions:
+ return
+ revision = revisions[-1]
+
+ texttag = revision.find(self.tags.text)
+ timestamptag = revision.find(self.tags.timestamp)
+ revision.clear()
+
+ if texttag is not None:
+ text = texttag.text
+ texttag.clear()
+ else:
+ text = None
+
+ if timestamptag is not None:
+ timestamp = timestamptag.text
+ timestamptag.clear()
+ else:
+ timestamp = None
+
+ if not text:
+ return
+
+ if isinstance(title, str):
+ title = unicode(title)
+ if isinstance(text, str):
+ text = unicode(text)
+
+
+ if ':' in title:
+ ns, rest = title.split(':', 1)
+ ns = ns.lower()
+ if ns not in self.template_ns:
+ return
+ self.handleTemplate(rest, text, timestamp)
+ else:
+ self.handleArticle(title, text, timestamp)
+
+ def handleArticle(self, title, text, timestamp):
+ print "ART:", repr(title), len(text), timestamp
+
+ def handleTemplate(self, title, text, timestamp):
+ print "TEMPL:", repr(title), len(text), timestamp
+
+class BuildWiki(DumpParser):
+ def __init__(self, xmlfilename, outputdir):
+ DumpParser.__init__(self, xmlfilename)
+ self.outputdir = outputdir
+
+ def __call__(self):
+ if not os.path.exists(self.outputdir):
+ os.makedirs(self.outputdir)
+
+ n = os.path.join(self.outputdir, wikiindex)
+ out = open(os.path.join(self.outputdir, wikidata), "wb")
+ self.out = out
+ f = open(n+'.cdb', 'wb')
+ c = cdb.CdbMake(f)
+ self.cdb = c
+
+ DumpParser.__call__(self)
+ c.finish()
+ f.close()
+
+
+ def _writeobj(self, key, val):
+ key = key.encode("utf-8")
+ val = zlib.compress(val)
+ pos = self.out.tell()
+ self.out.write(val)
+ self.cdb.add(key, "%s %s" % (pos, len(val)))
+
+ def handleArticle(self, title, text, timestamp):
+ self._writeobj(u":"+title, text.encode("utf-8"))
+
+ def handleTemplate(self, title, text, timestamp):
+ self._writeobj(u"T:"+title, text.encode("utf-8"))
+
+
+
+class WikiDB(object):
+ redirect_rex = re.compile(r'^#Redirect:?\s*?\[\[(?P<redirect>.*?)\]\]', re.IGNORECASE)
+
+ def __init__(self, dir):
+ self.dir = dir
+ self.obj2pos_path = os.path.join(self.dir, wikidata)
+ self.cdb = cdb.Cdb(open(os.path.join(self.dir, wikiindex+'.cdb'), 'rb'))
+
+ def _readobj(self, key):
+ key = key.encode("utf-8")
+
+ try:
+ data = self.cdb[key]
+ except KeyError:
+ return None
+
+ pos, len = map(int, data.split())
+
+ f=open(self.obj2pos_path, "rb")
+ f.seek(pos)
+ d=f.read(len)
+ f.close()
+ return zlib.decompress(d)
+
+ def getRawArticle(self, title, raw=None, revision=None):
+ title = normname(title)
+ res = self._readobj(":"+title)
+ if res is None:
+ return None
+
+ res = unicode(res, 'utf-8')
+ mo = self.redirect_rex.search(res)
+ if mo:
+ redirect = mo.group('redirect')
+ redirect = normname(redirect.split("|", 1)[0].split("#", 1)[0])
+
+ return self.getRawArticle(redirect)
+
+ return res
+
+ def getTemplate(self, title, followRedirects=False):
+ if ":" in title:
+ title = title.split(':', 1)[1]
+
+ title = normname(title)
+ res = unicode(self._readobj(u"T:"+title) or "", 'utf-8')
+ if not res:
+ return res
+
+ mo = self.redirect_rex.search(res)
+ if mo:
+ redirect = mo.group('redirect')
+ redirect = normname(redirect.split("|", 1)[0].split("#", 1)[0])
+ return self.getTemplate(redirect)
+ return res
+
+
+ def articles(self):
+ for k, v in self.cdb:
+ if k[0]==':':
+ k = unicode(k[1:], "utf-8")
+ yield k
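+
+# Rough usage sketch (added for illustration, not part of the original module;
+# the file and directory names are hypothetical). BuildWiki converts an XML
+# dump into a cdb-backed directory, and WikiDB reads articles and templates
+# back from it:
+#
+#   BuildWiki('eswiki-pages-articles.xml.bz2', 'wikidir')()
+#   db = WikiDB('wikidir')
+#   text = db.getRawArticle(u'Argentina')   # unicode text, or None if missing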
diff --git a/mwlib/dummydb.py b/mwlib/dummydb.py
new file mode 100644
index 0000000..e17a90f
--- /dev/null
+++ b/mwlib/dummydb.py
@@ -0,0 +1,10 @@
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+class DummyDB(object):
+ def getRawArticle(self, name):
+ return None
+
+ def getTemplate(self, name, followRedirects=False):
+ return None
diff --git a/mwlib/expander.py b/mwlib/expander.py
new file mode 100755
index 0000000..a3a529d
--- /dev/null
+++ b/mwlib/expander.py
@@ -0,0 +1,553 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+from __future__ import with_statement
+import sys
+import re
+import os
+from mwlib import magics
+import mwlib.log
+
+DEBUG = "DEBUG_EXPANDER" in os.environ
+
+
+log = mwlib.log.Log("expander")
+
+splitpattern = """
+({{+) # opening braces
+|(}}+) # closing braces
+|(\[\[|\]\]) # link
+|((?:<noinclude>.*?</noinclude>)|(?:</?includeonly>)) # noinclude, comments: usually ignore
+|(?P<text>(?:<nowiki>.*?</nowiki>) # nowiki
+|(?:<math>.*?</math>)
+|(?:<imagemap[^<>]*>.*?</imagemap>)
+|(?:<gallery[^<>]*>.*?</gallery>)
+|(?:<source[^<>]*>.*?</source>)
+|(?:<pre.*?>.*?</pre>)
+|(?:[:\[\]\|{}<]) # all special characters
+|(?:[^\[\]\|:{}<]*)) # all others
+"""
+
+splitrx = re.compile(splitpattern, re.VERBOSE | re.DOTALL | re.IGNORECASE)
+
+onlyincluderx = re.compile("<onlyinclude>(.*?)</onlyinclude>", re.DOTALL | re.IGNORECASE)
+
+commentrx = re.compile(r"(\n *)?<!--.*?-->( *\n)?", re.DOTALL)
+
+def remove_comments(txt):
+ def repl(m):
+ #print "M:", repr(txt[m.start():m.end()])
+ if txt[m.start()]=='\n' and txt[m.end()-1]=='\n':
+ return '\n'
+ return (m.group(1) or "")+(m.group(2) or "")
+ return commentrx.sub(repl, txt)
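+
+# Behaviour sketch (examples added for illustration, not in the original):
+# HTML comments are dropped; a comment on a line of its own collapses together
+# with its surrounding newlines.
+#
+#   remove_comments(u"foo <!-- x --> bar")     -> u"foo  bar"
+#   remove_comments(u"foo\n<!-- x -->\nbar")   -> u"foo\nbar"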
+
+def preprocess(txt):
+ txt=txt.replace("\t", " ")
+ txt=remove_comments(txt)
+ return txt
+
+class symbols:
+ bra_open = 1
+ bra_close = 2
+ link = 3
+ noi = 4
+ txt = 5
+
+def old_tokenize(txt):
+ txt = preprocess(txt)
+
+ if "<onlyinclude>" in txt:
+ # if onlyinclude tags are used, only use the text between those tags; the template 'legend' is an example
+ txt = "".join(onlyincluderx.findall(txt))
+
+
+ tokens = []
+ for (v1, v2, v3, v4, v5) in splitrx.findall(txt):
+ if v5:
+ tokens.append((5, v5))
+ elif v4:
+ tokens.append((4, v4))
+ elif v3:
+ tokens.append((3, v3))
+ elif v2:
+ tokens.append((2, v2))
+ elif v1:
+ tokens.append((1, v1))
+
+ tokens.append((None, ''))
+
+ return tokens
+
+
+def new_tokenize(txt):
+ txt = preprocess(txt)
+
+ import _expander
+
+ if "<onlyinclude>" in txt:
+ # if onlyinclude tags are used, only use the text between those tags; the template 'legend' is an example
+ txt = "".join(onlyincluderx.findall(txt))
+
+ txt=txt+u'\0'
+ tokens = _expander.scan(txt)
+
+ res = []
+ for t in tokens:
+ type,start,len=t
+ if type:
+ res.append((type, txt[start:start+len]))
+ else:
+ res.append((None, ''))
+
+
+ return res
+
+tokenize = old_tokenize
+
+
+
+class Node(object):
+ def __init__(self):
+ self.children = []
+
+ def __repr__(self):
+ return "<%s %s children>" % (self.__class__.__name__, len(self.children))
+
+ def __iter__(self):
+ for x in self.children:
+ yield x
+
+ def show(self, out=None):
+ show(self, out=out)
+
+class Variable(Node):
+ pass
+
+class Template(Node):
+ pass
+
+def show(node, indent=0, out=None):
+ if out is None:
+ out=sys.stdout
+
+ out.write("%s%r\n" % (" "*indent, node))
+ if isinstance(node, basestring):
+ return
+ for x in node.children:
+ show(x, indent+1, out)
+
+def optimize(node):
+ if isinstance(node, basestring):
+ return node
+
+ if type(node) is Node and len(node.children)==1:
+ return optimize(node.children[0])
+
+ for i, x in enumerate(node.children):
+ node.children[i] = optimize(x)
+ return node
+
+
+class Parser(object):
+ template_ns = set([ ((5, u'Plantilla'), (5, u':')),
+ ])
+
+
+ def __init__(self, txt):
+ self.txt = txt
+ self.tokens = tokenize(txt)
+ self.pos = 0
+
+ def getToken(self):
+ return self.tokens[self.pos]
+
+ def setToken(self, tok):
+ self.tokens[self.pos] = tok
+
+
+ def variableFromChildren(self, children):
+ v=Variable()
+ name = Node()
+ v.children.append(name)
+
+ try:
+ idx = children.index(u"|")
+ except ValueError:
+ name.children = children
+ else:
+ name.children = children[:idx]
+ v.children.extend(children[idx+1:])
+ return v
+
+ def _eatBrace(self, num):
+ ty, txt = self.getToken()
+ assert ty == symbols.bra_close
+ assert len(txt)>= num
+ newlen = len(txt)-num
+ if newlen==0:
+ self.pos+=1
+ return
+
+ if newlen==1:
+ ty = symbols.txt
+
+ txt = txt[:newlen]
+ self.setToken((ty, txt))
+
+
+ def templateFromChildren(self, children):
+ t=Template()
+ # find the name
+ name = Node()
+ t.children.append(name)
+ for idx, c in enumerate(children):
+ if c==u'|':
+ break
+ name.children.append(c)
+
+
+ # find the arguments
+
+
+ arg = Node()
+
+ linkcount = 0
+ for idx, c in enumerate(children[idx+1:]):
+ if c==u'[[':
+ linkcount += 1
+ elif c==']]':
+ linkcount -= 1
+ elif c==u'|' and linkcount==0:
+ t.children.append(arg)
+ arg = Node()
+ continue
+ arg.children.append(c)
+
+
+ if arg.children:
+ t.children.append(arg)
+
+
+ return t
+
+ def parseOpenBrace(self):
+ ty, txt = self.getToken()
+ n = Node()
+
+ numbraces = len(txt)
+ self.pos += 1
+
+ while 1:
+ ty, txt = self.getToken()
+ if ty==symbols.bra_open:
+ n.children.append(self.parseOpenBrace())
+ elif ty is None:
+ break
+ elif ty==symbols.bra_close:
+ closelen = len(txt)
+ if closelen==2 or numbraces==2:
+ t=self.templateFromChildren(n.children)
+ n=Node()
+ n.children.append(t)
+ self._eatBrace(2)
+ numbraces-=2
+ else:
+ v=self.variableFromChildren(n.children)
+ n=Node()
+ n.children.append(v)
+ self._eatBrace(3)
+ numbraces -= 3
+
+ if numbraces==0:
+ break
+ elif numbraces==1:
+ n.children.insert(0, "{")
+ break
+ elif ty==symbols.noi:
+ self.pos += 1 # ignore <noinclude>
+ else: # link, txt
+ n.children.append(txt)
+ self.pos += 1
+
+ return n
+
+ def parse(self):
+ n = Node()
+ while 1:
+ ty, txt = self.getToken()
+ if ty==symbols.bra_open:
+ n.children.append(self.parseOpenBrace())
+ elif ty is None:
+ break
+ elif ty==symbols.noi:
+ self.pos += 1 # ignore <noinclude>
+ else: # bra_close, link, txt
+ n.children.append(txt)
+ self.pos += 1
+ return n
+
+def parse(txt):
+ return optimize(Parser(txt).parse())
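+
+# Example (added for illustration, not in the original source): parsing a
+# template call yields a Template node; its first child is the name node, the
+# remaining children are the arguments.
+#
+#   t = parse(u"{{foo|bar}}")   # -> <Template 2 children>
+#   t.show()                    # prints the tree: name u'foo', argument u'bar'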
+
+class MemoryLimitError(Exception):
+ pass
+
+class LazyArgument(object):
+ def __init__(self, node, expander, variables):
+ self.node = node
+ self.expander = expander
+ self._flatten = None
+ self.variables = variables
+
+ def flatten(self):
+ if self._flatten is None:
+ arg=[]
+ self.expander.flatten(self.node, arg, self.variables)
+
+ arg = u"".join(arg).strip()
+ if len(arg)>256*1024:
+ raise MemoryLimitError("template argument too long: %s bytes" % (len(arg),))
+
+ self._flatten = arg
+ return self._flatten
+
+class ArgumentList(object):
+ class notfound: pass
+
+ def __init__(self):
+ self.args = []
+ self.namedargs = {}
+ def __repr__(self):
+ return "<ARGLIST args=%r>" % ([x.flatten() for x in self.args],)
+ def append(self, a):
+ self.args.append(a)
+
+ def get(self, n, default):
+ return self.__getitem__(n) or default
+
+ def __iter__(self):
+ for x in self.args:
+ yield x
+
+ def __getslice__(self, i, j):
+ for x in self.args[i:j]:
+ yield x.flatten()
+
+ def __len__(self):
+ return len(self.args)
+
+ def __getitem__(self, n):
+ if isinstance(n, (int, long)):
+ try:
+ a=self.args[n]
+ except IndexError:
+ return u""
+ return a.flatten()
+
+ assert isinstance(n, basestring), "expected int or string"
+
+ varcount=1
+ if n not in self.namedargs:
+ for x in self.args:
+ f=x.flatten()
+ if u"=" in f:
+ name, val = f.split(u"=", 1)
+ name = name.strip()
+ val = val.strip()
+ self.namedargs[name] = val
+ if n==name:
+ return val
+ else:
+ name = str(varcount)
+ varcount+=1
+ self.namedargs[name] = f
+
+ if n==name:
+ return f
+ self.namedargs[n] = u''
+
+ val = self.namedargs[n]
+
+ return val
+
+
+class Expander(object):
+ def __init__(self, txt, pagename="", wikidb=None):
+ assert wikidb is not None, "must supply wikidb argument in Expander.__init__"
+ self.db = wikidb
+ self.resolver = magics.MagicResolver(pagename=pagename)
+ self.resolver.wikidb = wikidb
+
+ self.parsed = Parser(txt).parse()
+ #show(self.parsed)
+ self.parsedTemplateCache = {}
+
+ self.blacklist = set()
+ # the template_blacklist file is optional; ignore it when missing
+ try:
+ with open("template_blacklist", 'r') as f:
+ for line in f.readlines():
+ self.blacklist.add(line.rstrip())
+ except IOError:
+ pass
+
+ def getParsedTemplate(self, name):
+ if name.startswith("[["):
+ return None
+
+ if name.startswith(":"):
+ log.info("including article")
+ raw = self.db.getRawArticle(name[1:])
+ else:
+ name = name[0].capitalize() + name[1:]
+ name = "Plantilla:" + name
+ try:
+ return self.parsedTemplateCache[name]
+ except KeyError:
+ pass
+
+ # Check to see if this is a template in our blacklist --
+ # one that we don't want to bother rendering.
+ if name in self.blacklist:
+ log.info("Skipping template " + name.encode('utf8'))
+ raw = None
+ else:
+ raw = self.db.getTemplate(name, True)
+
+ if raw is None:
+ log.warn("no template", repr(name))
+ res = None
+ else:
+ # add newline to templates starting with a (semi)colon, or tablemarkup
+ # XXX what else? see test_implicit_newline in test_expander
+ if raw.startswith(":") or raw.startswith(";") or raw.startswith("{|"):
+ raw = '\n'+raw
+
+ log.info("parsing template", repr(name))
+ res = Parser(raw).parse()
+ if DEBUG:
+ print "TEMPLATE:", name, repr(raw)
+ res.show()
+
+ self.parsedTemplateCache[name] = res
+ return res
+
+
+ def flatten(self, n, res, variables):
+ if isinstance(n, Template):
+ name = []
+ self.flatten(n.children[0], name, variables)
+ name = u"".join(name).strip()
+ if len(name)>256*1024:
+ raise MemoryLimitError("template name too long: %s bytes" % (len(name),))
+
+ remainder = None
+ if ":" in name:
+ try_name, try_remainder = name.split(':', 1)
+ if self.resolver.has_magic(try_name):
+ name=try_name
+ remainder = try_remainder
+
+ var = ArgumentList()
+
+ varcount = 1 #unnamed vars
+
+ def args():
+ if remainder is not None:
+ tmpnode=Node()
+ tmpnode.children.append(remainder)
+ yield tmpnode
+ for x in n.children[1:]:
+ yield x
+
+ for x in args():
+ var.append(LazyArgument(x, self, variables))
+
+ rep = self.resolver(name, var)
+
+ if rep is not None:
+ res.append(rep)
+ else:
+ p = self.getParsedTemplate(name)
+ if p:
+ if DEBUG:
+ msg = "EXPANDING %r %s ===> " % (name, var)
+ oldidx = len(res)
+ self.flatten(p, res, var)
+
+ if DEBUG:
+ msg += "".join(res[oldidx:])
+ print msg
+
+
+ elif isinstance(n, Variable):
+ name = []
+ self.flatten(n.children[0], name, variables)
+ name = u"".join(name).strip()
+ if len(name)>256*1024:
+ raise MemoryLimitError("template name too long: %s bytes" % (len(name),))
+
+ v = variables.get(name, None)
+
+ if v is None:
+ if len(n.children)>1:
+ self.flatten(n.children[1:], res, variables)
+ else:
+ pass
+ # FIXME. breaks If
+ #res.append(u"{{{%s}}}" % (name,))
+ else:
+ res.append(v)
+ else:
+ for x in n:
+ if isinstance(x, basestring):
+ res.append(x)
+ else:
+ self.flatten(x, res, variables)
+
+ def expandTemplates(self):
+ res = []
+ self.flatten(self.parsed, res, ArgumentList())
+ return u"".join(res)
+
+
+class DictDB(object):
+ """wikidb implementation used for testing"""
+ def __init__(self, *args, **kw):
+ if args:
+ self.d, = args
+ else:
+ self.d = {}
+
+ self.d.update(kw)
+
+ normd = {}
+ for k, v in self.d.items():
+ normd[k.lower()] = v
+ self.d = normd
+
+ def getRawArticle(self, title):
+ return self.d[title.lower()]
+
+ def getTemplate(self, title, dummy):
+ return self.d.get(title.lower(), u"")
+
+def expandstr(s, expected=None, wikidb=None):
+ """debug function. expand templates in string s"""
+ if wikidb:
+ db = wikidb
+ else:
+ db = DictDB(dict(a=s))
+
+ te = Expander(s, pagename="thispage", wikidb=db)
+ res = te.expandTemplates()
+ print "EXPAND: %r -> %r" % (s, res)
+ if expected:
+ assert res==expected, "expected %r, got %r" % (expected, res)
+ return res
+
+if __name__=="__main__":
+ #print splitrx.groupindex
+ d=unicode(open(sys.argv[1]).read(), 'utf8')
+ e = Expander(d, wikidb=DictDB())  # Expander requires a wikidb; use the test DictDB here
+ print e.expandTemplates()
diff --git a/mwlib/expr.py b/mwlib/expr.py
new file mode 100755
index 0000000..fa11ce9
--- /dev/null
+++ b/mwlib/expr.py
@@ -0,0 +1,222 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+# based on pyparsing example code (SimpleCalc.py)
+
+"""Implementation of mediawiki's #expr template.
+http://meta.wikimedia.org/wiki/ParserFunctions#.23expr:
+"""
+
+from __future__ import division
+
+import re
+import inspect
+import math
+
+class ExprError(Exception):
+ pass
+
+def _myround(a,b):
+ r=round(a, int(b))
+ if int(r)==r:
+ return int(r)
+ return r
+
+
+pattern = """
+(?:\s+)
+|((?:(?:\d+)(?:\.\d+)?
+ |(?:\.\d+)) (?:e(?:\+|-)?\d+)?)
+|(\+|-|\*|/|>=|<=|<>|!=|[a-zA-Z]+|.)
+"""
+
+rxpattern = re.compile(pattern, re.VERBOSE | re.DOTALL | re.IGNORECASE)
+def tokenize(s):
+ res = []
+ for (v1,v2) in rxpattern.findall(s):
+ if not (v1 or v2):
+ continue
+ v2=v2.lower()
+ if v2 in Expr.constants:
+ res.append((v2,""))
+ else:
+ res.append((v1,v2))
+ return res
+
+
+class uminus: pass
+class uplus: pass
+
+precedence = {"(":-1, ")":-1}
+functions = {}
+
+def addop(op, prec, fun, numargs=None):
+ precedence[op] = prec
+ if numargs is None:
+ numargs = len(inspect.getargspec(fun)[0])
+
+
+ def wrap(stack):
+ assert len(stack)>=numargs
+ args = tuple(stack[-numargs:])
+ del stack[-numargs:]
+ stack.append(fun(*args))
+
+ functions[op] = wrap
+
+a=addop
+a(uminus, 10, lambda x: -x)
+a(uplus, 10, lambda x: x)
+a("^", 10, math.pow, 2)
+a("not", 9, lambda x:int(not(bool(x))))
+a("abs", 9, abs, 1)
+a("sin", 9, math.sin, 1)
+a("cos", 9, math.cos, 1)
+a("asin", 9, math.asin, 1)
+a("acos", 9, math.acos, 1)
+a("tan", 9, math.tan, 1)
+a("atan", 9, math.atan, 1)
+a("exp", 9, math.exp, 1)
+a("ln", 9, math.log, 1)
+a("ceil", 9, lambda x: int(math.ceil(x)))
+a("floor", 9, lambda x: int(math.floor(x)))
+a("trunc", 9, long, 1)
+
+a("*", 8, lambda x,y: x*y)
+a("/", 8, lambda x,y: x/y)
+a("div", 8, lambda x,y: x/y)
+a("mod", 8, lambda x,y: int(x)%int(y))
+
+
+a("+", 6, lambda x,y: x+y)
+a("-", 6, lambda x,y: x-y)
+
+a("round", 5, _myround)
+
+a("<", 4, lambda x,y: int(x<y))
+a(">", 4, lambda x,y: int(x>y))
+a("<=", 4, lambda x,y: int(x<=y))
+a(">=", 4, lambda x,y: int(x>=y))
+a("!=", 4, lambda x,y: int(x!=y))
+a("<>", 4, lambda x,y: int(x!=y))
+a("=", 4, lambda x,y: int(x==y))
+
+a("and", 3, lambda x,y: int(bool(x) and bool(y)))
+a("or", 2, lambda x,y: int(bool(x) or bool(y)))
+del a
+
+class Expr(object):
+ constants = dict(
+ e=math.e,
+ pi=math.pi)
+
+ def as_float_or_int(self, s):
+ try:
+ return self.constants[s]
+ except KeyError:
+ pass
+
+ if "." in s or "e" in s.lower():
+ return float(s)
+ return long(s)
+
+ def output_operator(self, op):
+ return functions[op](self.operand_stack)
+
+ def output_operand(self, operand):
+ self.operand_stack.append(operand)
+
+ def parse_expr(self, s):
+ tokens = tokenize(s)
+ if not tokens:
+ return ""
+
+ self.operand_stack = []
+ operator_stack = []
+
+ seen_operand=False
+
+ last_operand, last_operator = False, True
+
+ for operand, operator in tokens:
+ if operand:
+ if last_operand:
+ raise ExprError("expected operator")
+ self.output_operand(self.as_float_or_int(operand))
+ elif operator=="(":
+ operator_stack.append("(")
+ elif operator==")":
+ while 1:
+ if not operator_stack:
+ raise ExprError("unbalanced parenthesis")
+ t = operator_stack.pop()
+ if t=="(":
+ break
+ self.output_operator(t)
+ elif operator in precedence:
+ if last_operator and last_operator!=")":
+ if operator=='-':
+ operator = uminus
+ elif operator=='+':
+ operator = uplus
+
+ is_unary = operator in (uplus, uminus)
+ prec = precedence[operator]
+ while not is_unary and operator_stack and prec<=precedence[operator_stack[-1]]:
+ p = operator_stack.pop()
+ self.output_operator(p)
+ operator_stack.append(operator)
+ else:
+ raise ExprError("unknown operator: %r" % (operator,))
+
+ last_operand, last_operator = operand, operator
+
+
+ while operator_stack:
+ p=operator_stack.pop()
+ if p=="(":
+ raise ExprError("unbalanced parenthesis")
+ self.output_operator(p)
+
+ if len(self.operand_stack)!=1:
+ raise ExprError("bad stack: %s" % (self.operand_stack,))
+
+ return self.operand_stack[-1]
+
+def expr(s):
+ return Expr().parse_expr(s)
+
+def main():
+ ParseException = ExprError
+ import time
+ try:
+ import readline # do not remove. makes raw_input use readline
+ readline
+ except ImportError:
+ pass
+
+ ep = expr
+
+ while 1:
+ input_string = raw_input("> ")
+ if not input_string:
+ continue
+
+ stime = time.time()
+ try:
+ res=expr(input_string)
+ except Exception, err:
+ print "ERROR:", err
+ import traceback
+ traceback.print_exc()
+
+ continue
+ print res
+ print time.time()-stime, "s"
+
+if __name__=='__main__':
+ main()
+
+
diff --git a/mwlib/htmlwriter.py b/mwlib/htmlwriter.py
new file mode 100755
index 0000000..dabb979
--- /dev/null
+++ b/mwlib/htmlwriter.py
@@ -0,0 +1,436 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+from mwlib import parser, rendermath, timeline
+
+import urllib
+import cgi
+
+# Image (PIL) is used by writeTimeline and writeImageLink below; guard the
+# import so this module can still be loaded where PIL is unavailable.
+try:
+ from PIL import Image
+except ImportError:
+ Image = None
+
+from mwlib.log import Log
+
+log = Log("htmlwriter")
+
+class HTMLWriter(object):
+ imglevel = 0
+ namedLinkCount = 1
+ def __init__(self, out, images=None, math_renderer=None):
+ self.out = out
+ self.level = 0
+ self.images = images
+ # self.images = imgdb.ImageDB(os.path.expanduser("~/images"))
+ self.references = []
+ if math_renderer is None:
+ self.math_renderer = rendermath.Renderer()
+ else:
+ self.math_renderer = math_renderer
+
+ def _write(self, s):
+ self.out.write(cgi.escape(s).encode('utf8'))
+
+ def getCategoryList(self, obj):
+ categories = list(set(c.target for c in obj.find(parser.CategoryLink)))
+ categories.sort()
+ return categories
+
+ def write(self, obj):
+ m = "write" + obj.__class__.__name__
+ m=getattr(self, m)
+ m(obj)
+
+ def ignore(self, obj):
+ pass
+
+ def serializeVList(self,vlist):
+ args = []
+ styleArgs = []
+ gotClass = 0
+ gotExtraClass = 0
+ for (key,value) in vlist.items():
+ if isinstance(value, (basestring, int)):
+ if key=="class":
+ args.append('%s="%s"' % (key, value))
+ gotClass = 1
+ else:
+ args.append('%s="%s"' % (key, value))
+ if isinstance(value, dict) and key=="style":
+ for (_key,_value) in value.items():
+ styleArgs.append("%s:%s" % (_key, _value))
+ args.append(' style="%s"' % ';'.join(styleArgs))
+ gotExtraClass = 1
+ return ' '.join(args)
+
+
+ def writeMagic(self, m):
+ if m.values.get('html'):
+ for x in m.children:
+ self.write(x)
+
+ def writeCaption(self, obj):
+ # todo- A table contained a Caption node, causing an exception in write.
+ # Not sure what the HTML should be, if any.
+ pass
+
+ def writeSection(self, obj):
+ header = "h%s" % (obj.level)
+ self.out.write("<%s>" % header)
+ self.write(obj.children[0])
+ self.out.write("</%s>" % header)
+
+ self.level += 1
+ for x in obj.children[1:]:
+ self.write(x)
+ self.level -= 1
+
+ def writePreFormatted(self, n):
+ self.out.write("<pre>")
+ for x in n:
+ self.write(x)
+ self.out.write("</pre>")
+
+ def writeNode(self, n):
+ for x in n:
+ self.write(x)
+
+ def writeCell(self, cell):
+ svl = ""
+ if cell.vlist:
+ svl = self.serializeVList(cell.vlist)
+
+ self.out.write('<td %s>' % svl)
+ for x in cell:
+ self.write(x)
+ self.out.write("</td>")
+
+ def writeTagNode(self, t):
+ if t.caption == 'ref':
+ self.references.append(t)
+ self.out.write("<sup>%s</sup>" % len(self.references))
+ return
+ elif t.caption == 'references':
+ if not self.references:
+ return
+
+ self.out.write("<ol>")
+ for r in self.references:
+ self.out.write("<li>")
+ for x in r:
+ self.write(x)
+ self.out.write("</li>")
+ self.out.write("</ol>")
+
+ self.references = []
+ return
+ elif t.caption=='imagemap':
+ # FIXME. this is not complete. t.imagemap.entries should also be handled.
+ print "WRITEIMAGEMAP:", t.imagemap
+ if t.imagemap.imagelink:
+ self.write(t.imagemap.imagelink)
+ return
+
+
+ self.out.write(t.starttext.encode('utf8'))
+ for x in t:
+ self.write(x)
+ self.out.write(t.endtext.encode('utf8'))
+
+ def writeRow(self, row):
+ self.out.write('<tr>')
+ for x in row:
+ self.write(x)
+
+ self.out.write('</tr>')
+
+ def writeTable(self, t):
+ svl = ""
+ if t.vlist:
+ svl = self.serializeVList(t.vlist)
+
+
+
+ self.out.write("<table %s>" % svl)
+ if t.caption:
+ self.out.write("<caption>")
+ self.write(t.caption)
+ self.out.write("<caption>")
+ for x in t:
+ self.write(x)
+ self.out.write("</table>")
+
+ def writeMath(self, obj):
+ latex = obj.caption
+ #p = self.math_renderer.render(latex)
+ self.out.write('<tt>%s</tt>' % latex)
+
+ def writeURL(self, obj):
+ self.out.write('<a href="%s" class="offsite" ttid="externallink">' % obj.caption)
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ self.out.write(obj.caption)
+
+ self.out.write('&nbsp;<img src="/static/outgoing_link.gif" /></a>')
+
+ def writeNamedURL(self, obj):
+ self.out.write('<a href="%s" class="offsite" ttid="externallink">' % obj.caption)
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ name = "[%s]" % self.namedLinkCount
+ self.namedLinkCount += 1
+ self.out.write(name)
+
+ self.out.write('&nbsp;<img src="/static/outgoing_link.gif" /></a>')
+
+
+ def writeParagraph(self, obj):
+ self.out.write("\n<p>")
+ for x in obj:
+ self.write(x)
+ self.out.write("</p>\n")
+
+ def getHREF(self, obj):
+ parts = obj.target.encode('utf-8').split('#')
+ parts[0] = parts[0].replace(" ", "_")
+
+
+ return '../%s/' % ("#".join([urllib.quote(x) for x in parts]))
+
+ writeLangLink = ignore
+
+ def writeLink(self, obj):
+ if obj.target is None:
+ return
+
+ href = self.getHREF(obj)
+ if href is not None:
+ self.out.write('<a href="%s" class="normallink">' % (href,))
+ else:
+ self.out.write('<a class="deadlink">')
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ self._write(obj.target)
+
+ self.out.write("</a>")
+
+ def writeSpecialLink(self, obj):
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ self._write(obj.target)
+
+ def writeCategoryLink(self, obj):
+ if obj.colon:
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ self._write(obj.target)
+
+ def writeTimeline(self, obj):
+ img = timeline.drawTimeline(obj.caption)
+ if img is None:
+ return
+
+ target = "/timeline/"+os.path.basename(img)
+ width, height = Image.open(img).size
+
+ self.out.write('<img src="%s" width="%s" height="%s" />' % (target, width, height))
+
+ def writeImageLink(self, obj):
+ """
+ <span class='image'>
+ <span class='left'>
+ <img src='bla' />
+ <span class='imagecaption'>bla bla</span>
+ <span/>
+ <span/>
+ """
+
+ if self.images is None:
+ return
+
+ width = obj.width
+ height = obj.height
+
+ #if not width:
+ # width = 400 # what could be a sensible default if no width is given? maybe better 0?
+
+ if width:
+ path = self.images.getPath(obj.target, size=max(width, height))
+ url = self.images.getURL(obj.target, size=max(width, height))
+ else:
+ path = self.images.getPath(obj.target)
+ url = self.images.getURL(obj.target)
+
+ if url is None:
+ return
+
+ if isinstance(path, str):
+ path = unicode(path, 'utf8')
+
+ if self.imglevel==0:
+ self.imglevel += 1
+
+ # WTB: Added the ability to not specify width & height since images may not be found locally.
+ # This may have to be redone eventually, perhaps we need a database of image dimensions,
+ # but I doubt it. Besides, more hardcoded pathnames in 'getimg'?
+ try:
+ def getimg():
+ return Image.open(path)
+ img = None
+
+ if not width:
+ if not img:
+ img = getimg()
+ size = img.size
+ width = min(400, size[0])
+
+ if not height:
+ if not img:
+ img = getimg()
+ size = img.size
+ height = size[1]*width/size[0]
+ except IOError, err:
+ log.warn("Image.open failed:", err, "path=", repr(path))
+ # WTB: Removed following return as images will not always be found locally.
+ #self.imglevel -= 1
+ #return
+
+ attr = ''
+ attr_css = ''
+
+ if width:
+ attr += "width='%d' " % width
+ attr_css += "width:%dpx " % width
+
+ if height:
+ attr += "height='%d' " % height
+ # WTB: Note- height not applied to CSS.
+
+ if obj.isInline():
+ self.out.write('<img src="%s" %s/>' % (url.encode("utf8"), attr.encode("utf8")))
+ else:
+ # WTB: This looked like a mistake to me, it was modifying obj.align instead of align.
+ # This function should not modify obj at all.
+ align = obj.align
+ if obj.thumb == True and not align:
+ align = "clear right"
+ self.out.write('''<div class="bbotstyle image %s" style="%s">'''% (align, attr_css))
+ self.out.write('<img src="%s" %s/>' % (url.encode("utf8"), attr.encode("utf8")))
+
+ self.out.write('<span class="imagecaption">')
+ for x in obj.children:
+ self.write(x)
+ self.out.write('</span></div>')
+ self.imglevel -= 1
+ else:
+ self.out.write('<a href="%s">' % url)
+ for x in obj.children:
+ self.write(x)
+ self.out.write('</a>')
+
+ def writeText(self, t):
+ #self.out.write(cgi.escape(t.caption).encode('ascii', 'xmlcharrefreplace'))
+ self._write(t.caption)
+
+ writeControl = writeText
+
+ def writeArticle(self, a):
+ if a.caption:
+ self.out.write("<h1>")
+ self._write(a.caption)
+ self.out.write(' <font size=1>&middot; <a class="offsite" ')
+ self.out.write('href="http://es.wikipedia.org/wiki/')
+ self._write(a.caption)
+ self.out.write('">De Wikipedia, la enciclopedia libre</a>')
+ self.out.write("</font>")
+ self.out.write('</h1>')
+
+ for x in a:
+ self.write(x)
+
+ self.out.write("\n<br/>")
+
+ def writeStyle(self, s):
+ if s.caption == "''":
+ tag = 'em'
+ elif s.caption=="'''''":
+ self.out.write("<strong><em>")
+ for x in s:
+ self.write(x)
+ self.out.write("</em></strong>")
+ return
+ elif s.caption == "'''":
+ tag = 'strong'
+ elif s.caption == ";":
+ self.out.write("<div><strong>")
+ for x in s:
+ self.write(x)
+ self.out.write("</strong></div>")
+ return
+
+ elif s.caption.startswith(":"):
+ self.out.write("<blockquote>"*len(s.caption))
+ for x in s:
+ self.write(x)
+ self.out.write("</blockquote>"*len(s.caption))
+ return
+ elif s.caption == "overline":
+ self.out.write('<u style="text-decoration: overline;">')
+ for x in s:
+ self.write(x)
+ self.out.write('</u>')
+ return
+ else:
+ tag = s.caption
+
+
+ self.out.write("<%s>" % tag)
+ for x in s:
+ self.write(x)
+ self.out.write("</%s>" % tag)
+
+ def writeItem(self, item):
+ self.out.write("<li>")
+ for x in item:
+ self.write(x)
+ self.out.write("</li>\n")
+
+ def writeItemList(self, lst):
+ if lst.numbered:
+ tag = "ol"
+ else:
+ tag = "ul"
+
+ self.out.write("<%s>" % tag)
+
+ for x in lst:
+ self.write(x)
+ self.out.write("\n")
+
+ self.out.write("</%s>" % tag)
+
+
+class NoLinksWriter(HTMLWriter):
+ """Subclass that ignores (non-outgoing) links"""
+
+ def writeLink(self, obj):
+ if obj.target is None:
+ return
+
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ self._write(obj.target)
+
diff --git a/mwlib/imgmap.py b/mwlib/imgmap.py
new file mode 100755
index 0000000..80bb826
--- /dev/null
+++ b/mwlib/imgmap.py
@@ -0,0 +1,122 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+from pyparsing import (Literal, restOfLine, Word, nums, Group,
+ ZeroOrMore, OneOrMore, And, Suppress, LineStart,
+ LineEnd, StringEnd, ParseException, Optional, White)
+
+class gob(object):
+ def __init__(self, **kw):
+ self.__dict__.update(kw)
+
+ def __repr__(self):
+ return "<%s %r>" % (self.__class__.__name__, self.__dict__)
+
+class Poly(gob): pass
+class Rect(gob): pass
+class Circle(gob): pass
+class Comment(gob): pass
+class Desc(gob): pass
+class Default(gob): pass
+class ImageMap(gob): pass
+
+def _makepoly(tokens):
+ return Poly(caption=tokens[2].strip(), vertices=list(tokens[1]))
+
+def _makerect(tokens):
+ return Rect(caption=tokens[-1].strip(), top_left=tuple(tokens[1]), bottom_right=tuple(tokens[2]))
+
+def _makecomment(tokens):
+ return Comment(comment=tokens[1])
+
+def _makecircle(tokens):
+ return Circle(caption=tokens[3].strip(), center=tokens[1], radius=tokens[2])
+
+def _makedesc(tokens):
+ return Desc(location=tokens[1])
+
+def _makeimagemap(tokens):
+ image = None
+ for x in tokens:
+ if isinstance(x, basestring):
+ image = x
+ break
+ return ImageMap(entries=list(tokens), image=image)
+
+
+comment = (Literal('#')+restOfLine).setParseAction(_makecomment)
+
+integer = Word(nums).setParseAction(lambda s: int(s[0]))
+integer_pair = (integer+integer).setParseAction(lambda x: tuple(x))
+
+poly = Literal("poly")+Group(ZeroOrMore(integer_pair))+restOfLine
+poly = poly.setParseAction(_makepoly)
+
+rect = Literal("rect")+integer_pair+integer_pair+restOfLine
+rect = rect.setParseAction(_makerect)
+
+circle = Literal("circle")+integer_pair+integer+restOfLine
+circle = circle.setParseAction(_makecircle)
+
+desc = Literal("desc") + (Literal("top-right")
+ |Literal("bottom-right")
+ |Literal("bottom-left")
+ |Literal("top-left")
+ |Literal("none"))
+desc = desc.setParseAction(_makedesc)
+default = Literal("default")+restOfLine
+default.setParseAction(lambda t: Default(caption=t[1].strip()))
+
+
+def _makeother(tokens):
+ if not tokens[0]:
+ return [None]
+ return tokens
+
+# we can't use restOfLine.setParseAction(_makeother) as that sets the
+# parse action for any occurrence of restOfLine
+
+other = And([restOfLine]).setParseAction(_makeother)
+line = Suppress(LineStart()) + (comment | poly | rect | circle | desc | default | other) + Suppress(LineEnd())
+imagemap = ZeroOrMore(line) + StringEnd()
+imagemap.setParseAction(_makeimagemap)
+
+def ImageMapFromString(s):
+ # uhh. damn. can't get pyparsing to parse
+ # commands, other lines (i.e. syntax errors strictly speaking)
+ # and lines containing only whitespace...
+ lines = []
+ for x in s.split("\n"):
+ x=x.strip()
+ if x:
+ lines.append(x)
+ s="\n".join(lines)
+
+ try:
+ return imagemap.parseString(s)[0]
+ except ParseException, err:
+ return ImageMap(entries=[], image=None)
+
+def main():
+ ex="""
+
+
+Image:Foo.jpg|200px|picture of a foo
+poly 131 45 213 41 210 110 127 109 [[Display]]
+poly 104 126 105 171 269 162 267 124 [[Keyboard]]
+rect 15 95 94 176 [[Foo type A]]
+# A comment, this line is ignored
+circle 57 57 20 [[Foo type B]]
+desc bottom-left
+default [[Mainz]]
+---dfg-sdfg--sdfg
+blubb
+"""
+ res = ImageMapFromString(ex)
+ for x in res.entries:
+ print x
+
+if __name__=='__main__':
+ main()
diff --git a/mwlib/lang.py b/mwlib/lang.py
new file mode 100755
index 0000000..ca122a0
--- /dev/null
+++ b/mwlib/lang.py
@@ -0,0 +1,10 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+languages = set(open(os.path.join(os.path.dirname(__file__), 'lang.txt')).read().split())
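+# Example (added for illustration): membership tests against lang.txt,
+# e.g. ('de' in languages) is True and ('xx' in languages) is False.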
+
+
+
diff --git a/mwlib/lang.txt b/mwlib/lang.txt
new file mode 100644
index 0000000..9dfb78e
--- /dev/null
+++ b/mwlib/lang.txt
@@ -0,0 +1,253 @@
+af
+als
+am
+an
+ang
+ar
+ast
+az
+ba
+be
+bg
+bm
+bn
+bo
+br
+bs
+ca
+ceb
+chr
+co
+cs
+csb
+cv
+cy
+da
+de
+el
+en
+eo
+es
+et
+eu
+fa
+fi
+fiu-vro
+fo
+fr
+frp
+fur
+fy
+ga
+gd
+gl
+gn
+gu
+he
+hi
+hr
+ht
+hu
+hy
+ia
+id
+ie
+ilo
+io
+is
+it
+iu
+ja
+jbo
+jv
+ka
+kg
+km
+kn
+ko
+ks
+ksh
+ku
+kw
+ky
+la
+lad
+lb
+li
+ln
+lt
+lv
+mg
+mi
+mk
+ml
+mo
+mr
+ms
+mt
+my
+na
+nah
+nap
+nb
+nds
+nds-nl
+ng
+nl
+nn
+no
+nrm
+nv
+oc
+os
+pam
+pap
+pdc
+pl
+ps
+pt
+rm
+ro
+roa-rup
+ru
+sa
+sc
+scn
+sco
+se
+sh
+si
+simple
+sk
+sl
+sm
+so
+sq
+sr
+st
+su
+sv
+sw
+ta
+te
+th
+tk
+tl
+to
+tpi
+tr
+tt
+ug
+uk
+ur
+vec
+vi
+vo
+wa
+war
+yi
+za
+zh
+zh-min-nan
+zh-yue
+pms
+dv
+got
+haw
+wo
+tet
+qu
+lmo
+be-x-old
+hsb
+pag
+bat-smg
+bpy
+lij
+udm
+nov
+cbk-zam
+arc
+kab
+ru-sib
+diq
+gv
+zea
+wuu
+cdo
+lg
+hak
+ty
+lo
+tn
+ti
+tg
+dk
+yo
+dz
+vls
+bar
+eml
+bxr
+ee
+rmy
+eve
+zu
+rw
+new
+rn
+xal
+bh
+bi
+wp
+om
+glk
+tw
+or
+aa
+xh
+ch
+ce
+cr
+fj
+cu
+zh-tw
+pa
+chy
+pi
+hz
+ho
+bug
+uz
+mn
+ik
+ss
+kj
+bet
+pih
+ab
+ve
+ak
+ii
+as
+av
+ay
+ig
+nan
+zh-min-nan
+ne
+ny
+sn
+ff
+mh
+mzn
+kk
+ki
+kl
+kv
+sg
+sd
+roa-tara
+zh-classical
+zh-cn
+map-bms
diff --git a/mwlib/licenses.py b/mwlib/licenses.py
new file mode 100644
index 0000000..2e96efd
--- /dev/null
+++ b/mwlib/licenses.py
@@ -0,0 +1,185 @@
+#! /usr/bin/env python
+
+"""Mapping of lower-cased template names of licenses to their normalized name.
+This file has been automatically generated with tools/get_license_templates.py
+"""
+
+lower2normal = {u'attr-tartu': u'Attr-Tartu',
+ u'attribution': u'Attribution',
+ u'attribution entomart': u'Attribution Entomart',
+ u'attribution-ubisoft': u'Attribution-Ubisoft',
+ u'attribution-ubisoft/ja': u'Attribution-Ubisoft/ja',
+ u'attribution-ubisoft/ko': u'Attribution-Ubisoft/ko',
+ u'attribution-ubisoft/nl': u'Attribution-Ubisoft/nl',
+ u'attribution/lv': u'Attribution/lv',
+ u'attribution/zh-hant': u'Attribution/zh-hant',
+ u'autotravel': u'Autotravel',
+ u'bsdu': u'BSDu',
+ u'cc-by-1.0': u'Cc-by-1.0',
+ u'cc-by-1.0-nl': u'Cc-by-1.0-nl',
+ u'cc-by-2.0': u'Cc-by-2.0',
+ u'cc-by-2.0-be': u'Cc-by-2.0-be',
+ u'cc-by-2.0-br': u'Cc-by-2.0-br',
+ u'cc-by-2.0-cl': u'Cc-by-2.0-cl',
+ u'cc-by-2.0-es': u'Cc-by-2.0-es',
+ u'cc-by-2.0-fr': u'Cc-by-2.0-fr',
+ u'cc-by-2.0-it': u'Cc-by-2.0-it',
+ u'cc-by-2.0-kr': u'Cc-by-2.0-kr',
+ u'cc-by-2.0-nl': u'Cc-by-2.0-nl',
+ u'cc-by-2.0-uk': u'Cc-by-2.0-uk',
+ u'cc-by-2.1-au': u'Cc-by-2.1-au',
+ u'cc-by-2.1-es': u'Cc-by-2.1-es',
+ u'cc-by-2.1-jp': u'Cc-by-2.1-jp',
+ u'cc-by-2.5': u'Cc-by-2.5',
+ u'cc-by-2.5-bg': u'Cc-by-2.5-bg',
+ u'cc-by-2.5-br': u'Cc-by-2.5-br',
+ u'cc-by-2.5-dk': u'Cc-by-2.5-dk',
+ u'cc-by-2.5-in': u'Cc-by-2.5-in',
+ u'cc-by-2.5-it': u'Cc-by-2.5-it',
+ u'cc-by-2.5-my': u'Cc-by-2.5-my',
+ u'cc-by-2.5-nl': u'Cc-by-2.5-nl',
+ u'cc-by-2.5-pl': u'Cc-by-2.5-pl',
+ u'cc-by-2.5-se': u'Cc-by-2.5-se',
+ u'cc-by-3.0': u'Cc-by-3.0',
+ u'cc-by-3.0-gr': u'Cc-by-3.0-gr',
+ u'cc-by-3.0-indiafm': u'Cc-by-3.0-IndiaFM',
+ u'cc-by-3.0-nl': u'Cc-by-3.0-nl',
+ u'cc-by-3.0-rs': u'Cc-by-3.0-rs',
+ u'cc-by-3.0-us': u'Cc-by-3.0-us',
+ u'cc-by-nc-sa-2.0-dual': u'Cc-by-nc-sa-2.0-dual',
+ u'cc-by-sa-1.0': u'Cc-by-sa-1.0',
+ u'cc-by-sa-1.0-fi': u'Cc-by-sa-1.0-fi',
+ u'cc-by-sa-1.0-tw': u'Cc-by-sa-1.0-tw',
+ u'cc-by-sa-2.0': u'Cc-by-sa-2.0',
+ u'cc-by-sa-2.0-at': u'Cc-by-sa-2.0-at',
+ u'cc-by-sa-2.0-be': u'Cc-by-sa-2.0-be',
+ u'cc-by-sa-2.0-br': u'Cc-by-sa-2.0-br',
+ u'cc-by-sa-2.0-ca': u'Cc-by-sa-2.0-ca',
+ u'cc-by-sa-2.0-cl': u'Cc-by-sa-2.0-cl',
+ u'cc-by-sa-2.0-de': u'Cc-by-sa-2.0-de',
+ u'cc-by-sa-2.0-es': u'Cc-by-sa-2.0-es',
+ u'cc-by-sa-2.0-fr': u'Cc-by-sa-2.0-fr',
+ u'cc-by-sa-2.0-it': u'Cc-by-sa-2.0-it',
+ u'cc-by-sa-2.0-kr': u'Cc-by-sa-2.0-kr',
+ u'cc-by-sa-2.0-nl': u'Cc-by-sa-2.0-nl',
+ u'cc-by-sa-2.0-tw': u'Cc-by-sa-2.0-tw',
+ u'cc-by-sa-2.0-uk': u'Cc-by-sa-2.0-uk',
+ u'cc-by-sa-2.1-au': u'Cc-by-sa-2.1-au',
+ u'cc-by-sa-2.1-es': u'Cc-by-sa-2.1-es',
+ u'cc-by-sa-2.1-jp': u'Cc-by-sa-2.1-jp',
+ u'cc-by-sa-2.5': u'Cc-by-sa-2.5',
+ u'cc-by-sa-2.5,1.0': u'Cc-by-sa-2.5,1.0',
+ u'cc-by-sa-2.5,2.0,1.0': u'Cc-by-sa-2.5,2.0,1.0',
+ u'cc-by-sa-2.5,2.0,1.0-no-link': u'Cc-by-sa-2.5,2.0,1.0-no-link',
+ u'cc-by-sa-2.5-ar': u'Cc-by-sa-2.5-ar',
+ u'cc-by-sa-2.5-au': u'Cc-by-sa-2.5-au',
+ u'cc-by-sa-2.5-bg': u'Cc-by-sa-2.5-bg',
+ u'cc-by-sa-2.5-br': u'Cc-by-sa-2.5-br',
+ u'cc-by-sa-2.5-ca': u'Cc-by-sa-2.5-ca',
+ u'cc-by-sa-2.5-ch': u'Cc-by-sa-2.5-ch',
+ u'cc-by-sa-2.5-cl': u'Cc-by-sa-2.5-cl',
+ u'cc-by-sa-2.5-cn': u'Cc-by-sa-2.5-cn',
+ u'cc-by-sa-2.5-de': u'Cc-by-sa-2.5-de',
+ u'cc-by-sa-2.5-dk': u'Cc-by-sa-2.5-dk',
+ u'cc-by-sa-2.5-es': u'Cc-by-sa-2.5-es',
+ u'cc-by-sa-2.5-hu': u'Cc-by-sa-2.5-hu',
+ u'cc-by-sa-2.5-in': u'Cc-by-sa-2.5-in',
+ u'cc-by-sa-2.5-it': u'Cc-by-sa-2.5-it',
+ u'cc-by-sa-2.5-mx': u'Cc-by-sa-2.5-mx',
+ u'cc-by-sa-2.5-nl': u'Cc-by-sa-2.5-nl',
+ u'cc-by-sa-2.5-pl': u'Cc-by-sa-2.5-pl',
+ u'cc-by-sa-2.5-pt': u'Cc-by-sa-2.5-pt',
+ u'cc-by-sa-2.5-se': u'Cc-by-sa-2.5-se',
+ u'cc-by-sa-2.5-si': u'Cc-by-sa-2.5-si',
+ u'cc-by-sa-2.5-tw': u'Cc-by-sa-2.5-tw',
+ u'cc-by-sa-3.0': u'Cc-by-sa-3.0',
+ u'cc-by-sa-3.0,2.5,2.0,1.0': u'Cc-by-sa-3.0,2.5,2.0,1.0',
+ u'cc-by-sa-3.0,2.5,2.0,1.0-no-link': u'Cc-by-sa-3.0,2.5,2.0,1.0-no-link',
+ u'cc-by-sa-3.0-gr': u'Cc-by-sa-3.0-gr',
+ u'cc-by-sa-3.0-nl': u'Cc-by-sa-3.0-nl',
+ u'cc-by-sa-3.0-rs': u'Cc-by-sa-3.0-rs',
+ u'cc-by-sa-3.0-tw': u'Cc-by-sa-3.0-tw',
+ u'cc-by-sa-3.0-us': u'Cc-by-sa-3.0-us',
+ u'cc-by-sa-jul': u'Cc-by-sa-jul',
+ u'cecill': u'CeCILL',
+ u'cng': u'CNG',
+ u'elephants dream': u'Elephants Dream',
+ u'fal': u'FAL',
+ u'geograph': u'Geograph',
+ u'gfdl': u'GFDL',
+ u'gfdl or cc-by-nc-sa': u'GFDL or cc-by-nc-sa',
+ u'gfdl or cc-by-nc-sa/2.5': u'GFDL or cc-by-nc-sa/2.5',
+ u'gfdl-1.2': u'GFDL-1.2',
+ u'gfdl-1.2-en': u'GFDL-1.2-en',
+ u'gfdl-1.2/es': u'GFDL-1.2/es',
+ u'gfdl-1.2/vi': u'GFDL-1.2/vi',
+ u'gfdl-cc-triple': u'GFDL-CC-triple',
+ u'gfdl-dd': u'GFDL-DD',
+ u'gfdl-en': u'GFDL-en',
+ u'gfdl-en/bg': u'GFDL-en/bg',
+ u'gfdl-en/fr': u'GFDL-en/fr',
+ u'gfdl-en/pl': u'GFDL-en/pl',
+ u'gfdl-gmt': u'GFDL-GMT',
+ u'gfdl-is': u'GFDL-IS',
+ u'gfdl-it': u'GFDL-it',
+ u'gfdl-ja': u'GFDL-ja',
+ u'gfdl-landsat-kashmir3d': u'GFDL-Landsat-Kashmir3d',
+ u'gfdl-opengeodb': u'GFDL-OpenGeoDB',
+ u'gfdl-retouched': u'GFDL-retouched',
+ u'gfdl-samoborac': u'GFDL-Samoborac',
+ u'gfdl-self': u'GFDL-self',
+ u'gfdl-user': u'GFDL-user',
+ u'gfdl-user-als': u'GFDL-user-als',
+ u'gfdl-user-ar': u'GFDL-user-ar',
+ u'gfdl-user-bat-smg': u'GFDL-user-bat-smg',
+ u'gfdl-user-bs': u'GFDL-user-bs',
+ u'gfdl-user-cs': u'GFDL-user-cs',
+ u'gfdl-user-da': u'GFDL-user-da',
+ u'gfdl-user-de': u'GFDL-user-de',
+ u'gfdl-user-el': u'GFDL-user-el',
+ u'gfdl-user-en-no-disclaimers': u'GFDL-user-en-no-disclaimers',
+ u'gfdl-user-en-note': u'GFDL-user-en-note',
+ u'gfdl-user-en-with-disclaimers': u'GFDL-user-en-with-disclaimers',
+ u'gfdl-user-es': u'GFDL-user-es',
+ u'gfdl-user-fa': u'GFDL-user-fa',
+ u'gfdl-user-fi': u'GFDL-user-fi',
+ u'gfdl-user-fr': u'GFDL-user-fr',
+ u'gfdl-user-gl': u'GFDL-user-gl',
+ u'gfdl-user-he': u'GFDL-user-he',
+ u'gfdl-user-hi': u'GFDL-user-hi',
+ u'gfdl-user-hu': u'GFDL-user-hu',
+ u'gfdl-user-id': u'GFDL-user-id',
+ u'gfdl-user-it': u'GFDL-user-it',
+ u'gfdl-user-ja': u'GFDL-user-ja',
+ u'gfdl-user-ko': u'GFDL-user-ko',
+ u'gfdl-user-lt': u'GFDL-user-lt',
+ u'gfdl-user-nl': u'GFDL-user-nl',
+ u'gfdl-user-nn': u'GFDL-user-nn',
+ u'gfdl-user-no': u'GFDL-user-no',
+ u'gfdl-user-pl': u'GFDL-user-pl',
+ u'gfdl-user-pt': u'GFDL-user-pt',
+ u'gfdl-user-ru': u'GFDL-user-ru',
+ u'gfdl-user-sk': u'GFDL-user-sk',
+ u'gfdl-user-sq': u'GFDL-user-sq',
+ u'gfdl-user-tr': u'GFDL-user-tr',
+ u'gfdl-user-uk': u'GFDL-user-uk',
+ u'gfdl-user-vi': u'GFDL-user-vi',
+ u'gfdl-user-vls': u'GFDL-user-vls',
+ u'gfdl-user-w': u'GFDL-user-w',
+ u'gfdl-user-zh': u'GFDL-user-zh',
+ u'gpl': u'GPL',
+ u'gplv2 only': u'GPLv2 only',
+ u'gplv3': u'GPLv3',
+ u'inewton': u'INewton',
+ u'lgpl': u'LGPL',
+ u'mdb': u'MdB',
+ u'met.no': u'Met.no',
+ u'norges golfforbund': u'Norges Golfforbund',
+ u'open font': u'Open Font',
+ u'parlament.ch': u'Parlament.ch',
+ u'picswiss': u'Picswiss',
+ u'polishsenatecopyright': u'PolishSenateCopyright',
+ u'stationsweb': u'Stationsweb',
+ u'statistics netherlands map': u'Statistics Netherlands map',
+ u'swiss government portrait': u'Swiss Government Portrait',
+ u'www.nordenskirker.dk': u'Www.nordenskirker.dk'}
diff --git a/mwlib/log.py b/mwlib/log.py
new file mode 100755
index 0000000..9afef8f
--- /dev/null
+++ b/mwlib/log.py
@@ -0,0 +1,53 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import sys
+
+class Stdout(object):
+ """late-bound sys.stdout"""
+ def write(self, msg):
+ sys.stdout.write(msg)
+
+ def flush(self):
+ sys.stdout.flush()
+
+class Stderr(object):
+ """late-bound sys.stderr"""
+ def write(self, msg):
+ sys.stderr.write(msg)
+
+ def flush(self):
+ sys.stderr.flush()
+
+class Log(object):
+ logfile = Stderr()
+
+ def __init__(self, prefix=None):
+ if prefix is None:
+ self._prefix = []
+ else:
+ if isinstance(prefix, basestring):
+ self._prefix = [prefix]
+ else:
+ self._prefix = prefix
+
+ def __getattr__(self, name):
+ return Log([self, name])
+
+ def __nonzero__(self):
+ return bool(self._prefix)
+
+ def __str__(self):
+ return ".".join(str(x) for x in self._prefix if x)
+
+ def __call__(self, msg, *args):
+ if not self.logfile:
+ return
+
+ if args:
+ msg = " ".join(([msg] + [repr(x) for x in args]))
+
+ s = "%s >> %s\n" % (".".join(str(x) for x in self._prefix if x), msg)
+ self.logfile.write(s)
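+
+# Usage sketch (added for illustration, not part of the original module):
+# attribute access creates a sub-logger, and calling it writes one prefixed
+# line to Log.logfile (stderr by default).
+#
+#   log = Log("expander")
+#   log.warn("no template", u"Foo")   # writes "expander.warn >> no template u'Foo'"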
diff --git a/mwlib/magics.py b/mwlib/magics.py
new file mode 100755
index 0000000..4246ba5
--- /dev/null
+++ b/mwlib/magics.py
@@ -0,0 +1,469 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+"""expand magic variables/colon functions
+http://meta.wikimedia.org/wiki/Help:Colon_function
+http://meta.wikimedia.org/wiki/Help:Magic_words
+http://meta.wikimedia.org/wiki/ParserFunctions
+"""
+
+import datetime
+import urllib
+from mwlib.log import Log
+from mwlib import expr
+
+log = Log("expander")
+
+def singlearg(fun):
+ def wrap(self, args):
+ rl=args
+ if not rl:
+ a=u''
+ else:
+ a=rl[0]
+
+ return fun(self, a)
+
+ return wrap
+
+def noarg(fun):
+ def wrap(self, *args):
+ return fun(self)
+ return wrap
+
+def as_numeric(x):
+ try:
+ return int(x)
+ except ValueError:
+ pass
+ return float(x)
+
+
+def maybe_numeric_compare(a,b):
+ if a==b:
+ return True
+ try:
+ a=as_numeric(a)
+ b=as_numeric(b)
+ except ValueError:
+ return False
+
+ return a==b
+
+
+class OtherMagic(object):
+ def DEFAULTSORT(self, args):
+ """see http://en.wikipedia.org/wiki/Template:DEFAULTSORT"""
+ return u""
+
+
+class TimeMagic(object):
+ now = datetime.datetime.now()
+
+ @noarg
+ def CURRENTDAY(self):
+ """Displays the current day in numeric form."""
+ return "%s" % self.now.day
+
+ @noarg
+ def CURRENTDAY2(self):
+ """[MW1.5+] Ditto with leading zero 01 .. 31)."""
+ return "%02d" % self.now.day
+
+ @noarg
+ def CURRENTDAYNAME(self):
+ """Displays the current day in named form."""
+ return self.now.strftime("%A")
+
+ @noarg
+ def CURRENTDOW(self):
+ """current day as number (0=Sunday, 1=Monday...)."""
+ return str((self.now.weekday()+1) % 7)
+
+ @noarg
+ def CURRENTMONTH(self):
+ """The number 01 .. 12 of the current month."""
+ return "%02d" % self.now.month
+
+ @noarg
+ def CURRENTMONTHABBREV(self):
+ """[MW1.5+] current month abbreviated Jan .. Dec."""
+ return self.now.strftime("%b")
+
+ @noarg
+ def CURRENTMONTHNAME(self):
+ """current month in named form January .. December. """
+ return self.now.strftime("%B")
+
+ @noarg
+ def CURRENTTIME(self):
+ """The current time of day (00:00 .. 23:59)."""
+ return self.now.strftime("%H:%M")
+
+ @noarg
+ def CURRENTWEEK(self):
+ """Number of the current week (1-53) according to ISO 8601 with no leading zero."""
+ return str(self.now.isocalendar()[1])
+
+ @noarg
+ def CURRENTYEAR(self):
+ """Returns the current year."""
+ return str(self.now.year)
+
+ @noarg
+ def CURRENTTIMESTAMP(self):
+ """[MW1.7+] Returns the current time stamp. e.g.: 20060528125203"""
+ return self.now.strftime("%Y%m%d%H%M%S")
+
+ def MONTHNAME(self, args):
+ rl = args
+ if not rl:
+ return u"Missing required parameter 1=month!"
+ try:
+ m=int(rl[0].strip()) % 12
+ except ValueError:
+ return u"month should be an integer"
+ if m==0:
+ m=12
+
+ return datetime.datetime(2000, m, 1).strftime("%B")
+
+class PageMagic(object):
+ def __init__(self, pagename='', server="http://en.wikipedia.org", revisionid=0):
+ self.pagename = pagename
+ self.server = server
+ self.revisionid = revisionid
+
+ def PAGENAME(self, args):
+ """Returns the name of the current page, including all levels (Title/Subtitle/Sub-subtitle)"""
+ return self.pagename
+
+ def PAGENAMEE(self, args):
+ """same as PAGENAME but More URL-friendly percent encoded
+ special characters (To use an articlename in an external link).
+ """
+ return urllib.quote(self.pagename.encode('utf8'))
+
+
+ def SUBPAGENAME(self, args):
+ """[MW1.6+] Returns the name of the current page, excluding parent
+ pages ('Title/Subtitle' becomes 'Subtitle').
+ """
+ return self.pagename.split('/')[-1]
+
+ def SUBPAGENAMEE(self, args):
+ return urllib.quote(self.SUBPAGENAME(args))
+
+ def BASEPAGENAME(self, args):
+ """[MW1.7+] The basename of a subpage ('Title/Subtitle' becomes 'Title')
+ """
+ return self.pagename.rsplit('/', 1)[0]
+
+ def BASEPAGENAMEE(self, args):
+ """[MW1.7+] The basename of a subpage ('Title/Subtitle' becomes 'Title')
+ """
+ return urllib.quote(self.BASEPAGENAME(args))
+
+ def NAMESPACE(self, args):
+ """Returns the name of the namespace the current page resides in."""
+ return u"" # we currently only have articles living in the main/empty namespace
+
+ def NAMESPACEE(self, args):
+ """Returns the name of the namespace the current page resides in. (quoted)"""
+ return urllib.quote(self.NAMESPACE(args))
+
+ def REVISIONID(self, args):
+ """[MW1.5+] The unique identifying number of a page, see Help:Diff."""
+ return str(self.revisionid)
+
+ @noarg
+ def SITENAME(self):
+ """Value of $wgSitename."""
+ return ""
+
+ def NS(self, args):
+ """Returns the name of a given namespace number."""
+ return "++NS not implemented++"
+
+ def LOCALURL(self, args):
+ """Returns the local URL of a given page. The page might not exist."""
+ try:
+ url = "/wiki"+ "".join(args)
+ except:
+ url = '' # FIXME
+ return "/wiki"+url
+
+ def LOCALURLE(self, args):
+ """Returns the local URL of a given page. The page might not exist."""
+ return urllib.quote(self.LOCALURL(args))
+
+ def URLENCODE(self, args):
+ """[MW1.7+] To use a variable (parameter in a template) with spaces in an external link."""
+ try:
+ url = urllib.quote_plus("".join(args[0]))
+ except:
+ url = "".join(args[0])
+ return url
+
+ @noarg
+ def SERVER(self):
+ """Value of $wgServer"""
+ return self.server
+
+ def FULLURL(self, args):
+ # FIXME: not implemented; should build the full URL of the given page
+ # (roughly SERVER plus LOCALURL)
+ return u''
+
+ @noarg
+ def SERVERNAME(self):
+ return self.SERVER({})[len("http://"):]
+
+
+class NumberMagic(object):
+ def DISPLAYTITLE(self, args):
+ """[MW 1.7+] (unclear)"""
+ return ""
+
+ def NUMBEROFARTICLES(self, args):
+ """A variable which returns the total number of articles on the Wiki."""
+ return "0"
+
+ def NUMBEROFPAGES(self, args):
+ """[MW1.7+] Returns the total number of pages. """
+ return "0"
+
+ def NUMBEROFFILES(self, args):
+ """[MW1.5+] Returns the number of uploaded files (rows in the image table)."""
+ return "0"
+
+ def NUMBEROFUSERS(self, args):
+ """[MW1.7+] Returns the number of registered users (rows in the user table)."""
+ return "0"
+
+ def CURRENTVERSION(self, args):
+ """[MW1.7+] Returns the current version of MediaWiki being run. [5]"""
+ return "1.7alpha"
+
+
+
+class StringMagic(object):
+ @singlearg
+ def LC(self, a):
+ return a.lower()
+
+ @singlearg
+ def UC(self, a):
+ return a.upper()
+
+ @singlearg
+ def LCFIRST(self, a):
+ return a[:1].lower()+a[1:]
+
+ @singlearg
+ def UCFIRST(self, a):
+ return a[:1].upper()+a[1:]
+
+ @singlearg
+ def FORMATNUM(self, a):
+ return a
+
+class ParserFunctions(object):
+ wikidb = None
+ def _error(self,s):
+ return '<strong class="error">%s</strong>' % (s,)
+
+ def TAG(self, args):
+ name = args[0].strip()
+ r= u"<%s>%s</%s>" % (name, args[1], name)
+ return r
+
+
+ def IF(self, rl):
+ if rl[0]:
+ return rl[1]
+ else:
+ return rl[2]
+
+ def IFEXIST(self, args):
+ name = args[0]
+ if not self.wikidb:
+ return args.get(args[2], "")
+
+ # wrong place. FIXME.
+ if ':' in name:
+ ns, name = name.split(':', 1)
+ if ns.lower() in ['vorlage', 'template']:
+ r=self.wikidb.getTemplate(name)
+ else:
+ r=None
+ else:
+ r=self.wikidb.getRawArticle(name)
+
+ if r:
+ return args[1]
+ else:
+ return args[2]
+
+
+
+ def IFEQ(self, rl):
+ if maybe_numeric_compare(rl[0], rl[1]):
+ return rl[2]
+ else:
+ return rl[3]
+
+ def EXPR(self, rl):
+ if rl:
+ try:
+ r=str(expr.expr(rl[0]))
+ except Exception, err:
+ return self._error(err)
+
+ if "e" in r:
+ f,i = r.split("e")
+ i=int(i)
+ if i<0:
+ sign = ''
+ else:
+ sign = '+'
+ fixed=str(float(f))+"E"+sign+str(int(i))
+ return fixed
+ return r
+ return u"0"
+
+
+ def IFEXPR(self, rl):
+ try:
+ r = expr.expr(rl[0])
+ except Exception, err:
+ return self._error(err)
+
+ if r:
+ return rl[1]
+ else:
+ return rl[2]
+
+ def SWITCH(self, args):
+ """see http://meta.wikimedia.org/wiki/ParserFunctions#.23switch:"""
+ cmpval = args[0].strip()
+ found=False # used for fall through
+ for c in args[1:]:
+ if '=' in c:
+ val, result = c.split('=', 1)
+ val=val.strip()
+ result=result.strip()
+ if found or maybe_numeric_compare(val, cmpval):
+ return result
+ else:
+ if maybe_numeric_compare(cmpval,c.strip()):
+ found=True
+
+ d=args["#default"]
+ if d:
+ return d
+
+
+ last = args[-1]
+
+ if '=' not in last:
+ return last
+ return u''
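+    # Worked example of the lookup above: for
+    #   {{#switch: b | a = A | b = B | c | d = CD | #default = X }}
+    # args[0] is u'b' and the 'b = B' branch returns u'B'; a cmpval of u'c'
+    # matches the bare value 'c' and falls through to the next '=' entry,
+    # returning u'CD'; anything else yields the '#default' value u'X'.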
+
+ def TITLEPARTS(self, args):
+ title = args[0]
+ try:
+ numseg = int(args[1])
+ except ValueError:
+            numseg = 0
+
+ try:
+ start = int(args[2])
+ except ValueError:
+ start = 1
+
+ if start>0:
+ start -= 1
+
+ parts = title.split("/")[start:]
+ if numseg:
+ parts = parts[:numseg]
+ return "/".join(parts)
+
+ def IFERROR(self, args):
+ errmark = '<strong class="error">'
+ val = args[0]
+ bad=args[1]
+ good=args[2] or val
+
+ if errmark in val:
+ return bad
+ else:
+ return good
+
+
+for x in dir(ParserFunctions):
+ if x.startswith("_"):
+ continue
+ setattr(ParserFunctions, "#"+x, getattr(ParserFunctions, x))
+ delattr(ParserFunctions, x)
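+# After this loop the parser functions are reachable only under their "#"
+# names (e.g. "#if", "#switch", "#expr"); the bare attribute names are removed
+# so that MagicResolver only exposes them in the {{#name:...}} form.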
+
+class DummyResolver(object):
+ pass
+
+class MagicResolver(TimeMagic, PageMagic, NumberMagic, StringMagic, ParserFunctions, OtherMagic, DummyResolver):
+ def __call__(self, name, args):
+ try:
+ name = str(name)
+ except UnicodeEncodeError:
+ return None
+
+
+ m = getattr(self, name.upper(), None)
+ if m is None:
+ return None
+
+ if isinstance(m, basestring):
+ return m
+
+        res = m(args) or '' # FIXME: catch TypeErrors
+ assert isinstance(res, basestring), "MAGIC %r returned %r" % (name, res)
+ return res
+
+ def has_magic(self, name):
+ try:
+ name = str(name)
+ except UnicodeEncodeError:
+ return False
+
+
+ m = getattr(self, name.upper(), None)
+ return m is not None
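+    # Minimal usage sketch (a plain list stands in for the expander's argument
+    # object here, which is enough for the positional lookups involved):
+    #
+    #   resolver = MagicResolver()
+    #   resolver("LC", [u"FooBar"])             # -> u"foobar"
+    #   resolver("#if", [u"x", u"yes", u"no"])  # -> u"yes"
+    #   resolver.has_magic("sitename")          # -> True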
+
+
+
+magic_words = ['basepagename', 'basepagenamee', 'contentlanguage', 'currentday', 'currentday2', 'currentdayname', 'currentdow', 'currenthour', 'currentmonth', 'currentmonthabbrev', 'currentmonthname', 'currentmonthnamegen', 'currenttime', 'currenttimestamp', 'currentversion', 'currentweek', 'currentyear', 'defaultsort', 'directionmark', 'displaytitle', 'fullpagename', 'fullpagenamee', 'language', 'localday', 'localday2', 'localdayname', 'localdow', 'localhour', 'localmonth', 'localmonthabbrev', 'localmonthname', 'localmonthnamegen', 'localtime', 'localtimestamp', 'localweek', 'localyear', 'namespace', 'namespacee', 'newsectionlink', 'numberofadmins', 'numberofarticles', 'numberofedits', 'numberoffiles', 'numberofpages', 'numberofusers', 'pagename', 'pagenamee', 'pagesinnamespace', 'revisionday', 'revisionday2', 'revisionid', 'revisionmonth', 'revisiontimestamp', 'revisionyear', 'scriptpath', 'server', 'servername', 'sitename', 'subjectpagename', 'subjectpagenamee', 'subjectspace', 'subjectspacee', 'subpagename', 'subpagenamee', 'talkpagename', 'talkpagenamee', 'talkspace', 'talkspacee', 'urlencode']
+
+def _populate_dummy():
+ m=MagicResolver()
+
+ def get_dummy(name):
+ def resolve(*args):
+ log.warn("using dummy resolver for %s" % (name,))
+ return u""
+ return resolve
+
+ missing = set()
+ for x in magic_words:
+ if not m.has_magic(x):
+ missing.add(x)
+ setattr(DummyResolver, x.upper(), get_dummy(x))
+
+ if missing:
+ missing = list(missing)
+ missing.sort()
+ #log.info("installed dummy resolvers for %s" % (", ".join(missing),))
+
+_populate_dummy()
diff --git a/mwlib/metabook.py b/mwlib/metabook.py
new file mode 100755
index 0000000..e36e70d
--- /dev/null
+++ b/mwlib/metabook.py
@@ -0,0 +1,119 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import re
+import simplejson
+
+"""
+See METABOOK.txt for description of Metabook data
+"""
+
+class MetaBook(object):
+ """Encapsulate meta information about an article collection"""
+
+ title = u""
+ subtitle = u""
+
+ def __init__(self):
+ self.type = 'collection'
+ self.version = 1
+ self.items = []
+
+ def addArticles(self, articleTitles, chapterTitle=None, contentType='text/x-wiki'):
+ """
+ @param articleTitles: sequence of article titles or dicts containing
+ article title (value for key 'title') and optionally display title
+ (value for key 'displaytitle').
+ @type articleTitles: [unicode|{str: unicode}]
+ """
+
+ articles = []
+ for title in articleTitles:
+ article = {
+ 'type': 'article',
+ 'content-type': contentType,
+ }
+ if isinstance(title, dict):
+ article.update(title)
+ else:
+ article['title'] = title
+ articles.append(article)
+ if chapterTitle:
+ self.items.append({
+ 'type': 'chapter',
+ 'title': chapterTitle,
+ 'items': articles,
+ })
+ else:
+ self.items.extend(articles)
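+    # Example: addArticles([u'Foo', {'title': u'Bar', 'displaytitle': u'B'}],
+    # chapterTitle=u'Intro') appends a single chapter item whose 'items' list
+    # holds two article dicts, each with content-type 'text/x-wiki'.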
+
+ def dumpJson(self):
+ return simplejson.dumps(vars(self))
+
+ def loadJson(self, jsonStr):
+ for (var, value) in simplejson.loads(jsonStr).items():
+ setattr(self, var, value)
+
+ def readJsonFile(self, filename):
+ self.loadJson(open(filename, 'rb').read())
+
+ def loadCollectionPage(self, mwcollection):
+ """Parse wikitext of a MediaWiki collection page
+
+ @param mwcollection: wikitext of a MediaWiki collection page as created by
+ the Collection extension for MediaWiki
+ @type mwcollection: unicode
+ """
+
+ titleRe = '^==\s+(?P<title>.*?)\s+==$'
+ subtitleRe = '^===\s+(?P<subtitle>.*?)\s+===$'
+ chapterRe = '^;(?P<chapter>.*?)$'
+ articleRe = '^:\[\[:?(?P<article>.*?)(?:\|(?P<displaytitle>.*?))?\]\]$'
+ alltogetherRe = re.compile("(%s)|(%s)|(%s)|(%s)" % (titleRe, subtitleRe, chapterRe, articleRe))
+ gotChapter = False
+ chapter = ''
+ articles = []
+ for line in mwcollection.splitlines():
+ res = alltogetherRe.search(line.strip())
+ if not res:
+ continue
+ if res.group('title'):
+ self.title = res.group('title')
+ elif res.group('subtitle'):
+ self.subtitle = res.group('subtitle')
+ elif res.group('chapter'):
+ self.addArticles(articles, chapter)
+ articles = []
+ chapter = res.group('chapter')
+ elif res.group('article'):
+ d = {'title': res.group('article')}
+ if res.group('displaytitle'):
+ d['displaytitle'] = res.group('displaytitle')
+ articles.append(d)
+
+ if len(articles):
+ self.addArticles(articles, chapter)
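+    # Example of the collection wikitext parsed above (as written by the
+    # Collection extension); chapters start with ';', article lines with ':':
+    #
+    #   == My Book Title ==
+    #   === An optional subtitle ===
+    #   ;Chapter 1
+    #   :[[Some Article]]
+    #   :[[Other Article|Displayed Name]]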
+
+ def getArticles(self):
+ """Generator that produces a sequence of (title, revision) pairs for
+ each article contained in this collection. If no revision is specified,
+ None is returned for the revision item.
+ """
+
+ for item in self.getItems():
+ if item['type'] == 'article':
+ yield item['title'], item.get('revision', None)
+
+ def getItems(self):
+ """Generator that produces a flattened list of chapters and articles
+ in this collection.
+ """
+
+ for item in self.items:
+ if item['type'] == 'article':
+ yield item
+ elif item['type'] == 'chapter':
+ yield item
+ for article in item.get('items', []):
+ yield article
+
diff --git a/mwlib/mwapidb.py b/mwlib/mwapidb.py
new file mode 100644
index 0000000..4826ef4
--- /dev/null
+++ b/mwlib/mwapidb.py
@@ -0,0 +1,376 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2008, PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+import re
+import shutil
+import tempfile
+import time
+import urllib
+import urllib2
+import urlparse
+
+import simplejson
+
+from mwlib import uparser, utils
+from mwlib.log import Log
+
+log = Log("mwapidb")
+
+try:
+ from mwlib.licenses import lower2normal
+except ImportError:
+ log.warn('no licenses found')
+ lower2normal = {}
+
+# ==============================================================================
+
+
+def fetch_url(url, ignore_errors=False):
+ log.info("fetching %r" % (url,))
+ opener = urllib2.build_opener()
+ opener.addheaders = [('User-agent', 'mwlib')]
+ try:
+ data = opener.open(url).read()
+ except urllib2.URLError, err:
+ if ignore_errors:
+ log.error("%s - while fetching %r" % (err, url))
+ return None
+ raise RuntimeError('Could not fetch %r: %s' % (url, err))
+ log.info("got %r (%d Bytes)" % (url, len(data)))
+ return data
+
+
+# ==============================================================================
+
+
+class APIHelper(object):
+ def __init__(self, base_url):
+ """
+        @param base_url: base URL of a MediaWiki installation,
+            i.e. the URL path to its php scripts,
+            e.g. 'http://en.wikipedia.org/w/' for the English Wikipedia.
+        @type base_url: basestring
+ """
+
+ if isinstance(base_url, unicode):
+ self.base_url = base_url.encode('utf-8')
+ else:
+ self.base_url = base_url
+ if self.base_url[-1] != '/':
+ self.base_url += '/'
+
+ def query(self, **kwargs):
+ args = {
+ 'action': 'query',
+ 'format': 'json',
+ }
+ args.update(**kwargs)
+ for k, v in args.items():
+ if isinstance(v, unicode):
+ args[k] = v.encode('utf-8')
+ data = fetch_url('%sapi.php?%s' % (self.base_url, urllib.urlencode(args)))
+ if data is None:
+ return None
+ try:
+ return simplejson.loads(unicode(data, 'utf-8'))['query']
+ except KeyError:
+ return None
+ except:
+ raise RuntimeError('api.php query failed. Are you sure you specified the correct baseurl?')
+
+ def page_query(self, **kwargs):
+ q = self.query(**kwargs)
+ if q is None:
+ return None
+ try:
+ page = q['pages'].values()[0]
+ except (KeyError, IndexError):
+ return None
+ if 'missing' in page:
+ return None
+ return page
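+    # Usage sketch (the wiki URL is only an example):
+    #
+    #   api = APIHelper('http://en.wikipedia.org/w/')
+    #   page = api.page_query(titles='Physics', redirects=1,
+    #                         prop='revisions', rvprop='content')
+    #   # 'page' is the single entry of the query result's 'pages' map,
+    #   # or None if the page is missing or the request failed.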
+
+
+# ==============================================================================
+
+
+class ImageDB(object):
+ def __init__(self, base_url, shared_base_url=None):
+ self.api_helpers = [APIHelper(base_url)]
+ if shared_base_url is not None:
+ self.api_helpers.append(APIHelper(shared_base_url))
+ self.tmpdir = tempfile.mkdtemp()
+
+ def clear(self):
+ shutil.rmtree(self.tmpdir, ignore_errors=True)
+
+ def getURL(self, name, size=None):
+ """Return image URL for image with given name
+
+ @param name: image name (without namespace, i.e. without 'Image:')
+ @type name: unicode
+
+ @returns: URL to original image
+ @rtype: str
+ """
+
+ assert isinstance(name, unicode), 'name must be of type unicode'
+
+ for api_helper in self.api_helpers:
+ if size is None:
+ result = api_helper.page_query(titles='Image:%s' % name, prop='imageinfo', iiprop='url')
+ else:
+ result = api_helper.page_query(titles='Image:%s' % name, prop='imageinfo', iiprop='url', iiurlwidth=str(size))
+ if result is not None:
+ break
+ else:
+ return None
+
+ try:
+ imageinfo = result['imageinfo'][0]
+ if size is not None and 'thumburl' in imageinfo:
+ url = imageinfo['thumburl']
+ else:
+ url = imageinfo['url']
+ if url: # url can be False
+ if url.startswith('/'):
+ url = urlparse.urljoin(self.api_helpers[0].base_url, url)
+ return url
+ return None
+ except (KeyError, IndexError):
+ return None
+
+ def getDiskPath(self, name, size=None):
+ """Return filename for image with given name and size
+
+ @param name: image name (without namespace, i.e. without 'Image:')
+ @type name: unicode
+
+ @param size: if given, the image is converted to the given maximum width
+ @type size: int or NoneType
+
+ @returns: filename of image or None if image could not be found
+ @rtype: basestring
+ """
+
+ assert isinstance(name, unicode), 'name must be of type unicode'
+
+ url = self.getURL(name, size=size)
+ if url is None:
+ return None
+
+ data = fetch_url(url, ignore_errors=True)
+ if not data:
+ return None
+
+ ext = url.rsplit('.')[-1]
+ if size is not None:
+ ext = '%dpx.%s' % (size, ext)
+ else:
+ ext = '.%s' % ext
+ filename = os.path.join(self.tmpdir, utils.fsescape(name + ext))
+ f = open(filename, 'wb')
+ f.write(data)
+ f.close()
+ return filename
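+    # Usage sketch (hypothetical image name; the URLs are only examples):
+    #
+    #   imgdb = ImageDB('http://en.wikipedia.org/w/',
+    #                   shared_base_url='http://commons.wikimedia.org/w/')
+    #   path = imgdb.getDiskPath(u'Example.jpg', size=180)  # downscaled copy
+    #   url = imgdb.getURL(u'Example.jpg')                  # original image URL
+    #   imgdb.clear()                       # removes the temporary directory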
+
+ def getLicense(self, name):
+ """Return license of image as stated on image description page
+
+ @param name: image name without namespace (e.g. without "Image:")
+ @type name: unicode
+
+        @returns: license of image or None, if no valid license could be found
+ @rtype: unicode
+ """
+
+ assert isinstance(name, unicode), 'name must be of type unicode'
+
+ for api_helper in self.api_helpers:
+ result = api_helper.page_query(titles='Image:%s' % name, prop='templates')
+ if result is not None:
+ break
+ else:
+ return None
+
+ try:
+ templates = [t['title'] for t in result['templates']]
+ except KeyError:
+ return None
+
+ for t in templates:
+ try:
+ return lower2normal[t.split(':', 1)[-1].lower()]
+ except KeyError:
+ pass
+
+ return None
+
+
+# ==============================================================================
+
+
+class WikiDB(object):
+ print_template = u'Template:Print%s'
+
+ ip_rex = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')
+ bot_rex = re.compile(r'\bbot\b', re.IGNORECASE)
+
+ def __init__(self, base_url, license, template_blacklist=None):
+ """
+ @param base_url: base URL of a MediaWiki,
+ e.g. 'http://en.wikipedia.org/w/'
+ @type base_url: basestring
+
+ @param license: title of an article containing full license text
+ @type license: unicode
+
+ @param template_blacklist: title of an article containing blacklisted
+ templates (optional)
+ @type template_blacklist: unicode
+ """
+
+ self.base_url = base_url
+ self.license = license
+ self.api_helper = APIHelper(self.base_url)
+ self.template_cache = {}
+ self.template_blacklist = []
+ if template_blacklist is not None:
+ raw = self.getRawArticle(template_blacklist)
+ if raw is None:
+ log.error('Could not get template blacklist article %r' % template_blacklist)
+ else:
+ self.template_blacklist = [template.lower().strip()
+ for template in re.findall('\* *\[\[.*?:(.*?)\]\]', raw)]
+
+ def getURL(self, title, revision=None):
+ name = urllib.quote(title.replace(" ", "_").encode('utf-8'))
+ if revision is None:
+ return '%sindex.php?title=%s' % (self.base_url, name)
+ else:
+ return '%sindex.php?title=%s&oldid=%s' % (self.base_url, name, revision)
+
+ def getAuthors(self, title, revision=None, max_num_authors=10):
+ """Return at most max_num_authors names of non-bot, non-anon users for
+        non-minor changes of the given article (before the given revision).
+
+ @returns: list of principal authors
+ @rtype: [unicode]
+ """
+
+ result = self.api_helper.page_query(
+ titles=title,
+ redirects=1,
+ prop='revisions',
+ rvprop='user|ids|flags|comment',
+ rvlimit=500,
+ )
+ if result is None:
+ return None
+
+ try:
+ revs = result['revisions']
+ except KeyError:
+ return None
+
+ if revision is not None:
+ revision = int(revision)
+ revs = [r for r in revs if r['revid'] < revision]
+
+ authors = [r['user'] for r in revs
+ if not r.get('anon')
+ and not self.ip_rex.match(r['user'])
+ and not r.get('minor')
+ and not self.bot_rex.search(r.get('comment', ''))
+ and not self.bot_rex.search(r['user'])
+ ]
+ author2count = {}
+ for a in authors:
+ try:
+ author2count[a] += 1
+ except KeyError:
+ author2count[a] = 1
+ author2count = author2count.items()
+ author2count.sort(key=lambda a: -a[1])
+ return [a[0] for a in author2count[:max_num_authors]]
+
+ def getTemplate(self, name, followRedirects=True):
+ """
+ Note: *Not* following redirects is unsupported!
+ """
+
+ try:
+ return self.template_cache[name]
+ except KeyError:
+ pass
+
+ if ":" in name:
+ name = name.split(':', 1)[1]
+
+ if name.lower() in self.template_blacklist:
+ log.info("ignoring blacklisted template:" , repr(name))
+ return None
+
+ for title in (self.print_template % name, 'Template:%s' % name):
+ log.info("Trying template %r" % (title,))
+ c = self.getRawArticle(title)
+ if c is not None:
+ self.template_cache[name] = c
+ return c
+
+ return None
+
+ def getRawArticle(self, title, revision=None):
+ if revision is None:
+ page = self.api_helper.page_query(titles=title, redirects=1, prop='revisions', rvprop='content')
+ else:
+ page = self.api_helper.page_query(revids=revision, prop='revisions', rvprop='content')
+            if page is not None and page['title'] != title: # given revision could point to another article!
+                return None
+        if page is None:
+            return None
+ try:
+ return page['revisions'][0].values()[0]
+ except KeyError:
+ return None
+
+ def getMetaData(self):
+ result = self.api_helper.query(meta='siteinfo')
+ try:
+ g = result['general']
+ return {
+ 'license': {
+ 'name': g['rights'],
+ 'wikitext': self.getRawArticle(self.license),
+ },
+ 'url': g['base'],
+ 'name': '%s (%s)' % (g['sitename'], g['lang']),
+ }
+ except KeyError:
+ return None
+
+ def getParsedArticle(self, title, revision=None):
+ raw = self.getRawArticle(title, revision=revision)
+ if raw is None:
+ return None
+ a = uparser.parseString(title=title, raw=raw, wikidb=self)
+ return a
+
+
+class Overlay(WikiDB):
+ def __init__(self, wikidb, templates):
+ self.__dict__.update(wikidb.__dict__)
+ self.overlay_templates = templates
+
+ def getTemplate(self, name, followRedirects=False):
+ try:
+ return self.overlay_templates[name]
+ except KeyError:
+ pass
+
+ return super(Overlay, self).getTemplate(name, followRedirects=followRedirects)
+
diff --git a/mwlib/mwscan.py b/mwlib/mwscan.py
new file mode 100755
index 0000000..100ea35
--- /dev/null
+++ b/mwlib/mwscan.py
@@ -0,0 +1,315 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import sys
+import time
+import _mwscan
+import htmlentitydefs
+
+class token(object):
+ t_end = 0
+ t_text = 1
+ t_entity = 2
+ t_special = 3
+ t_magicword = 4
+ t_comment = 5
+ t_2box_open = 6
+ t_2box_close = 7
+ t_http_url = 8
+ t_break = 9
+ t_begin_table = 10
+ t_end_table = 11
+ t_html_tag = 12
+ t_style = 13
+ t_pre = 14
+ t_section = 15
+ t_section_end = 16
+ t_item = 17
+ t_colon = 18
+ t_semicolon = 19
+ t_hrule = 20
+ t_newline = 21
+ t_column = 22
+ t_row = 23
+ t_tablecaption = 24
+ t_urllink = 25
+
+ token2name = {}
+
+token2name = token.token2name
+for d in dir(token):
+    if d.startswith("t_"):
+        token2name[getattr(token, d)] = d
+del d
+
+
+
+
+def dump_tokens(text, tokens):
+ for type, start, len in tokens:
+ print type, repr(text[start:start+len])
+
+def scan(text):
+ stime=time.time()
+ text += u"\0"*32
+ tokens = _mwscan.scan(text)
+ return scan_result(text, tokens)
+
+def resolve_entity(e):
+ if e[1]=='#':
+ if e[2]=='x' or e[2]=='X':
+ return unichr(int(e[3:-1], 16))
+ else:
+ return unichr(int(e[2:-1]))
+
+ else:
+ try:
+ return unichr(htmlentitydefs.name2codepoint[e[1:-1]])
+ except KeyError:
+ return e
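+# Examples of the resolution above:
+#   resolve_entity(u"&#65;")   -> u"A"
+#   resolve_entity(u"&#x41;")  -> u"A"
+#   resolve_entity(u"&amp;")   -> u"&"
+#   resolve_entity(u"&bogus;") -> u"&bogus;"  (unknown names pass through)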
+
+
+class scan_result(object):
+ def __init__(self, source, toks):
+ self.source = source
+ self.toks = toks
+
+ def rawtext(self, (type, start, tlen)):
+ return self.source[start:start+tlen]
+
+ def text(self, t):
+ r=self.rawtext(t)
+ if t[0] == token.t_entity:
+ return resolve_entity(r)
+ else:
+ return r
+
+ def dump(self, out=None):
+ if out is None:
+ out = sys.stdout
+ for x in self:
+ out.write("%s\n" % self.repr(x))
+
+
+
+ def repr(self, t):
+ return "(%s, %r)" % (token.token2name.get(t[0]), self.rawtext(t))
+
+
+ def __len__(self):
+ return len(self.toks)
+
+ def __iter__(self):
+ return iter(self.toks)
+
+ def __getitem__(self, idx):
+ return self.toks[idx]
+
+
+class _compat_scanner(object):
+ class ignore: pass
+ tok2compat = {
+ token.t_text: "TEXT",
+ token.t_special: "SPECIAL",
+ token.t_2box_open: "[[",
+ token.t_2box_close: "]]",
+ token.t_http_url: "URL",
+ token.t_break: "BREAK",
+ token.t_style: "STYLE",
+ token.t_pre: "PRE",
+ token.t_section: "SECTION",
+ token.t_section_end: "ENDSECTION",
+ token.t_magicword: ignore,
+ token.t_comment: ignore,
+ token.t_end: ignore,
+ token.t_item: "ITEM",
+ token.t_colon: "EOLSTYLE",
+ token.t_semicolon: "EOLSTYLE",
+ token.t_newline: "\n",
+ token.t_begin_table: "BEGINTABLE",
+ token.t_end_table: "ENDTABLE",
+ token.t_column: "COLUMN",
+ token.t_row: "ROW",
+ token.t_tablecaption: "TABLECAPTION",
+ token.t_urllink: "URLLINK",
+ }
+
+
+ def __call__(self, text):
+ tokens = scan(text)
+ scanres = scan_result(text, tokens)
+
+
+ res = []
+
+ def g():
+ return text[start:start+tlen]
+ a = lambda x: res.append((x,g()))
+
+
+ ignore = self.ignore
+ tok2compat = self.tok2compat
+
+ i = 0
+ numtokens = len(tokens)
+ while i < numtokens:
+ type, start, tlen = tokens[i]
+ n=tok2compat.get(type)
+ if n is ignore:
+ pass
+ elif n is not None:
+ a(n)
+ elif type==token.t_entity:
+ res.append(("TEXT", resolve_entity(g())))
+ elif type==token.t_hrule:
+ res.append((self.tagtoken("<hr />"), g()))
+ elif type==token.t_html_tag:
+ s = g()
+
+ tt = self.tagtoken(s)
+ isEndToken = isinstance(tt, EndTagToken)
+ closingOrSelfClosing = isEndToken or tt.selfClosing
+
+ if tt.t=="math":
+ if closingOrSelfClosing:
+ i+=1
+ continue
+
+ res.append(("MATH", g()))
+ i+=1
+ while i<numtokens:
+ type, start, tlen = tokens[i]
+ if type==token.t_html_tag:
+ tt = self.tagtoken(g())
+ if tt.t=="math":
+ res.append(("ENDMATH", g()))
+ break
+ res.append(("LATEX", g()))
+ i+=1
+ elif tt.t=="timeline":
+ if closingOrSelfClosing:
+ i+=1
+ continue
+ res.append(("TIMELINE", g()))
+ i+=1
+ while i<numtokens:
+ type, start, tlen = tokens[i]
+ if type==token.t_html_tag:
+ tt = self.tagtoken(g())
+ if tt.t=="timeline":
+ res.append(("TIMELINE", g()))
+ break
+ res.append(("TEXT", g()))
+ i+=1
+ elif tt.t=="nowiki":
+ i+=1
+ if isEndToken or tt.selfClosing:
+ continue
+ while i<numtokens:
+ type, start, tlen = tokens[i]
+ if type==token.t_html_tag:
+ tt = self.tagtoken(g())
+ if tt.t=="nowiki":
+ break
+ res.append(("TEXT", scanres.text((type, start, tlen))))
+ i+=1
+ elif tt.t in ["font", "noinclude", 'p', 'caption']:
+ pass
+ elif tt.t=="table":
+ if isEndToken:
+ res.append(("ENDTABLE", g()))
+ else:
+ res.append(("BEGINTABLE", g()))
+ elif tt.t in ['th', 'td']:
+ if isEndToken:
+ pass
+ else:
+ res.append(("COLUMN", g()))
+ elif tt.t=='tr':
+ if isEndToken:
+ pass
+ else:
+ res.append(("ROW", g()))
+ else:
+ res.append((tt, s))
+ else:
+ a(type)
+ i+=1
+
+
+ return res
+
+ def tagtoken(self, text):
+ selfClosing = False
+ if text.startswith(u"</"):
+ name = text[2:-1]
+ klass = EndTagToken
+ isEndToken = True
+ elif text.endswith("/>"):
+ name = text[1:-2]
+ klass = TagToken
+ selfClosing = True
+ isEndToken = False # ???
+ else:
+ name = text[1:-1]
+ klass = TagToken
+ isEndToken = False
+
+ name, values = (name.split(None, 1)+[u''])[:2]
+ from mwlib.parser import paramrx
+ values = dict(paramrx.findall(values))
+ name = name.lower()
+
+ if name=='br' or name=='references':
+ isEndToken = False
+ klass = TagToken
+
+ r = klass(name, text)
+ r.selfClosing = selfClosing
+ r.values = values
+ return r
+
+
+
+compat_scan = _compat_scanner()
+
+# from plexscanner import _BaseTagToken, TagToken, EndTagToken
+
+class _BaseTagToken(object):
+ def __eq__(self, other):
+ if isinstance(other, basestring):
+ return self.t == other
+ if isinstance(other, self.__class__):
+ return self.t == other.t
+ return False
+
+ def __ne__(self, other):
+ return not(self==other)
+
+ def __hash__(self):
+ return hash(self.t)
+
+class TagToken(_BaseTagToken):
+ values = {}
+ selfClosing=False
+
+ def __init__(self, t, text=''):
+ self.t = t
+ self.text = text
+
+ def __repr__(self):
+ return "<Tag:%s %r>" % (self.t, self.text)
+
+class EndTagToken(_BaseTagToken):
+ def __init__(self, t, text=''):
+ self.t = t
+ self.text = text
+
+ def __repr__(self):
+ return "<EndTag:%s>" % self.t
+
+def tokenize(input, name="unknown"):
+ assert input is not None, "must specify input argument in tokenize"
+ return compat_scan(input)
diff --git a/mwlib/netdb.py b/mwlib/netdb.py
new file mode 100755
index 0000000..ec38ee0
--- /dev/null
+++ b/mwlib/netdb.py
@@ -0,0 +1,529 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2008, PediaPress GmbH
+# See README.txt for additional licensing information.
+
+# An alternative solution to construct the hashpath of images would be to use
+# api.php, e.g.
+# fetch the page http://de.wikipedia.org/w/api.php?action=query&titles=Bild:SomePic.jpg&prop=imageinfo&iiprop=url&format=json
+
+import os
+import sys
+import urllib
+import urllib2
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+import shutil
+import sys
+import time
+import tempfile
+import re
+
+from mwlib import uparser, utils
+from mwlib.log import Log
+
+log = Log("netdb")
+
+# ==============================================================================
+
+def hashpath(name):
+ """Compute hashpath for an image in the same way as MediaWiki does
+
+ @param name: name of an image
+ @type name: unicode
+
+ @returns: hashpath to image
+ @type: str
+ """
+
+ name = name.replace(' ', '_')
+ name = name[:1].upper()+name[1:]
+ d = md5(name.encode('utf-8')).hexdigest()
+ return "/".join([d[0], d[:2], name])
+
+class ImageDB(object):
+ convert_command = 'convert' # name of/path to ImageMagick's convert tool
+
+ def __init__(self, baseurl, cachedir=None, wikidb=None, knownLicenses=None):
+ """Init ImageDB with a base URL (or a list of base URLs) and optionally
+ with a cache directory.
+
+ @param baseurl: base URL or sequence containing several base URLs
+ @type baseurl: unicode or (unicode,)
+
+ @param cachedir: image cache directory (optional)
+ @type cachedir: basestring or None
+
+ @param wikidb: WikiDB instance used to fetch image description pages to
+ find out image licenses
+ @type wikidb: object
+
+ @param knownLicenses: list of known license templates (whose name is the
+ name of the license) which may appear on image description pages
+ @type knownLicenses: [unicode]
+ """
+
+ if isinstance(baseurl, unicode):
+ self.baseurls = [baseurl.encode('ascii')]
+ else:
+ self.baseurls = []
+ for bu in baseurl:
+ if isinstance(bu, unicode):
+ bu = bu.encode('ascii')
+ self.baseurls.append(bu)
+
+ if cachedir:
+ self.cachedir = cachedir
+ self.tempcache = False
+ else:
+ self.cachedir = tempfile.mkdtemp()
+ self.tempcache = True
+ if self.cachedir[-1] != '/':
+ self.cachedir += '/' # needed for getPath() to work correctly
+
+ self.wikidb = wikidb
+
+ oredLicenses = '|'.join(['(%s)' % re.escape(license)
+ for license in (knownLicenses or [])])
+ self.licenseRegexp = re.compile(r'{{(?P<license>%s)}}' % oredLicenses)
+
+ self.name2license = {}
+
+ def clear(self):
+ """Delete temporary cache directory (i.e. only if no cachedir has been
+ passed to __init__().
+ """
+
+ if self.tempcache:
+ shutil.rmtree(self.cachedir)
+
+ def getURL(self, name, size=None):
+ """Return image URL for image with given name
+
+ @param name: image name (without namespace, i.e. without 'Image:')
+ @type name: unicode
+
+ @returns: URL to original image
+ @rtype: str
+ """
+
+ assert isinstance(name, unicode), 'name must be of type unicode'
+
+ # use getDiskPath() to fetch and cache (!) image
+ path = self.getDiskPath(name, size=size)
+ if path is None:
+ return None
+
+ # first, look for a cached image with that name (in any size)
+ for baseurl in self.baseurls:
+ urldir = self._getCacheDirForBaseURL(baseurl)
+ if not path.startswith(urldir):
+ continue
+ return self._getImageURLForBaseURL(baseurl, name)
+
+ def getPath(self, name, size=None):
+ """Return path to image with given parameters relative to cachedir"""
+
+ path = self.getDiskPath(name, size=size)
+ if path is None:
+ return None
+ assert path.startswith(self.cachedir), 'invalid path from getDiskPath()'
+ return path[len(self.cachedir):]
+
+ def getDiskPath(self, name, size=None):
+ """Return filename for image with given name. If the image is not found
+        in the cache, it is fetched via HTTP and converted.
+
+ @param name: image name (without namespace, i.e. without 'Image:')
+ @type name: unicode
+
+ @param size: if given, the image is converted to the given maximum size
+ (i.e. the image is scaled so that neither its width nor its height
+ exceed size)
+ @type size: int or NoneType
+
+ @returns: filename of image
+ @rtype: basestring
+ """
+
+ assert isinstance(name, unicode), 'name must be of type unicode'
+
+ path = self._getImageFromCache(name, size=size)
+ if path:
+ return path
+
+ tmpfile, baseurl = self._fetchImage(name)
+ if tmpfile is None:
+ return None
+
+ self.name2license[name] = self._fetchLicense(baseurl, name)
+
+ path = self._convertToCache(tmpfile, baseurl, name, size=size)
+
+ try:
+ os.unlink(tmpfile)
+ except IOError:
+ log.warn('Could not delete temp file %r' % tmpfile)
+
+ return path
+
+ def _fetchLicense(self, baseurl, name):
+ if self.wikidb is None:
+ return None
+
+ raw = self.wikidb.getImageDescription(name,
+ urlIndex=self.baseurls.index(baseurl),
+ )
+ if raw is None:
+ return None
+
+ mo = re.search(self.licenseRegexp, raw)
+ if mo is None:
+ return None
+
+ return mo.group('license')
+
+ def getLicense(self, name):
+ """Return license of image as stated on image description page
+
+ @param name: image name without namespace (e.g. without "Image:")
+ @type name: unicode
+
+        @returns: license of image or None, if no valid license could be found
+ @rtype: str
+ """
+
+ assert isinstance(name, unicode), 'name must be of type unicode'
+ return self.name2license.get(name)
+
+ def _getImageFromCache(self, name, size=None):
+ """Look in cachedir for an image with the given parameters"""
+
+ for baseurl in self.baseurls:
+ path = self._getCachedImagePath(baseurl, name, size=size)
+ if path is not None and os.path.exists(path):
+ return path
+ return None
+
+ def _getCacheDirForBaseURL(self, baseurl):
+ """Construct the path of the cache directory for the given base URL.
+ This directory doesn't need to exist.
+ """
+
+ return os.path.join(self.cachedir,
+ md5(baseurl.encode('utf-8')).hexdigest()[:8])
+
+ def _getCachedImagePath(self, baseurl, name, size=None, makedirs=False):
+ """Construct a filename for an image with the given parameters inside
+ the cache directory. The file doesn't need to exist. If makedirs is True
+ create all directories up to filename.
+ """
+
+ urlpart = self._getCacheDirForBaseURL(baseurl)
+ if size is not None:
+ sizepart = '%dpx' % size
+ else:
+ sizepart = 'orig'
+
+ if name.lower().endswith('.svg'):
+ if size is None:
+ log.warn('Cannot get SVG image when no size is given')
+ return None
+ name += '.png'
+ if name.lower().endswith('.gif'):
+ name += '.png'
+ name = (name[0].upper() + name[1:]).replace(' ', '_').replace("'", "-")
+
+ d = os.path.join(urlpart, sizepart)
+ if makedirs and not os.path.isdir(d):
+ os.makedirs(d)
+ return os.path.join(d, utils.fsescape(name))
+
+ def _fetchImage(self, name):
+ """Fetch image with given name in original (i.e. biggest) size per HTTP.
+
+ @returns: filename of written image and base URL used to retrieve the
+ image or (None, None) if the image could not be fetched
+ @rtype: (basestring, str) or (NoneType, NoneType)
+ """
+
+ for baseurl in self.baseurls:
+ path = self._fetchImageFromBaseURL(baseurl, name)
+ if path:
+ return path, baseurl
+ return None, None
+
+ def _getImageURLForBaseURL(self, baseurl, name):
+ """Construct a URL for the image with given name under given base URL"""
+
+ hp = hashpath(name).encode('utf-8')
+ return urllib.basejoin(baseurl, urllib.quote(hp))
+
+ def _fetchImageFromBaseURL(self, baseurl, name):
+ """Fetch image with given name under given baseurl and write it to a
+ tempfile.
+
+ @returns: filename of written image or None if image could not be fetched
+ @rtype: basestring or NoneType
+ """
+
+ url = self._getImageURLForBaseURL(baseurl, name)
+ log.info("fetching %r" % (url,))
+ opener = urllib2.build_opener()
+ opener.addheaders = [('User-agent', 'mwlib')]
+ try:
+ data = opener.open(url).read()
+ log.info("got image: %r" % url)
+ fd, filename = tempfile.mkstemp()
+ os.write(fd, data)
+ os.close(fd)
+ return filename
+ except urllib2.URLError, err:
+ log.error("%s - while fetching %r" % (err, url))
+ return None
+
+ def _convertToCache(self, srcfile, baseurl, name, size=None):
+ """Convert image in file named srcfile to have the given maximum size.
+ Save the converted image in the cache directory for the given baseurl.
+
+ @returns: filename of converted image
+ @rtype: basestring
+ """
+ destfile = self._getCachedImagePath(baseurl, name, size=size, makedirs=True)
+ if size is not None:
+ thumbnail = '-thumbnail "%dx%d>"' % (size, size)
+ else:
+ thumbnail = '-strip'
+
+ opts = '-background white -density 100 -flatten -coalesce %(thumbnail)s' % {
+ 'thumbnail': thumbnail,
+ }
+ cmd = "%(convert)s %(opts)s '%(src)s[0]' '%(dest)s'" % {
+ 'convert': self.convert_command,
+ 'opts': opts,
+ 'src': srcfile,
+ 'dest': destfile,
+ }
+ log.info('executing %r' % cmd)
+ rc = utils.shell_exec(cmd)
+ if rc != 0:
+ log.error('Could not convert %r: convert returned %d' % (name, rc))
+ return None
+
+ return destfile
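+    # Example of the resulting command line for size=180 (paths illustrative):
+    #
+    #   convert -background white -density 100 -flatten -coalesce \
+    #       -thumbnail "180x180>" '/tmp/tmpXYZ[0]' '<cachedir>/<urlhash>/180px/Name.png'
+    #
+    # The trailing "[0]" makes ImageMagick use only the first frame/page of
+    # multi-frame images (e.g. animated GIFs).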
+
+
+# ==============================================================================
+
+def normname(name):
+ name = name.strip().replace("_", " ")
+ name = name[:1].upper()+name[1:]
+ return name
+
+
+class NetDB(object):
+ redirect_rex = re.compile(r'^#Redirect:?\s*?\[\[(?P<redirect>.*?)\]\]', re.IGNORECASE)
+
+ def __init__(self, pagename,
+ imagedescriptionurls=None,
+ templateurls=None,
+ templateblacklist=None,
+ defaultauthors=None,
+ ):
+ """
+ @param pagename: URL to page in wikitext format. @TITLE@ gets replaced
+ with the page name and @REVISION@ gets replaced with the requested
+ revision/oldid. E.g.
+
+ "http://mw/index.php?title=@TITLE@&action=raw&oldid=@TITLE@"
+
+ @type pagename: str
+
+ @param imagedescriptionurls: list of URLs to image description pages in
+ wikitext format. @TITLE@ gets replaced with the image title w/out
+ its prefix. E.g.
+
+ ["http://mw/index.php?title=Image:@TITLE@s&action=raw"]
+
+ The list must be of the same length as the baseurl list of the
+ accompanying ImageDB, and the URL with the corresponding position
+ in the list is used to retrieve the description page.
+ @type imagedescriptionurls: [str]
+
+ @param templateurls: list of URLs to template pages in wikitext format.
+ @TITLE@ gets replaced with the template title. E.g.
+
+ ["http://mw/index.php?title=Template:@TITLE@s&action=raw"]
+
+ If more than one URL is specified, URLs are tried in given order.
+ @type templateurls: [str]
+
+ @param defaultauthors: list of default (principal) authors for articles
+ @type defaultauthors: [unicode]
+ """
+
+ self.pagename = pagename.replace("%", "%%").replace("@TITLE@", "%(NAME)s").replace("@REVISION@", "%(REVISION)s")
+
+ if templateurls is None:
+ templateurls = []
+ self.templateurls = [x.replace("%", "%%").replace("@TITLE@", "%(NAME)s")
+ for x in templateurls]
+
+ if imagedescriptionurls is None:
+ imagedescriptionurls = []
+ self.imagedescriptionurls = [x.replace("%", "%%").replace("@TITLE@", "%(NAME)s")
+ for x in imagedescriptionurls]
+
+ if templateblacklist:
+ self.templateblacklist = self._readTemplateBlacklist(templateblacklist)
+ else:
+ self.templateblacklist = []
+
+ if defaultauthors:
+ self.defaultauthors = defaultauthors
+ else:
+ self.defaultauthors = []
+
+ self.pages = {}
+
+ def _getpage(self, url, expectedContentType='text/x-wiki'):
+ try:
+ return self.pages[url]
+ except KeyError:
+ pass
+
+ stime=time.time()
+ response = urllib.urlopen(url)
+ data = response.read()
+ log.info('fetched %r in %ss' % (url, time.time()-stime))
+
+ if expectedContentType:
+ ct = response.info().gettype()
+ if ct != expectedContentType:
+                log.warn('Skipping page %r with content-type %r (%r was expected).'\
+ % (url, ct, expectedContentType))
+ return None
+
+ self.pages[url] = data
+ return data
+
+ def _readTemplateBlacklist(self,templateblacklist):
+ if not templateblacklist:
+ return []
+ try:
+ content = urllib.urlopen(templateblacklist).read()
+ return [template.lower().strip() for template in re.findall('\* *\[\[.*?:(.*?)\]\]', content)]
+ except: # fixme: more sensible error handling...
+ log.error('Error fetching template blacklist from url:', templateblacklist)
+ return []
+
+ def _dummy(self, *args, **kwargs):
+ pass
+
+ startCache = _dummy
+
+ def getURL(self, title, revision=None):
+ name = urllib.quote(title.replace(" ", "_").encode('utf8'))
+ if revision is None:
+ return self.pagename % dict(NAME=name, REVISION='0')
+ else:
+ return self.pagename % dict(NAME=name, REVISION=revision)
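+    # Example: with pagename
+    #   "http://mw/index.php?title=@TITLE@&action=raw&oldid=@REVISION@"
+    # getURL(u"Some page", revision=1234) yields
+    #   "http://mw/index.php?title=Some_page&action=raw&oldid=1234".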
+
+ def getAuthors(self, title, revision=None):
+ return list(self.defaultauthors)
+
+ def title2db(self, title):
+ assert isinstance(title, unicode), 'title must be of type unicode'
+ return title.encode('utf-8')
+
+ def db2title(self, dbtitle):
+ assert isinstance(dbtitle, str), 'dbtitle must be of type str'
+ return unicode(dbtitle, 'utf-8')
+
+ def getImageDescription(self, title, urlIndex=0):
+ """Fetch the image description page for the image with the given title.
+ If baseurl and self.imagedescriptions contains more than one URL, use
+ the one which starts with baseurl.
+
+ @param title: title of the image w/out prefix (like Image:)
+ @type title: unicode
+
+ @param urlIndex: index for imagedescriptionurls
+ @type urlIndex: int
+
+ @returns: wikitext of image description page or None
+ @rtype: unicode or None
+ """
+
+ if not self.imagedescriptionurls:
+ return None
+
+ raw = self._getpage(self.imagedescriptionurls[urlIndex] % {
+ 'NAME': urllib.quote(title.replace(" ", "_").encode('utf8')),
+ })
+ if raw is None:
+ return None
+
+ return unicode(raw, 'utf-8')
+
+ def getTemplate(self, name, followRedirects=False):
+ if ":" in name:
+ name = name.split(':', 1)[1]
+
+
+ if name.lower() in self.templateblacklist:
+ log.info("ignoring blacklisted template:" , repr(name))
+ return None
+ name = urllib.quote(name.replace(" ", "_").encode('utf8'))
+ for u in self.templateurls:
+ url = u % dict(NAME=name)
+ log.info("Trying %r" %(url,))
+ c=self._getpage(url)
+ if c:
+ log.info("got content from", url)
+ res=unicode(c, 'utf8')
+ mo = self.redirect_rex.search(res)
+ if mo:
+ redirect = mo.group('redirect')
+ redirect = normname(redirect.split("|", 1)[0].split("#", 1)[0])
+ return self.getTemplate(redirect)
+ return res
+
+
+
+
+ #return self.getRawArticle(u'Template:%s' % name)
+
+ def getRawArticle(self, title, revision=None):
+ r = self._getpage(self.getURL(title, revision=revision))
+ if r is None:
+ return None
+ return unicode(r, 'utf8')
+
+ def getRedirect(self, title):
+ return u""
+
+ def getParsedArticle(self, title, revision=None):
+ raw = self.getRawArticle(title, revision=revision)
+ if raw is None:
+ return None
+ a = uparser.parseString(title=title, raw=raw, wikidb=self)
+ return a
+
+
+class Overlay(NetDB):
+ def __init__(self, wikidb, templates):
+ self.__dict__.update(wikidb.__dict__)
+ self.overlay_templates = templates
+
+ def getTemplate(self, name, followRedirects=False):
+ try:
+ return self.overlay_templates[name]
+ except KeyError:
+ pass
+
+ return super(Overlay, self).getTemplate(name, followRedirects=followRedirects)
+
diff --git a/mwlib/overlay.py b/mwlib/overlay.py
new file mode 100644
index 0000000..68c4774
--- /dev/null
+++ b/mwlib/overlay.py
@@ -0,0 +1,22 @@
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+
+class OverlayDB(object):
+ def __init__(self, db, basedir):
+ self.db = db
+ self.basedir = basedir
+
+ def getRawArticle(self, title):
+ p = os.path.join(self.basedir, title)
+ if os.path.isfile(p):
+ return unicode(open(p, 'rb').read(), 'utf-8')
+ return self.db.getRawArticle(title)
+
+ def getTemplate(self, title, followRedirects=False):
+ p = os.path.join(self.basedir, title)
+ if os.path.isfile(p):
+ return unicode(open(p, 'rb').read(), 'utf-8')
+ return self.db.getTemplate(title, followRedirects=followRedirects)
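+
+# Usage sketch (hypothetical paths): files below basedir override the wrapped
+# db, everything else falls through to it:
+#
+#   db = OverlayDB(wikidb, '/path/to/overrides')
+#   db.getRawArticle(u'Main Page')  # read from /path/to/overrides/Main Page if present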
diff --git a/mwlib/parser.py b/mwlib/parser.py
new file mode 100755
index 0000000..08869c5
--- /dev/null
+++ b/mwlib/parser.py
@@ -0,0 +1,1416 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import sys
+import os
+import re
+
+from mwlib.scanner import tokenize, TagToken, EndTagToken
+from mwlib.log import Log
+
+log = Log("parser")
+
+
+tag_li = TagToken("li")
+tag_div = TagToken("div")
+
+class TokenSet(object):
+ def __init__(self, lst):
+ self.types = set()
+ self.values = set()
+
+ for x in lst:
+ if isinstance(x, type):
+ self.types.add(x)
+ else:
+ self.values.add(x)
+
+ def __contains__(self, x):
+ return x in self.values or type(x) in self.types
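+    # Example of membership in such a set: TokenSet(['TEXT', TagToken])
+    # contains the literal token name 'TEXT' and any TagToken *instance*
+    # (matched by type), but not 'ROW'. The sets below mix both kinds.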
+
+FirstAtom = TokenSet(['TEXT', 'URL', 'SPECIAL', '[[', 'MATH', '\n',
+ 'BEGINTABLE', 'STYLE', 'TIMELINE', 'ITEM', 'URLLINK',
+ TagToken])
+
+FirstParagraph = TokenSet(['SPECIAL', 'URL', 'TEXT', 'TIMELINE', '[[', 'STYLE', 'BEGINTABLE', 'ITEM',
+ 'PRE', 'MATH', '\n', 'PRE', 'EOLSTYLE', 'URLLINK',
+ TagToken])
+
+
+def show(out, node, indent=0):
+ print >>out, " "*indent, node
+ for x in node:
+ show(out, x, indent+1)
+
+
+paramrx = re.compile("(?P<name>\w+) *= *(?P<value>(?:(?:\".*?\")|(?:(?:\w|[%:])+)))")
+def parseParams(s):
+ def style2dict(s):
+ res = {}
+ for x in s.split(';'):
+ if ':' in x:
+ var, value = x.split(':', 1)
+ var=var.strip()
+ value = value.strip()
+ res[var] = value
+
+ return res
+
+ def maybeInt(v):
+ try:
+ return int(v)
+ except:
+ return v
+
+ r = {}
+ for name, value in paramrx.findall(s):
+ if value.startswith('"'):
+ value = value[1:-1]
+
+ if name=='style':
+ value = style2dict(value)
+ r['style'] = value
+ else:
+ r[name] = maybeInt(value)
+ return r
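+# Example (follows from paramrx and style2dict above):
+#   parseParams('align="center" width=300 style="color:red; border:1px"')
+#   == {'align': 'center', 'width': 300,
+#       'style': {'color': 'red', 'border': '1px'}}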
+
+
+
+
+class Node(object):
+ caption = ''
+
+ def __init__(self, caption=''):
+ self.children = []
+ self.caption = caption
+
+ def hasContent(self):
+ for x in self.children:
+ if x.hasContent():
+ return True
+ return False
+
+ def append(self, c, merge=False):
+ if c is None:
+ return
+
+ if merge and type(c)==Text and self.children and type(self.children[-1])==Text:
+ self.children[-1].caption += c.caption
+ else:
+ self.children.append(c)
+
+ def __iter__(self):
+ for x in self.children:
+ yield x
+
+ def __repr__(self):
+ return "%s %r: %s children" % (self.__class__.__name__, self.caption, len(self.children))
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__)
+ and self.caption == other.caption
+ and self.children == other.children)
+
+ def __ne__(self, other):
+ return not(self==other)
+
+ def allchildren(self): # name is broken, returns self, which is not a child
+ yield self
+ for c in self.children:
+ for x in c.allchildren():
+ yield x
+
+ def find(self, tp):
+ """find instances of type tp in self.allchildren()"""
+ return [x for x in self.allchildren() if isinstance(x, tp)]
+
+
+ def filter(self, fun):
+ for x in self.allchildren():
+ if fun(x):
+ yield x
+
+ def _asText(self, out):
+ out.write(self.caption)
+ for x in self.children:
+ x._asText(out)
+
+ def asText(self, ):
+ from StringIO import StringIO
+ out = StringIO()
+ self._asText(out)
+ return out.getvalue()
+
+class Math(Node): pass
+class Ref(Node): pass
+class Item(Node): pass
+class ItemList(Node):
+ numbered = False
+ def append(self, node, merge=False):
+ if not isinstance(node, Item):
+ c=Item()
+ c.append(node)
+ self.children.append(c)
+ else:
+ self.children.append(node)
+
+class Style(Node): pass
+class Book(Node): pass
+class Magic(Node): pass
+class Chapter(Node): pass
+class Article(Node): pass
+class Paragraph(Node): pass
+class Section(Node): pass
+class Timeline(Node): pass
+class TagNode(Node): pass
+class PreFormatted(TagNode): pass
+class URL(Node): pass
+class NamedURL(Node): pass
+
+
+
+class _VListNode(Node):
+ def __init__(self, caption=''):
+ Node.__init__(self, caption)
+ self.vlist = {}
+
+ def __repr__(self):
+ return "%s %r %s: %s children" % (self.__class__.__name__, self.caption, self.vlist, len(self.children))
+
+class Table(_VListNode):
+ pass
+
+class Row(_VListNode):
+ pass
+
+class Cell(_VListNode):
+ pass
+
+class Caption(_VListNode):
+ pass
+
+class Link(Node):
+ target = None
+
+ specialPrefixes = set([
+ # English
+ "wikipedia", "wiktionary", "wikibooks", "wikisource", "wikiquote", "meta", "talk",
+ "commons", "wikinews", "template", "wikitravel", "help",
+ # German
+ "vorlage",
+ # Spanish
+ ])
+
+ imageKeywords = set([
+ "image", "imagen", "bild"
+ ])
+
+ categoryKeywords = set([
+ "category", "kategorie"
+ ])
+
+ from mwlib.lang import languages
+ colon = False
+
+ def hasContent(self):
+ if self.target:
+ return True
+ return False
+
+ def _specialize(self):
+ if not self.children:
+ return
+
+ if type(self.children[0]) != Text:
+ return
+
+ self.target = target = self.children[0].caption.strip()
+ del self.children[0]
+ if self.children and self.children[0] == Control("|"):
+ del self.children[0]
+
+ pic = self.target
+ if pic.startswith(':'):
+ self.colon = True
+
+
+
+ # pic == "Bild:Wappen_von_Budenheim.png"
+
+ pic = pic.strip(': ')
+ if ':' not in pic:
+ return
+
+ linktype, pic = pic.split(':', 1)
+ linktype = linktype.lower().strip(" :")
+
+ if linktype in self.categoryKeywords:
+ self.__class__ = CategoryLink
+ self.target = pic.strip()
+ return
+
+ if linktype in self.specialPrefixes:
+ self.__class__ = SpecialLink
+ self.target = pic.strip()
+ self.ns = linktype
+
+ return
+
+ if linktype in self.languages:
+ self.__class__ = LangLink
+ return
+
+
+ if linktype not in self.imageKeywords:
+ # assume a LangLink
+ log.info("Unknown linktype:", repr(linktype))
+ if len(linktype)==2:
+ self.__class__ = LangLink
+ return
+
+
+ # pic == "Wappen_von_Budenheim.png"
+
+ # WTB: See es.wikipedia.org/wiki/Provincia_de_Lima
+ #try:
+ # prefix, suffix = pic.rsplit('.', 1)
+ #except ValueError:
+ # return
+ #if suffix.lower() in ['jpg', 'jpeg', 'gif', 'png', 'svg']:
+
+ self.__class__ = ImageLink
+ self.target = pic.strip()
+
+
+
+ idx = 0
+ last = []
+
+ while idx<len(self.children):
+ x = self.children[idx]
+ if x == Control("|"):
+ if idx:
+ last = self.children[:idx]
+
+ del self.children[:idx+1]
+ idx = 0
+ continue
+
+ if not type(x)==Text:
+ idx += 1
+ continue
+
+ x = x.caption.lower()
+
+ if x == 'thumb' or x=='thumbnail':
+ self.thumb = True
+ del self.children[idx]
+ continue
+
+ if x in ['left', 'right', 'center', 'none']:
+ self.align = x
+ del self.children[idx]
+ continue
+
+ if x == 'frame' or x=='framed' or x=='enframed':
+ self.frame = True
+ del self.children[idx]
+ continue
+
+
+ if x.endswith('px'):
+ # x200px
+ # 100x200px
+ # 200px
+ x = x[:-2]
+ width, height = (x.split('x')+['0'])[:2]
+ try:
+ width = int(width)
+ except ValueError:
+ width = 0
+
+ try:
+ height = int(height)
+ except ValueError:
+ height = 0
+
+ self.width = width
+ self.height = height
+ del self.children[idx]
+ continue
+
+ idx += 1
+
+ if not self.children:
+ self.children = last
+
+class ImageLink(Link):
+ target = None
+ width = None
+ height = None
+ align = ''
+ thumb = False
+ frame = False
+
+ def isInline(self):
+ return not bool(self.align or self.thumb or self.frame)
+
+class LangLink(Link):
+ pass
+
+class CategoryLink(Link):
+ pass
+
+class SpecialLink(Link):
+ pass
+
+
+class Text(Node):
+ def __repr__(self):
+ return repr(self.caption)
+
+ def __init__(self, txt):
+ self.caption = txt
+ self.children = []
+
+ def hasContent(self):
+ if self.caption.strip():
+ return True
+ return False
+
+class Control(Text):
+ pass
+
+def _parseAtomFromString(s):
+ from mwlib import scanner
+ tokens = scanner.tokenize(s)
+ p=Parser(tokens)
+ try:
+ return p.parseAtom()
+ except Exception, err:
+ log.error("exception while parsing %r: %r" % (s, err))
+ return None
+
+
+
+def parse_fields_in_imagemap(imap):
+
+ if imap.image:
+ imap.imagelink = _parseAtomFromString(u'[['+imap.image+']]')
+ if not isinstance(imap.imagelink, ImageLink):
+ imap.imagelink = None
+
+ # FIXME: the links of objects inside 'entries' array should also be parsed
+
+
+def append_br_tag(node):
+ """append a self-closing 'br' TagNode"""
+ br = TagNode("br")
+ br.starttext = '<br />'
+ br.endtext = ''
+ node.append(br)
+
+class Parser(object):
+ def __init__(self, tokens, name=''):
+ self.tokens = tokens
+ self.pos = 0
+ self.name = name
+ self.lastpos = 0
+ self.count = 0
+
+ @property
+ def token(self):
+ t=self.tokens[self.pos]
+ if self.pos == self.lastpos:
+ self.count += 1
+ if self.count > 500:
+ from mwlib.caller import caller
+
+ raise RuntimeError("internal parser error: %s" % ((self.pos, t, caller()), ))
+ else:
+ self.count = 0
+ self.lastpos = self.pos
+
+
+ return t
+
+
+
+ @property
+ def left(self):
+ return self.pos < len(self.tokens)
+
+ def next(self):
+ self.pos += 1
+
+ def parseAtom(self):
+ token = self.token
+
+ if token[0]=='TEXT':
+ self.next()
+ return Text(token[1])
+ elif token[0]=='URL':
+ self.next()
+ return URL(token[1])
+ elif token[0]=='URLLINK':
+ return self.parseUrlLink()
+ elif token[0]=='SPECIAL':
+ self.next()
+ return Text(token[1])
+ elif token[0]=='[[':
+ return self.parseLink()
+ elif token[0]=='MATH':
+ return self.parseMath()
+ elif token[0]=='\n':
+ self.next()
+ return Text(token[1])
+ elif token[0]=='BEGINTABLE':
+ return self.parseTable()
+ elif token[0]=='STYLE':
+ return self.parseStyle()
+ elif token[0]=='TIMELINE':
+ return self.parseTimeline()
+ elif token[0]=='ITEM':
+ return self.parseItemList()
+ elif isinstance(token[0], TagToken):
+ return self.parseTagToken()
+ else:
+ raise RuntimeError("not handled: %s" % (token,))
+
+ def parseUrlLink(self):
+ u = self.token[1][1:]
+ n = Node()
+ n.append(Text("["))
+ n.append(URL(u))
+
+ self.next()
+
+ while self.left:
+ if self.tokens[self.pos:self.pos+2] == [(']]', ']]'), ('SPECIAL', u']')]:
+ self.tokens[self.pos:self.pos+2] = [('SPECIAL', ']'), (']]', ']]')]
+
+ token = self.token
+
+
+ if token[0] == 'SPECIAL' and token[1]==']':
+ self.next()
+ n.__class__ = NamedURL
+ n.caption = u
+ del n.children[:2]
+ break
+ elif token[0] in FirstAtom:
+ n.append(self.parseAtom())
+ else:
+ break
+
+ return n
+
+
+ def parseArticle(self):
+ a=Article(self.name)
+
+ while self.left:
+ token = self.token
+ if token[0] == 'SECTION':
+ a.append(self.parseSection())
+ elif token[0]=='BREAK':
+ self.next()
+ elif token[0] in FirstParagraph:
+ a.append(self.parseParagraph())
+ else:
+ log.info("in parseArticle: skipping", token)
+ self.next()
+
+ return a
+
+ def parseLink(self):
+ break_at = TokenSet(['BREAK', EndTagToken, 'SECTION'])
+
+ obj = Link()
+ self.next()
+ while self.left:
+ token = self.token
+ if token[0] == ']]':
+ self.next()
+ break
+ elif token[0]=='SPECIAL' and token[1]==']':
+ self.next()
+ break
+ elif token[1] == '|' or token[1]=="||":
+ obj.append(Control('|'))
+ self.next()
+ elif token[0]=='TEXT' or token[0]=='SPECIAL' or token[0]=='\n':
+ obj.append(Text(token[1]), merge=True)
+ self.next()
+ elif token[0] in break_at:
+ break
+ elif token[0] in FirstAtom:
+ obj.append(self.parseAtom())
+ elif token[1].startswith("|"):
+ obj.append(Control("|"))
+ obj.append(Text(token[1][1:]))
+ self.next()
+ else:
+ log.info("assuming text in parseLink", token)
+ obj.append(Text(token[1]), merge=True)
+ self.next()
+
+ obj._specialize()
+
+ return obj
+
+ def parseTag(self):
+ token = self.token[0]
+
+ n = TagNode(token.t)
+ if token.values:
+ n.values = token.values
+ n.vlist = parseParams(self.token[1])
+
+ n.starttext = token.text
+ n.endtext = u'</%s>' % token.t
+ self.next()
+
+ if token.selfClosing:
+ return n
+
+
+ end = EndTagToken(token.t)
+
+ while self.left:
+ token = self.token
+ if token[0]==end:
+ n.endtext = token[0].text
+ self.next()
+ break
+ elif token[0]=='BREAK':
+ self.next()
+ else:
+ if token[0] not in FirstParagraph:
+ log.warn("tag not closed", n, token)
+ break
+ n.append(self.parseParagraph())
+
+ return n
+
+ def parsePRETag(self):
+ token = self.token[0]
+ if token.t.lower()=='pre':
+ n=PreFormatted()
+ else:
+ n=TagNode(token.t)
+
+ n.vlist = parseParams(self.token[1])
+
+ end = EndTagToken(self.token[0].t)
+ self.next()
+
+ txt = []
+ while self.left:
+ token = self.token
+ if token[0]==end:
+ self.next()
+ break
+ txt.append(token[1])
+ self.next()
+
+ n.append(Text("".join(txt)))
+ return n
+
+ parseCODETag = parsePRETag
+ parseSOURCETag = parsePRETag
+ def parseA7831D532A30DF0CD772BBC895944EC1Tag(self):
+ p = self.parseTag()
+ p.__class__ = Magic
+ return p
+
+ parseREFTag = parseTag
+ parseREFERENCESTag = parseTag
+
+ parseDIVTag = parseTag
+ parseSPANTag = parseTag
+ parseINDEXTag = parseTag
+ parseTTTag = parseTag
+
+ parseH1Tag = parseTag
+ parseH2Tag = parseTag
+ parseH3Tag = parseTag
+ parseH4Tag = parseTag
+ parseH5Tag = parseTag
+ parseH6Tag = parseTag
+
+ parseINPUTBOXTag = parseTag
+
+ parseRSSTag = parseTag
+
+ parseSTRIKETag = parseTag
+ parseCODETag = parseTag
+ parseDELTag = parseTag
+ parseINSTag = parseTag
+ parseCENTERTag = parseTag
+ parseSTARTFEEDTag = parseTag
+ parseENDFEEDTag = parseTag
+ parseCENTERTag = parseTag
+
+ def parseGALLERYTag(self):
+ node = self.parseTag()
+ txt = "".join(x.caption for x in node.find(Text))
+ #print "GALLERY:", repr(txt)
+
+ children=[]
+
+ lines = [x.strip() for x in txt.split("\n")]
+ for x in lines:
+ if not x:
+ continue
+
+ # either image link or text inside
+ # FIXME: Styles and links in text are ignored!
+ n=_parseAtomFromString(u'[['+x+']]')
+
+ if isinstance(n, ImageLink):
+ children.append(n)
+ else:
+ children.append(Text(x))
+
+ node.children=children
+
+ return node
+
+ def parseIMAGEMAPTag(self):
+ node = self.parseTag()
+ txt = "".join(x.caption for x in node.find(Text))
+ #from mwlib import imgmap
+ #node.imagemap = imgmap.ImageMapFromString(txt)
+
+ class FakeImageMap(object):
+ pass
+
+ node.imagemap = FakeImageMap()
+ node.imagemap.entries = []
+ node.imagemap.imagelink = None
+ match = re.search('Image:.*', txt)
+
+ if match:
+ node.imagemap.image = match.group(0)
+ else:
+ node.imagemap.image = None
+
+ parse_fields_in_imagemap(node.imagemap)
+
+ #print node.imagemap
+ return node
+
+ def parseSection(self):
+ s = Section()
+
+ level = self.token[1].count('=')
+ s.level = level
+ closelevel = 0
+
+ self.next()
+
+ title = Node()
+ while self.left:
+ token = self.token
+
+ if token[0] == 'ENDSECTION':
+ closelevel = self.token[1].count('=')
+ self.next()
+ break
+ elif token[0] == '[[':
+ title.append(self.parseLink())
+ elif token[0] == "STYLE":
+ title.append(self.parseStyle())
+ elif token[0] == 'TEXT':
+ self.next()
+ title.append(Text(token[1]))
+ elif isinstance(token[0], TagToken):
+ title.append(self.parseTagToken())
+ elif token[0] == 'URLLINK':
+ title.append(self.parseUrlLink())
+ elif token[0] == 'MATH':
+ title.append(self.parseMath())
+ else:
+ self.next()
+ title.append(Text(token[1]))
+
+ s.level = min(level, closelevel)
+ if s.level==0:
+ title.children.insert(0, Text("="*level))
+ s.__class__ = Node
+ else:
+ diff = closelevel-level
+ if diff>0:
+ title.append(Text("="*diff))
+ elif diff<0:
+ title.children.insert(0, Text("="*(-diff)))
+
+ s.append(title)
+
+
+ while self.left:
+ token = self.token
+ if token[0] == 'SECTION':
+ if token[1].count('=') <= level:
+ return s
+
+ s.append(self.parseSection())
+ elif token[0] in FirstParagraph:
+ s.append(self.parseParagraph())
+ else:
+ log.info("in parseSection: skipping", token)
+ break
+
+ return s
+
+ def parseStyle(self):
+ end = self.token[1]
+ b = Style(self.token[1])
+ self.next()
+
+ break_at = TokenSet(['BREAK', '\n', 'ENDEOLSTYLE', 'SECTION', 'ENDSECTION',
+ 'BEGINTABLE', ']]', 'ROW', 'COLUMN', 'ENDTABLE', EndTagToken])
+
+ while self.left:
+ token = self.token
+ if token[0]=="STYLE":
+ if token[1]==end:
+ self.next()
+ break
+ else:
+ new = token[1]
+ if end=="'''''":
+ if token[1]=="''":
+ new = "'''"
+ else:
+ new = "''"
+ elif end=="''":
+ if token[1]=="'''":
+ new = "'''''"
+ elif token[1]=="'''''":
+ new = "'''"
+ elif end=="'''":
+ if token[1]=="''":
+ new = "'''''"
+ elif token[1]=="'''''":
+ new = "''"
+
+ self.tokens[self.pos] = ("STYLE", new)
+ break
+ elif token[0] in break_at:
+ break
+ elif token[0] in FirstAtom:
+ b.append(self.parseAtom())
+ else:
+ log.info("assuming text in parseStyle", token)
+ b.append(Text(token[1]))
+ self.next()
+
+ return b
+
+
+ def parseColumn(self):
+ token = self.token
+ c = Cell()
+
+ params = ''
+ if "|" in token[1] or "!" in token[1]: # not a html cell
+ # search for the first occurence of "||", "|", "\n" in the next tokens
+ # if it's a "|" we have a parameter list
+ self.next()
+ savepos = self.pos
+
+ while self.left:
+ token = self.token
+ self.next()
+ if token[0] in ("\n", "BREAK", "[[", "ROW", "ENDTABLE"):
+ params = ''
+ self.pos = savepos
+ break
+ elif (token[0]=='SPECIAL' or token[0]=='COLUMN') and token[1]=='|':
+ break
+ params += token[1]
+
+ c.vlist = parseParams(params)
+
+ elif token[0]=='COLUMN': # html cell
+ params=parseParams(token[1])
+ #print "CELLTOKEN:", token
+ #print "PARAMS:", params
+ c.vlist = params
+ self.next()
+
+
+
+ while self.left:
+ token = self.token
+ if token[0] in ("COLUMN", "ENDTABLE", "ROW"):
+ break
+
+ if token[0] == 'BEGINTABLE':
+ c.append(self.parseTable())
+ elif token[0]=='SPECIAL' and token[1] == '|':
+ self.next()
+ elif token[0] == 'SECTION':
+ c.append(self.parseSection())
+ elif token[0] in FirstParagraph:
+ c.append(self.parseParagraph())
+ elif isinstance(token[0], EndTagToken):
+ log.info("ignoring %r in parseColumn" % (token,))
+ self.next()
+ else:
+ log.info("assuming text in parseColumn", token)
+ c.append(Text(token[1]))
+ self.next()
+
+ return c
+
+
+ def parseRow(self):
+ r = Row()
+ r.vlist={}
+
+ token = self.token
+ params = ''
+ if token[0]=='ROW':
+ self.next()
+ if "|-" in token[1]:
+ # everything till the next newline/break is a parameter list
+ while self.left:
+ token = self.token
+ if token[0]=='\n' or token[0]=='BREAK':
+ break
+ else:
+ params += token[1]
+ self.next()
+ r.vlist = parseParams(params)
+
+ else:
+ # html row
+ r.vlist = parseParams(token[1])
+
+
+ while self.left:
+ token = self.token
+ if token[0] == 'COLUMN':
+ r.append(self.parseColumn())
+ elif token[0] == 'ENDTABLE':
+ return r
+ elif token[0] == 'ROW':
+ return r
+ elif token[0] == 'BREAK':
+ self.next()
+ elif token[0]=='\n':
+ self.next()
+ else:
+ log.warn("skipping in parseRow: %r" % (token,))
+ self.next()
+ return r
+
+ def parseCaption(self):
+ token = self.token
+ self.next()
+ n = Caption()
+ params = ""
+ if token[1].strip().startswith("|+"):
+ # search for the first occurrence of "||", "|", "\n" in the next tokens
+ # if it's a "|" we have a parameter list
+ savepos = self.pos
+ while self.left:
+ token = self.token
+ self.next()
+ if token[0] in ("\n", "BREAK", "[[", "ROW", "COLUMN", "ENDTABLE"):
+ params = ''
+ self.pos = savepos
+ break
+ elif token[0]=='SPECIAL' and token[1]=='|':
+ break
+ params += token[1]
+
+ n.vlist = parseParams(params)
+
+ while self.left:
+ token = self.token
+ if token[0] in ('TEXT' , 'SPECIAL', '\n'):
+ if token[1]!='|':
+ n.append(Text(token[1]))
+ self.next()
+ elif token[0] == 'STYLE':
+ n.append(self.parseStyle())
+ elif isinstance(token[0], TagToken):
+ n.append(self.parseTagToken())
+ elif token[0] == '[[':
+ n.append(self.parseLink())
+ else:
+ break
+ return n
+
+ def parseTable(self):
+ token = self.token
+ self.next()
+ t = Table()
+
+ params = ""
+ if "{|" in token[1]: # not a <table> tag
+ # everything till the next newline/break is a parameter list
+ while self.left:
+ token = self.token
+ if token[0]=='\n' or token[0]=='BREAK':
+ break
+ else:
+ params += token[1]
+ self.next()
+ t.vlist = parseParams(params)
+ else:
+ t.vlist = parseParams(token[1])
+
+ while self.left:
+ token = self.token
+ if token[0]=='ROW' or token[0]=='COLUMN':
+ t.append(self.parseRow())
+ elif token[0]=='TABLECAPTION':
+ t.append(self.parseCaption())
+ elif token[0]=='ENDTABLE':
+ self.next()
+ break
+ elif token[0]=='\n':
+ self.next()
+ else:
+ log.warn("skipping in parseTable", token)
+ self.next()
+ #t.append(self.parseRow())
+
+ return t
+
+ def parseMath(self):
+ self.next()
+ caption = u''
+ while self.left:
+ token = self.token
+ self.next()
+ if token[0]=='ENDMATH':
+ break
+ caption += token[1]
+ return Math(caption)
+
+ def parseTimeline(self):
+ t=Timeline()
+ self.next()
+ snippets = []
+ while self.left:
+ token = self.token
+ self.next()
+ if token[0]=='TIMELINE':
+ break
+ snippets.append(token[1])
+ t.caption = "".join(snippets)
+ return t
+
+ def parseEOLStyle(self):
+ token = self.token
+ maybe_definition = False
+ if token[1]==';':
+ p=Style(";")
+ maybe_definition = True
+ elif token[1].startswith(':'):
+ p=Style(token[1])
+ else:
+ p=Style(":")
+
+ assert p
+ retval = p
+
+ self.next()
+
+ last = None
+ # search for the newline and replace it with ENDEOLSTYLE
+ for idx in range(self.pos, len(self.tokens)-1):
+ if self.tokens[idx][0]=='BREAK' or self.tokens[idx][0]=='\n':
+ last = idx, self.tokens[idx]
+ self.tokens[idx] = ("ENDEOLSTYLE", self.tokens[idx][1])
+ break
+
+ break_at = TokenSet(['ENDEOLSTYLE', 'BEGINTABLE', 'BREAK', EndTagToken])
+
+ while self.left:
+ token = self.token
+ if token[0] in break_at:
+ break
+ elif maybe_definition and token[1]==':':
+ self.next()
+ maybe_definition = False
+ retval = Node()
+ retval.append(p)
+ p = Style(":")
+ retval.append(p)
+
+ elif token[0] in FirstAtom:
+ p.append(self.parseAtom())
+ else:
+ log.info("in parseEOLStyle: assuming text", token)
+ p.append(Text(token[1]))
+ self.next()
+
+ if last:
+ self.tokens[last[0]] = last[1]
+
+ return retval
+
+ def parseParagraph(self):
+ p = Node()
+
+ while self.left:
+ token = self.token
+ if token[0]=='EOLSTYLE':
+ p.append(self.parseEOLStyle())
+ elif token[0]=='PRE':
+ pre = self.parsePre()
+ if pre is None:
+ # empty line with spaces. handle like BREAK
+ p.__class__ = Paragraph
+ break
+ p.append(pre)
+ elif token[0] == 'BREAK':
+ self.next()
+ p.__class__ = Paragraph
+ break
+ elif token[0] == 'SECTION':
+ p.__class__ = Paragraph
+ break
+ elif token[0] == 'ENDSECTION':
+ p.append(Text(token[1]))
+ self.next()
+ elif token[0] in FirstAtom:
+ p.append(self.parseAtom())
+ else:
+ break
+
+ if not self.left:
+ p.__class__ = Paragraph
+
+ if p.children:
+ return p
+ else:
+ return None
+
+ def parseTagToken(self):
+ tag = self.token[0].t
+ try:
+ m=getattr(self, 'parse'+tag.upper()+'Tag')
+ except (AttributeError, UnicodeEncodeError):
+ t=Text(self.token[1])
+ self.next()
+ return t
+ else:
+ return m()
+
+ def parseEMTag(self):
+ return self._parseStyledTag(Style("''"))
+
+ def parseITag(self):
+ return self._parseStyledTag(Style("''"))
+
+ def parseBTag(self):
+ return self._parseStyledTag(Style("'''"))
+
+ def parseSTRONGTag(self):
+ return self._parseStyledTag(Style("'''"))
+
+ def parseBLOCKQUOTETag(self):
+ return self._parseStyledTag(Style(":"))
+
+ def _parseStyledTag(self, style=None):
+
+ token = self.token[0]
+ if style is None:
+ style = Style(token.t)
+
+ b = style
+ end = EndTagToken(token.t)
+ start = TagToken(token.t)
+ self.next()
+
+
+ if token.selfClosing:
+ return style
+
+ break_at = set(["ENDTABLE", "ROW", "COLUMN", "ITEM", "BREAK", "SECTION", "BEGINTABLE"])
+
+ while self.left:
+ token = self.token
+ if token[0] in break_at:
+ break
+ elif token[0]=='\n':
+ b.append(Text(token[1]))
+ self.next()
+ elif token[0]==end:
+ self.next()
+ break
+ elif isinstance(token[0], EndTagToken):
+ break
+ elif isinstance(token[0], TagToken):
+ if token[0]==start:
+ self.next() # 'Nuclear fuel' looks strange otherwise
+ break
+ b.append(self.parseTagToken())
+ elif token[0] in FirstAtom:
+ b.append(self.parseAtom())
+ else:
+ log.info("_parseStyledTag: assuming text", token)
+ b.append(Text(token[1]))
+ self.next()
+
+ return b
+
+ parseVARTag = parseCITETag = parseSTag = parseSUPTag = parseSUBTag = parseBIGTag = parseSMALLTag = _parseStyledTag
+
+ def parseBRTag(self):
+ token = self.token[0]
+ n = TagNode(token.t)
+ n.starttext = token.text
+ n.endtext = u''
+ self.next()
+ return n
+
+ parseHRTag = parseBRTag
+
+ def parseUTag(self):
+ token = self.token
+ if "overline" in self.token[1].lower():
+ s = Style("overline")
+ else:
+ s = None
+
+ return self._parseStyledTag(s)
+
+ def parsePre(self):
+ p = n = PreFormatted()
+ token = self.token
+ p.append(Text(token[1]))
+
+ self.next()
+
+ # find first '\n' not followed by a 'PRE' token
+ last = None
+ for idx in range(self.pos, len(self.tokens)-1):
+ if self.tokens[idx][0] in ['ROW', 'COLUMN', 'BEGINTABLE', 'ENDTABLE', 'TIMELINE', 'MATH']:
+ return None
+
+ if self.tokens[idx][0]=='BREAK':
+ break
+
+ if self.tokens[idx][0]=='\n' and self.tokens[idx+1][0]!='PRE':
+ last = idx, self.tokens[idx]
+ self.tokens[idx]=('ENDPRE', '\n')
+ break
+
+
+ while self.left:
+ token = self.token
+ if token[0] == 'ENDPRE' or token[0]=='BREAK':
+ break
+ if token[0]=='\n' or token[0]=='PRE' or token[0]=='TEXT':
+ p.append(Text(token[1]))
+ self.next()
+ elif token[0] == 'SPECIAL':
+ p.append(Text(token[1]))
+ self.next()
+ elif isinstance(token[0], EndTagToken):
+ break
+ elif isinstance(token[0], TagToken):
+ if token[0] == tag_div:
+ break
+
+ p.append(self.parseTagToken())
+ elif token[0] in FirstAtom:
+ p.append(self.parseAtom())
+ else:
+ log.info("in parsePre: assuming text", token)
+ p.append(Text(token[1]))
+ self.next()
+
+ if last:
+ self.tokens[last[0]] = last[1]
+
+ for x in p:
+ if not isinstance(x, Text):
+ return p
+ if x.caption.strip():
+ return p
+
+ return None
+
+
+
+ def parseOLTag(self):
+ numbered = parseParams(self.token[1]).get('type', '1')
+ return self._parseHTMLList(numbered)
+
+ def parseULTag(self):
+ return self._parseHTMLList(False)
+
+ def parseLITag(self):
+ p = item = Item()
+ self.next()
+ break_at = TokenSet([EndTagToken, 'ENDTABLE', 'SECTION'])
+ while self.left:
+ token = self.token
+ if token[0] == '\n':
+ p.append(Text(token[1]))
+ self.next()
+ elif token[0] == 'EOLSTYLE':
+ p.append(self.parseEOLStyle())
+ elif token[0]=='BREAK':
+ append_br_tag(p)
+ self.next()
+ elif token[0]==tag_li:
+ break
+ elif token[0]==EndTagToken("li"):
+ self.next()
+ break
+ elif token[0] in break_at:
+ break
+ elif token[0] in FirstAtom:
+ p.append(self.parseAtom())
+ else:
+ log.info("in parseLITag: assuming text", token)
+ p.append(Text(token[1]))
+ self.next()
+
+ return item
+
+
+ def _parseHTMLList(self, numbered):
+ lst = ItemList()
+ lst.numbered = numbered
+
+ end = EndTagToken(self.token[0].t)
+
+ self.next()
+ while self.left:
+ token = self.token
+ if token[0]==end:
+ self.next()
+ break
+ elif isinstance(token[0], TagToken):
+ lst.append(self.parseTagToken())
+ elif token[0]=='ITEM':
+ lst.append(self.parseItemList())
+ elif token[0] in FirstAtom:
+ lst.append(self.parseAtom())
+ else:
+ log.info("assuming text in _parseHTMLList", token)
+ lst.append(Text(token[1]))
+ self.next()
+
+ return lst
+
+
+ def parseItemList(self):
+ # actually this parses multiple nested item lists..
+ items = []
+ while self.left:
+ token = self.token
+ if token[0]=='ITEM':
+ items.append(self.parseItem())
+ else:
+ break
+
+ # hack
+ commonprefix = lambda x,y : os.path.commonprefix([x,y])
+
+ current_prefix = u''
+ stack = [Node()]
+
+ def append_item(parent, node):
+ if parent is stack[0]:
+ parent.append(node)
+ return
+
+ if not parent.children:
+ parent.children.append(Item())
+
+ parent.children[-1].append(node)
+
+ for item in items:
+ prefix = item.prefix.strip(":")
+ common = commonprefix(current_prefix, item.prefix)
+
+ stack = stack[:len(common)+1]
+
+ create = prefix[len(common):]
+ for x in create:
+ itemlist = ItemList()
+ itemlist.numbered = (x=='#')
+ append_item(stack[-1], itemlist)
+ stack.append(itemlist)
+ stack[-1].append(item)
+ current_prefix = prefix
+
+ return stack[0]
+
+ def parseItem(self):
+ p = item = Item()
+ p.prefix = self.token[1]
+
+ self.token[1]
+ break_at = TokenSet(["ENDTABLE", "COLUMN", "ROW"])
+
+ self.next()
+ while self.left:
+ token = self.token
+
+ if token[0] == '\n':
+ self.next()
+ break
+ elif token[0]=='BREAK':
+ break
+ elif token[0]=='SECTION':
+ break
+ elif isinstance(token[0], EndTagToken):
+ break
+ elif token[0] in break_at:
+ break
+ elif token[0] in FirstAtom:
+ p.append(self.parseAtom())
+ else:
+ log.info("in parseItem: assuming text", token)
+ p.append(Text(token[1]))
+ self.next()
+ return item
+
+
+ def parse(self):
+ log.info("Parsing", repr(self.name))
+ try:
+ return self.parseArticle()
+ except Exception, err:
+ log.error("error while parsing article", repr(self.name), repr(err))
+ raise
+
+def main():
+ #import htmlwriter
+ from mwlib.dummydb import DummyDB
+ db = DummyDB()
+
+ for x in sys.argv[1:]:
+ input = unicode(open(x).read(), 'utf8')
+ from mwlib import expander
+ te = expander.Expander(input, pagename=x, wikidb=db)
+ input = te.expandTemplates()
+
+
+ tokens = tokenize(input, x)
+
+ p=Parser(tokens, os.path.basename(x))
+ r = p.parse()
+
+ show(sys.stdout, r, 0)
+
+ #hw = htmlwriter.HTMLWriter(htmlout)
+
+if __name__=="__main__":
+ main()
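
Note: the table and list handling above (parseTable / parseRow / parseColumn / parseItemList) is easiest to check with a small round trip. A minimal sketch, assuming tokenize and show are importable from mwlib.parser exactly as main() uses them, and with a placeholder page name:

    import sys
    from mwlib.parser import Parser, tokenize, show

    wikitext = u"{|\n|-\n| a || b\n|-\n| c || d\n|}\n"
    tokens = tokenize(wikitext, "example")
    tree = Parser(tokens, "example").parse()
    show(sys.stdout, tree, 0)   # should show a Table node with Row and Cell children
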
diff --git a/mwlib/recorddb.py b/mwlib/recorddb.py
new file mode 100755
index 0000000..fe895cf
--- /dev/null
+++ b/mwlib/recorddb.py
@@ -0,0 +1,83 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import simplejson
+import zipfile
+from mwlib import uparser, parser
+import mwlib.log
+log = mwlib.log.Log("zip")
+
+
+class RecordDB(object):
+ def __init__(self, db):
+ assert db is not None, "db must not be None"
+ self.db = db
+ self.articles = {}
+ self.templates = {}
+
+ def getRawArticle(self, name, revision=None):
+ r = self.db.getRawArticle(name, revision=revision)
+ self.articles[name] = {
+ 'revision': revision,
+ 'content-type': 'text/x-wiki',
+ 'content': r,
+ 'url': self.db.getURL(name, revision=revision),
+ 'authors': self.db.getAuthors(name, revision=revision),
+ }
+ return r
+
+ def getTemplate(self, name, followRedirects=False):
+ r = self.db.getTemplate(name, followRedirects=followRedirects)
+ self.templates[name] = {
+ 'content-type': 'text/x-wiki',
+ 'content': r,
+ }
+ return r
+
+
+class ZipfileCreator(object):
+ def __init__(self, zf, wikidb=None, imgdb=None):
+ self.zf = zf
+ self.db = RecordDB(wikidb)
+ self.images = {}
+ self.imgdb = imgdb
+
+ def addObject(self, name, value):
+ """
+ @type name: unicode
+
+ @type value: str
+ """
+
+ self.zf.writestr(name.encode('utf-8'), value)
+
+ def addArticle(self, title, revision=None):
+ a = uparser.parseString(title, revision=revision, wikidb=self.db)
+ for x in a.allchildren():
+ if isinstance(x, parser.ImageLink):
+ name = x.target
+ self.images[name] = {}
+
+ def writeImages(self, size=None):
+ if self.imgdb is None:
+ return
+
+ for name in sorted(self.images.keys()):
+ dp = self.imgdb.getDiskPath(name, size=size)
+ if dp is None:
+ continue
+ self.zf.write(dp, (u"images/%s" % name.replace("'", '-')).encode("utf-8"))
+ self.images[name]['url'] = self.imgdb.getURL(name, size=size)
+ license = self.imgdb.getLicense(name)
+ if license:
+ self.images[name]['license'] = license
+
+ def writeContent(self):
+ self.addObject('content.json', simplejson.dumps(dict(
+ articles=self.db.articles,
+ templates=self.db.templates,
+ images=self.images,
+ )))
+
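A hedged sketch of how RecordDB and ZipfileCreator are meant to work together (the config file name and article title are placeholders; makewiki is defined in mwlib/wiki.py later in this commit):

    import zipfile
    from mwlib import wiki
    from mwlib.recorddb import ZipfileCreator

    env = wiki.makewiki('wikiconf.txt')            # placeholder config path
    zf = zipfile.ZipFile('collection.zip', 'w')
    creator = ZipfileCreator(zf, wikidb=env['wiki'], imgdb=env.get('images'))
    creator.addArticle(u'Physics')                 # parsing records the raw text and image names
    creator.writeImages(size=800)                  # copies image files into images/ inside the zip
    creator.writeContent()                         # dumps articles/templates/images to content.json
    zf.close()

Note that zipwiki.Wiki (also added in this commit) additionally expects a metabook.json entry, which this sketch does not write.
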
diff --git a/mwlib/rendermath.py b/mwlib/rendermath.py
new file mode 100755
index 0000000..10d6cdd
--- /dev/null
+++ b/mwlib/rendermath.py
@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+import re
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+
+from mwlib import texmap
+import mwlib.log
+
+log = mwlib.log.Log("rendermath")
+
+latex = r"""
+%% %(ident)s
+\documentclass[%(fontsize)spt]{article}
+%(extra_header)s
+\usepackage{ucs}
+\usepackage{amsmath}
+\usepackage{amsfonts}
+\usepackage{amssymb}
+
+%% \newcommand{\R}[0]{\mathbb{R}}
+
+\def\Alpha{{A{}}}
+\def\Beta{{B{}}}
+\def\Epsilon{{E{}}}
+\def\Zeta{{Z{}}}
+\def\Eta{{H{}}}
+\def\Iota{{I{}}}
+\def\Kappa{{K{}}}
+\def\Mu{{M{}}}
+\def\Nu{{N{}}}
+\def\Rho{{P{}}}
+\def\Tau{{T{}}}
+\def\Chi{{C{}}}
+
+\usepackage[utf8x]{inputenc}
+\usepackage[dvips]{graphicx}
+\pagestyle{empty}
+\begin{document}
+%(source)s
+\end{document}
+"""
+
+
+
+
+
+
+def mysystem(cmd):
+ err=os.system(cmd)
+ if err:
+ raise RuntimeError("exit code %s while running %r" % (err, cmd))
+
+class Renderer(object):
+ basedir = os.path.expanduser("~/pngmath/")
+
+ def __init__(self, basedir=None, lazy=True):
+ if basedir:
+ self.basedir = os.path.realpath(os.path.join(basedir, 'pngmath/'))
+ if not os.path.exists(self.basedir):
+ #os.makedirs(self.basedir)
+ pass
+ self.lazy = lazy
+
+ def _render_file(self, name, format):
+ assert format in ('pdf', 'png', 'eps'), "rendermath: format %r not supported" % format
+
+ texfile = os.path.join(self.basedir, name+'.tex')
+ srcbase = os.path.join(self.basedir, name)
+
+ cwd = os.getcwd()
+ os.chdir(self.basedir)
+ try:
+ mysystem("latex -interaction=batchmode %s" % texfile)
+ mysystem("dvips -E %s.dvi -o %s.ps" % (srcbase, srcbase))
+ if format=='png':
+ mysystem("convert +adjoin -transparent white -density 300x300 %s.ps %s.png" % (srcbase, srcbase))
+ elif format=='pdf':
+ mysystem("epstopdf %s.ps" % srcbase)
+ elif format=='eps':
+ os.rename("%s.ps" % srcbase, "%s.eps" % srcbase)
+ finally:
+ for x in ['.dvi', '.aux', '.log', '.ps']:
+ p = os.path.join(self.basedir, name+x)
+ try:
+ os.unlink(p)
+ except OSError, err:
+ pass
+
+ os.chdir(cwd)
+
+ def _normalizeLatex(self, latexsource):
+ latexsource = re.compile("\n+").sub("\n", latexsource)
+ return latexsource
+
+ def convert(self, latexsource, lazy=True, format='pdf', addMathEnv=True):
+ assert format in ('pdf', 'png', 'eps'), "rendermath: format %r not supported" % format
+ latexsource = self._normalizeLatex(latexsource)
+ if addMathEnv:
+ latexsource = '$' + latexsource + '$'
+ if format in ('pdf', 'eps'):
+ extra_header = '\usepackage{geometry}\n\geometry{textwidth=3.0in}\n'
+ fontsize = 10
+ else:
+ fontsize = 12
+ extra_header = ''
+
+ latexsource = texmap.convertSymbols(latexsource)
+
+ source = latex % dict(source=latexsource,
+ ident=format,
+ fontsize=fontsize,
+ extra_header=extra_header)
+
+ m=md5()
+ m.update(source)
+ name = m.hexdigest()
+
+ srcbase = os.path.join(self.basedir, name)
+ texfile = os.path.join(self.basedir, name+'.tex')
+ outfile = os.path.join(self.basedir, name+'.'+format)
+
+ if os.path.exists(outfile):
+ return outfile # FIXME
+
+ open(texfile, 'w').write(source)
+
+ if not lazy:
+ self._render_file(name, format)
+
+
+ return outfile
+
+ def render(self, latexsource, lazy=None, addMathEnv=True):
+ if lazy is None:
+ lazy = self.lazy
+ return self.convert(latexsource, lazy=lazy, format='png', addMathEnv=addMathEnv)
+
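A minimal usage sketch for the Renderer above. It assumes latex, dvips and convert are installed, and the pngmath/ directory below basedir must already exist, since the os.makedirs call in __init__ is commented out:

    from mwlib.rendermath import Renderer

    r = Renderer(basedir='/tmp', lazy=False)                      # uses /tmp/pngmath/
    png = r.render(r'\sqrt{x^2 + y^2}')                           # returns /tmp/pngmath/<md5>.png
    pdf = r.convert(r'\int_0^1 x\,dx', lazy=False, format='pdf')
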
diff --git a/mwlib/resources/__init__.py b/mwlib/resources/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/mwlib/resources/__init__.py
diff --git a/mwlib/resources/__init__.pyc b/mwlib/resources/__init__.pyc
new file mode 100644
index 0000000..d46b5c5
--- /dev/null
+++ b/mwlib/resources/__init__.pyc
Binary files differ
diff --git a/mwlib/resources/outgoing_link.gif b/mwlib/resources/outgoing_link.gif
new file mode 100644
index 0000000..d508fc4
--- /dev/null
+++ b/mwlib/resources/outgoing_link.gif
Binary files differ
diff --git a/mwlib/resources/pedia.css b/mwlib/resources/pedia.css
new file mode 100644
index 0000000..3e814ef
--- /dev/null
+++ b/mwlib/resources/pedia.css
@@ -0,0 +1,1250 @@
+/* ############# misc #################*/
+body, p, li, ul, a {
+ margin:0px;
+ padding:0px;
+}
+
+body {
+ background-color:#F4F5E7;
+ font-family:arial;
+ font-size:10pt;
+}
+
+img {
+ border:0;
+ }
+
+* {
+ font-size:10pt;
+ }
+
+h1, h1 * {
+ font-size:20pt;
+ }
+
+h2, h2 * {
+ font-size:14pt;
+ }
+
+h3, h3 * {
+ font-size:13pt;
+ }
+
+.invisible {
+ display: none;
+}
+
+/* ############### base ########### */
+
+* a, * a:hover, * a:visited, * a:active {
+ color:rgb(29,48,161);
+ }
+
+
+
+#header {
+ text-align:right;
+ height:74px;
+ padding:0px;
+ margin:0px;
+ table-layout:fixed;
+ overflow:hidden;
+}
+
+#main_nav {
+ padding-top:5px;
+ margin-right:20px;
+ height:47px !important;
+ /* height:50px !important; */
+ height:54px;
+ /* height:57px; */
+}
+
+#main_nav a {
+ font-size:10pt;
+ font-weight:bold;
+ color: black;
+ text-decoration:none;
+}
+
+#main_nav ul {
+ line-height:12pt;
+}
+
+#main_nav li {
+ display:inline;
+}
+
+#tab_nav {
+ position:absolute;
+ /* top:60px !important;
+ top:57px; */
+
+ top:66px !important;
+ top:63px;
+ right:30px;
+ border:0px;
+ }
+
+
+#errorarea, #busyarea {
+ position:absolute;
+ top:40%;
+ left:30%;
+ width:40%;
+ height:10%;
+ border:2px solid rgb(196,196,196);
+ background-color: rgb(255,136,0);
+ text-align:center;
+ }
+
+head:first-child+body #busyarea { /* this is a hack to prevent IE to read this*/
+ position:fixed;
+ }
+
+#busyarea {
+ /* background-color:#F4F5E7; */
+ background-color:white;
+ }
+
+#errorarea {
+ background-color: rgb(255,136,0);
+ }
+
+a#errorclose {
+ color:black;
+ font-weight:bold;
+ }
+
+/* ######### misc table formatting ################*/
+
+/*
+#managebook table {
+ padding:0px;
+ margin:0px;
+ border-collapse:collapse;
+ border-spacing:0px;
+ }
+*/
+
+.roundbox, .roundbox tr, .roundbox td, .roundbox img, .clean, #tab_nav img, .snippet img {
+ padding:0px;
+ margin:0px;
+ border:0px;
+ border-collapse:collapse;
+ border-spacing:0px;
+ /* border-style:hidden; */
+}
+
+.tablefull {
+ margin:0px;
+ border:0px;
+ border-spacing:0px;
+ width:100%;
+ padding:5px;
+}
+
+/*
+.roundbox td {
+ background-color:white;
+}
+*/
+
+.roundboxContent {
+ background-color:white;
+ }
+
+/* ################## managebook ################## */
+
+#book_nav {
+ line-height:16pt;
+ font-size:10pt;
+}
+
+#book_nav input {
+ margin:0px;
+ vertical-align:bottom;
+ font-size:10pt;
+ width:220px;
+}
+
+ .link {
+ color:black;
+ margin-right:15px;
+}
+#book_nav img {
+ border:0px;
+ padding:0px;
+ margin:0px;
+ margin-left:10px;
+ margin-right:10px;
+ vertical-align:bottom;
+}
+.info li {
+ list-style-type:square;
+}
+
+#info li {
+ list-style-type:square;
+ margin-left:35px;
+}
+
+#info h2 {
+ margin:10px;
+ margin-top:15px;
+ }
+
+#info h2 a {
+ color:black;
+ }
+
+#info h3 {
+ margin-bottom:5px;
+ }
+
+#info p {
+ margin:10px;
+ }
+
+#tab_nav .info {
+ border:1px solid rgb(128,128,128);
+ font-size:8pt;
+ padding-left:20px;
+}
+
+.no_list_style li{
+ list-style-type:none !important;
+}
+
+#info h3 {
+ margin-left:10px;
+ }
+
+/* ################## main ################### */
+
+#main {
+ margin-top:0px;
+ margin-right:0px;
+}
+
+#main_content {
+ padding-left:10px;
+ padding-right:0px;
+ padding-top:5px;
+}
+
+#fromsearch {
+ margin-right:350px;
+ margin-bottom:10px;
+ margin-left:10px;
+ /* background-color: rgb(200, 220, 255); */
+ background-color:rgb(240,240,240);
+ border:2px solid rgb(251,88,33);
+ padding:5px;
+ }
+
+#wikipagecontent {
+ padding: 0px 10px;
+ }
+
+#add_pages {
+ float:right;
+ margin:0px;
+ background-color:white;
+ border:0px;
+}
+
+#add_pages img {
+ border:0px;
+ margin:5px 10px;
+}
+
+/* ******************** wikipedia article formatting ******************* */
+
+#main {
+ font-family: "Trebuchet MS", Trebuchet, Verdana, sans-serif;
+ /* font-family: Palatino Linotype, Book Antiqua, Palatino; */
+ font-size:10pt;
+ overflow:hidden;
+}
+
+#main h1 {
+ font-size:20pt;
+ margin-bottom:30px;
+ margin-right:10px;
+ border-bottom:1px solid rgb(128,128,128);
+}
+
+#main h2 {
+ z-index:-1;
+ font-size:14pt;
+ margin-right:20px;
+ border-bottom:1px solid rgb(128,128,128);
+}
+
+#main h3 {
+ margin-top:10px;
+ margin-bottom:5px;
+ font-size:13pt;
+}
+
+
+#main p {
+ margin-top:5px;
+}
+
+#main a {
+ text-decoration:none;
+}
+
+#main a:hover {
+ text-decoration:underline;
+}
+
+#main ul {
+ margin: 5px 15px;
+}
+#main ul li {
+ list-style-type:square;
+ margin-left:20px;
+}
+
+
+#main table {
+ empty-cells:show;
+ background-color:white;
+ margin:10px;
+}
+
+
+
+#main th {
+ font-weight:bold;
+ }
+
+
+#main .deadlink, #main .deadlink:hover {
+ text-decoration:none;
+ color:black;
+ }
+
+
+#main .infobox {
+ float:right;
+ border:1px solid grey;
+ padding:2px;
+ }
+
+#main .infobox td {
+ padding:2px 4px;
+ }
+
+#main .bordered {
+ border-collapse:collapse;
+ }
+
+#main .bordered td {
+ border:1px solid grey;
+ }
+
+#main .borderless *, #main .borderless {
+ border:0 !important;
+ }
+
+
+#main .infobox * {
+ font-size: 8pt;
+ }
+
+#main sub, #main sup {
+ font-size: 8pt;
+ margin-right:3px;
+ }
+
+
+#main .small * {
+ font-size:8pt;
+ }
+
+
+.border {
+ border:1px solid rgb(128,128,128);
+ border-collapse:collapse;
+}
+
+.image {
+ margin:10px;
+ padding:2px;
+ border:1px solid rgb(128,128,128);
+ background:white;
+}
+
+.clear {
+ clear:both;
+ }
+
+.right {
+ clear:right;
+ float:right;
+}
+
+/*
+.right + .right {
+ clear:right;
+ }
+*/
+
+.left {
+ clear:left;
+ float:left;
+}
+
+/*
+.left + .left {
+ clear:left;
+ }
+*/
+
+#main .formula {
+ vertical-align:middle;
+ }
+
+
+.imagecaption {
+ display:block;
+ font-size:8pt;
+ padding:2px;
+}
+
+.imagecaption *{
+ font-size:8pt;
+}
+
+/* .wikitable, .wikitable tr, */
+
+.wikitable {
+ border-collapse:collapse;
+}
+
+.wikitable td {
+ border:1px solid black;
+ border-spacing:0px;
+ padding:0px 2px;
+ }
+
+.toccolours {
+ border:1px solid black;
+ }
+
+
+/* ###### misc... ##### */
+
+.addButton img {
+ width:12px;
+ height:12px;
+ border:0;
+ margin-right:10px;
+ }
+
+.addButton div {
+ margin-right:10px;
+ width:12px;
+ display:inline;
+ }
+
+
+.addButton a {
+ margin-left:0px;
+ }
+
+
+/* ############## pagelist ####################### */
+
+#pagelist {
+ padding:10px;
+ }
+
+#collTitle {
+ /* margin-left:10px; */
+ font-size:12pt;
+ font-weight:bold;
+ border-bottom:1px solid rgb(128,128,128);
+ }
+
+#collEditTable {
+ margin:10px 0px;
+ }
+
+.box_content {
+ margin:10px 0px;
+ font-size:9pt;
+}
+
+#collInfo {
+ margin-top:10px;
+ font-size:10pt;
+ }
+
+.del_col {
+ /* width:2em; */
+ width:21px;
+ padding:0px 2px;
+ }
+
+.priceInfo {
+ margin-bottom:10px;
+ font-size:8pt;
+ }
+
+#collNumPages {
+ margin-top:10px;
+}
+
+#collNumPages, #collPrice {
+ font-weight:bold;
+ }
+
+#clearbook {
+ margin-top:10px;
+ }
+
+
+div.uitable table{
+ border-collapse: collapse;
+ /* border:1px solid rgb(196,196,196); */
+ cursor:pointer;
+}
+
+div.uitable table tbody tr {
+ padding:3px;
+ margin:3px;
+}
+
+/*
+div.uitable table tbody tr.ui_hover td{
+ background-color:#BACFE4;
+}
+*/
+
+div.uitable table tbody tr.ui_active td {
+ /* background-color:#F4F5E7; */
+ font-weight:bold;
+
+}
+
+div.uitable td {
+ padding:0px 2px;
+ }
+
+div.uitable td:hover {
+ text-decoration:underline;
+ background-color:#BACFE4;
+ }
+
+
+/* ############################# index.html ########################### */
+
+#home_left, #home_right {
+ padding:0px 10px;
+ }
+
+#home_right span {
+ /* font-style:italic; */
+ border-bottom:1px solid rgb(128,128,128);
+
+}
+
+#home_right .heading {
+ width:450px;
+ border-bottom:1px solid rgb(128,128,128);
+ }
+
+
+#home_right li {
+ margin-left:20px;
+ font-style:normal;
+ font-weight: normal;
+ }
+
+#home_left p, #home_right p {
+ margin:0px 0px;
+ }
+
+#home_right h1 {
+ font-size:14pt;
+ /* color:rgb(64,64,64); */
+ color:rgb(32,32,32);
+ margin-right:15px;
+ margin-top:0px;
+ text-decoration:underline;
+ }
+
+#home_right h2 {
+ font-size:14pt;
+ color:rgb(64,64,64);
+ color:rgb(32,32,32);
+ margin-right:15px;
+ display:inline;
+ }
+
+#home_right ul {
+ margin-bottom:10px;
+ list-style-type:square;
+ }
+
+
+#home_left h2 {
+ font-size:14pt;
+ color:rgb(64,64,64);
+ color:rgb(32,32,32);
+ margin-top:5px;
+ margin-bottom:5px;
+ }
+
+#home_left li {
+ list-style-type:none;
+ margin-bottom:5px;
+ margin-left:0px;
+ padding-left:0px;
+ }
+
+#home_left .title {
+ font-weight:bold;
+ font-style:italic;
+ }
+
+#home_left td {
+ padding:3px 2px;
+ }
+
+
+
+#finish_book {
+ line-height:18pt;
+ }
+
+#finish_book .label {
+ font-weight:bold;
+ vertical-align:bottom;
+ text-align:right;
+ padding-right:5px;
+ }
+
+#finish_book .input {
+ vertical-align:bottom;
+ text-align:left;
+ padding-left:5px;
+ }
+
+#finish_book input {
+ padding: 0px 5px;
+ margin:2px 0px;
+ width:400px;
+ }
+
+#booktitle {
+ font-size:14pt;
+}
+
+#bookeditor {
+ font-size:12pt;
+ }
+
+
+#finish_book table {
+ margin-bottom:5px;
+}
+
+#wp_footer {
+ clear:both;
+ float:right;
+ margin-top:10px;
+ margin-bottom:10px;
+ padding:5px;
+ /* border:2px solid rgb(230,230,230); */
+ border:2px solid rgb(251,88,33);
+ background-color:rgb(240,240,240);
+ }
+
+/* ++++++++++++++++++ order ++++++++++++++ */
+
+#order_pay h2 {
+ margin-top:0px;
+ }
+
+#order_content h2, #order_content h3, #showbook_content h2, #showbook_content h3 {
+ margin:5px 0px;
+ }
+
+#order_content h3, #showbook_content h3 {
+ margin-top:15px;
+ }
+
+
+#order_content li, #showbook_content li {
+ margin-right:10px;
+ list-style-type:none;
+ border-bottom:1px dotted grey;
+ }
+
+#order_content, #order_pay, #showbook_content {
+ margin:10px;
+ }
+
+#order_content a, #showbook_content a {
+ text-decoration:none;
+ }
+
+#order_content a:hover, #showbook_content a:hover {
+ text-decoration:underline;
+ }
+
+#cost .label, #ship .label, #contact_form .label {
+ text-align:right;
+ vertical-align:top;
+ }
+#cost .value #ship .value, #contact_form .value {
+ text-align:left;
+ }
+
+#cost td, #ship td, #contact_form td {
+ padding:2px 5px;
+ }
+
+#cost {
+ border:1px solid grey;
+ }
+
+#ship input, #ship textarea, #ship select {
+ width:250px;
+ }
+
+#terms_ok {
+ margin-left:10px;
+ }
+
+#terms_ok input {
+ margin-left:0px;
+
+ }
+
+#contact_form input, #contact_form select {
+ width: 250px;
+ }
+
+ #contact_form textarea {
+ width:400px;
+ }
+
+#contact h2 {
+ margin-top:0px;
+ }
+
+
+.tooltip {
+ background-color: rgb(255, 250, 200);
+ border: 1px solid black;
+ position: absolute;
+ z-index: 10000;
+ width: 200px;
+ padding:2px;
+ text-align:center;
+}
+
+.snippet {
+ background-color: rgb(200, 220, 255);
+ border: 1px solid black;
+ padding: 5px;
+ z-index: 10000;
+ position: absolute;
+ font-size: 10px;
+}
+
+#snippetid, #snippetid * {
+ font-size: 10pt;
+ font-style:normal;
+ font-weight:normal;
+}
+
+#snippetid em {
+ font-style:italic;
+ }
+
+#snippetid strong {
+ font-weight:bold;
+ }
+
+
+.hasborder {
+ border: 2px solid rgb(220,220,220);
+}
+
+.noborder {
+ border-width: 0px;
+ border: 2px solid white;
+}
+
+.greyedout {
+ color: rgb(127, 127, 127);
+}
+
+.notgreyedout {
+ color: black;
+}
+
+#terms dt {
+ margin-top:10px;
+ font-weight:bold;
+ }
+
+#terms .toc li {
+ margin-left:15px;
+ list-style-type:none;
+ }
+
+#terms p {
+ margin-top:10px;
+
+ }
+
+
+#terms p, #terms dt, #terms dd, #terms li, #terms a, #terms strong{
+ font-size:8pt;
+ }
+
+#terms h2, #terms h2 a {
+ font-size:12pt;
+ color:black;
+ }
+
+#terms h3 {
+ font-size:10pt;
+ }
+
+#terms li {
+ list-style-type:square;
+ }
+
+#refreshproposals {
+ padding:5px;
+ margin:5px;
+ margin-right:0px;
+ }
+
+
+.site_link {
+ border-bottom:1px dotted black;
+ color:black;
+ text-decoration:none;
+}
+
+.site_link:hover {
+ text-decoration:none !important;
+ }
+
+
+.topcats, .topcats * {
+ font-weight:bold;
+ list-style-type:none !important;
+ margin:0px;
+ padding:0px;
+ font-size:12pt;
+ border-bottom:1px solid rgb(128,128,128);
+ margin-top:5px;
+ }
+
+.topcats {
+ margin-right:5px;
+}
+
+.subcats {
+ margin-right:0px;
+ }
+
+
+#preview {
+ margin:10px;
+ }
+
+#preview h1 {
+ margin-top:0px;
+ margin-bottom:10px;
+ }
+
+#adobe_info {
+ float:right;
+ width:33%;
+ border:1px solid rgb(128,128,128);
+ border:1px solid rgb(196,196,196);
+ margin:10px;
+ margin-right:0px;
+ margin-top:0px;
+ padding:5px;
+
+ }
+
+#generating {
+ margin:0px;
+ }
+
+#finished p {
+ margin-bottom:10px;
+ }
+
+#affiliate {
+ padding:5px;
+ padding-top:0px;
+ }
+#affiliate p{
+ margin-bottom:10px;
+ }
+
+#affiliate li {
+ list-style-type:square;
+ margin-left:25px;
+ }
+#affiliate ul {
+ margin-bottom:10px;
+}
+
+#affiliatetable td{
+ padding:10px;
+ }
+
+#affiliatetable li {
+ list-style-type:none;
+ }
+
+#affiliatetable input {
+ margin-left:0px;
+ }
+
+.affiliatebox {
+ float:left;
+ width:45%;
+ margin:10px;
+ /* padding:10px; */
+ /* border:2px solid rgb(210,210,210); */
+ /* border:2px solid rgb(251,88,33); */
+ }
+
+.affiliateboxcontent {
+ margin:10px;
+ }
+
+.affiliatebox h2 {
+ margin:0px;
+ margin-bottom:10px;
+ }
+
+.affiliatebox ul li {
+ list-style-type:square;
+ margin-left:15px;
+ }
+
+.affiliatebox .nobullets li {
+ list-style-type:none;
+ margin-left:0px;
+ }
+
+
+
+#welcome {
+ padding:10px;
+ padding-top:5px;
+ margin:0px;
+ }
+
+#welcome h1 {
+ margin:0px;
+ margin-bottom:10px;
+ font-size:16pt;
+ color:rgb(32,32,32);
+ }
+
+
+#home_howto td{
+ padding:5px;
+ vertical-align:top;
+ }
+
+#home_howto .step_num {
+ font-size:20pt;
+ color:rgb(128,128,128);
+ float:left;
+ margin:5px;
+ }
+
+#home_howto li {
+ list-style-type:square;
+ margin-left:20px;
+ }
+
+#news .newsitem {
+ margin: 10px;
+ max-width:500px;
+ }
+
+#news .newsitem h3 {
+ font-size:12pt;
+ margin-bottom:5px;
+ margin-top:10px;
+ }
+
+.newsitem + .newsitem {
+ border-top:1px dotted rgb(128,128,128);
+ }
+
+#news .date {
+ float:right;
+ margin-top:5px;
+ margin-left:10px;
+ font-size:8pt;
+ }
+
+.deadlink {
+ color:black;
+}
+
+.smalltext, .smalltext em, .smalltext * {
+ font-size:8pt !important;
+ }
+
+.template_warningsmart {
+ border:3px solid red;
+}
+.template_warningmanual{
+ border:3px solid orange;
+ }
+
+.template_warningsmart * {
+ background-color:rgb(210,210,210);
+ }
+
+.template_warningmanual * {
+ background-color:rgb(230,230,230);
+ }
+
+
+ #startcats {
+ padding:5px;
+ }
+
+ #startcats h2, #startcats h3 {
+ margin:5px 0px;
+ }
+
+#startcats .topcats {
+ margin-bottom:5px;
+}
+
+#startcats .topcats *, #startcats .subcats * {
+ text-decoration:none;
+ }
+
+#startcats .topcats a {
+ border-bottom:0;
+ }
+
+.startcatbox {
+ width:30%;
+ float:left;
+ padding:10px;
+ padding-bottom:0px;
+ }
+
+.startcatbox h3 {
+ border-bottom:1px solid rgb(128,128,128);
+ }
+
+
+#lowpagecount, #highpagecount {
+ margin:10px 0px;
+ padding:10px;
+ border:2px solid rgb(251,88,33);
+ background-color:rgb(240,240,240);
+ }
+
+#highpagecount {
+ margin-left:10px;
+ }
+
+
+#examplebook {
+ padding:10px;
+ }
+
+#examplebook li {
+ list-style-type: none;
+ text-align:center;
+ margin-bottom:5px;
+ }
+
+#examplebook .book_img img {
+ border:1px dotted rgb(128,128,128);
+ padding:2px;
+ }
+
+#examplebook .info {
+ float:right;
+ border:1px dotted rgb(128,128,128);
+ padding:5px;
+ }
+
+#examplebook .navigation {
+ position:absolute;
+ margin:15px 0px;
+ border:1px dotted rgb(128,128,128);
+ padding:5px 15px;
+ }
+
+#examplebook .navigation li {
+ text-align:left;
+ }
+
+#examplebook .navigation a {
+ text-decoration:none;
+ }
+
+#examplebook .navigation a:hover {
+ text-decoration:underline;
+ }
+
+
+
+
+
+
+/* ######## pssearch */
+
+
+
+.pssnippet strong {
+ font-weight:normal;
+ }
+
+.pssnippet li {
+ margin-left:15px;
+ }
+
+.pssnippethl {
+ background-color: rgb(196, 224, 255);
+}
+
+
+#pssearch a {
+ color:rgb(28,53,110);
+ }
+
+/*
+#pssearch li {
+ margin-bottom:10px;
+ list-style-type:square;
+ }
+*/
+
+li.result {
+ margin-bottom:10px;
+ list-style-type:square;
+ }
+
+#pssearch .details, #pssearch .details * {
+ font-size:8pt;
+ }
+
+.marginbox {
+ padding:5px 10px;
+ }
+
+#searchresultlist .result {
+ margin: 0px 5px;
+ padding: 8px 0px;
+ list-style-type:none;
+ border-bottom:1px dotted rgb(128,128,128);
+ /* border-top:1px dotted rgb(128,128,128);*/
+ /*background:rgb(245, 250, 250);*/
+}
+
+/*
+.alternate {
+ background: rgb(234, 238, 245);
+}
+*/
+
+.clustered {
+ margin-left: 40px;
+}
+
+.resultdetails * {
+ text-align:middle;
+ }
+
+/* display styles for snippets search result snippet*/
+.pssnippet hr {
+ display:none;
+}
+
+.pssnippet li {
+ margin-left:15px;
+}
+
+/* end filter */
+
+#categories li {
+ list-style-type: none;
+}
+
+#categories h2 {
+ margin:0px;
+ font-size: 12pt;
+ }
+
+#categories a:hover {
+ background-color:rgb(1,160,199);
+ color:white;
+ text-decoration:none;
+}
+
+#pssearch .subcats {
+ margin-bottom:0px;
+ margin-left:15px;
+ list-style-type:none;
+}
+
+#searchselection {
+ margin-left:10px;
+ vertical-align:middle;
+}
+
+#searchselection input {
+ color:green;
+ margin-left:10px;
+ margin-right:5px;
+ vertical-align:middle;
+}
+
+.searchresulturl {
+ color: rgb(0, 96, 120);
+}
+
+.psformat {
+ font-size: 8pt;
+}
+
+.psrf {
+ float: right;
+}
+
+.pscategorylink, .psarticlelink {
+ white-space: nowrap;
+}
+
+.matchedcategories {
+ border: 1px dotted grey;
+ /*background-color: rgb(245, 245, 245);*/
+ margin-left: 10px;
+ margin-right: 10px;
+ margin-bottom: 15px;
+ padding: 4px;
+}
+
diff --git a/mwlib/sanitychecker.py b/mwlib/sanitychecker.py
new file mode 100644
index 0000000..78d25b8
--- /dev/null
+++ b/mwlib/sanitychecker.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+"""
+classes for defining DTD-like rules for the parse tree
+"""
+from advtree import Article
+
+from mwlib.log import Log
+log = Log("sanitychecker")
+
+# -----------------------------------------------------------
+# Constraints
+# -----------------------------------------------------------
+
+class ConstraintBase(object):
+ def __init__(self, *klasses):
+ self.klasses = klasses
+
+ def test(self, nodes):
+ return True,None # passed
+
+ def __repr__(self):
+ return "%s(%s)" %(self.__class__.__name__, ", ".join(k.__name__ for k in self.klasses))
+
+
+class Forbid(ConstraintBase):
+ "forbid any of the classes"
+ def test(self, nodes):
+ for n in nodes:
+ if n.__class__ in self.klasses:
+ return False, n
+ return True, None
+
+
+class Allow(ConstraintBase):
+ "allow only these classes"
+ def test(self, nodes):
+ for n in nodes:
+ if not n.__class__ in self.klasses:
+ return False, n
+ return True, None
+
+
+class Require(ConstraintBase):
+ "require any of these classes"
+ def test(self, nodes):
+ for n in nodes:
+ if n.__class__ in self.klasses:
+ return True, n
+ return False, None
+
+class Equal(ConstraintBase):
+ "node classes and their order must be equal to these klasses"
+ def test(self, nodes):
+ if len(nodes) != len(self.klasses):
+ return False, None # FIXME what could we report?
+ for i,n in enumerate(nodes):
+ if n.__class__ != self.klasses[i]:
+ return False, n
+ return True, None
+
+
+# -----------------------------------------------------------
+# Rules regarding [Children, AllChildren, Parents, ...]
+# -----------------------------------------------------------
+
+class RuleBase:
+ def __init__(self, klass, constraint):
+ self.klass = klass
+ self.constraint = constraint
+
+ def _tocheck(self, node):
+ return []
+
+ def test(self, node):
+ if node.__class__ == self.klass:
+ return self.constraint.test( self._tocheck(node) )
+ return True, None
+
+ def __repr__(self):
+ return "%s(%s, %r)" %(self.__class__.__name__, self.klass.__name__, self.constraint)
+
+class ChildrenOf(RuleBase):
+ def _tocheck(self, node):
+ return node.children
+
+class AllChildrenOf(RuleBase):
+ def _tocheck(self, node):
+ return node.getAllChildren()
+
+class ParentsOf(RuleBase):
+ def _tocheck(self, node):
+ return node.parents
+
+class ParentOf(RuleBase):
+ def _tocheck(self, node):
+ if node.parent:
+ return [node.parent]
+ return []
+
+class SiblingsOf(RuleBase):
+ def _tocheck(self, node):
+ return node.siblings
+
+
+
+# example custom rules
+
+class RequireChild(RuleBase):
+
+ def __init__(self, klass):
+ self.klass = klass
+
+ def __repr__(self):
+ return "%s(%s)" %(self.__class__.__name__, self.klass.__name__)
+
+ def test(self, node):
+ if node.__class__ == self.klass:
+ if not len(node.children):
+ return False, node
+ return True, None
+
+
+
+
+# -----------------------------------------------------------
+# Callbacks
+# -----------------------------------------------------------
+"""
+callbacks are invoked when a rule they are attached to fails
+callback return values should be:
+ * True if the callback modified the tree and the sanity check needs to restart
+ * False if the tree is left unmodified
+"""
+class SanityException(Exception):
+ pass
+
+def exceptioncb(rule, node=None, parentnode=None):
+ raise SanityException("%r err:%r" %(rule, node or parentnode) )
+
+def warncb(rule, node=None, parentnode=None):
+ log.warn("%r node:%r parent:%r" %(rule, node, parentnode))
+ return False
+
+def removecb(rule, node=None, parentnode=None):
+ assert node and node.parent
+ node.parent.removeChild(node)
+ return True
+
+
+
+# -----------------------------------------------------------
+# Container for sanity rules
+# -----------------------------------------------------------
+
+class SanityChecker(object):
+
+ def __init__(self):
+ self.rules = []
+
+ def addRule(self, rule, actioncb=exceptioncb):
+ self.rules.append((rule, actioncb))
+
+ def check(self, tree):
+ """
+ check each node with each rule
+ on failure call callback
+ """
+ modified = True
+ while modified:
+ modified = False
+ for node in tree.allchildren():
+ #if node.__class__ == Article:
+ # log.info("checking article:", node.caption.encode('utf-8'))
+ for r,cb in self.rules:
+ passed, errnode = r.test(node)
+ if not passed and cb:
+ if cb(r, errnode or node):
+ modified = True
+ break
+ if modified:
+ break
+
+def demo(tree):
+ "for documentation only, see tests for more demos"
+ from mwlib.advtree import Table, Row, Cell, Text, ImageLink, PreFormatted
+
+ sc = SanityChecker()
+ rules = [ChildrenOf(Table, Allow(Row)),
+ ChildrenOf(Row, Allow(Cell)),
+ AllChildrenOf(Cell, Require(Text, ImageLink)),
+ AllChildrenOf(Cell, Forbid(PreFormatted)),
+ ChildrenOf(PreFormatted, Equal(Text)),
+ ]
+
+ def mycb(rule, node=None, parentnode=None):
+ print "failed", rule, node or parentnode
+ modifiedtree = False
+ return modifiedtree
+
+ for r in rules:
+ sc.addRule( r, mycb)
+ #sc.check(anytree)
+
+
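Besides demo() above, the callback protocol can be used to repair the tree instead of raising. A sketch (removecb is defined above; whether this is the right policy for a given writer is a judgment call):

    from mwlib.advtree import Table, Row
    from mwlib.sanitychecker import SanityChecker, ChildrenOf, Allow, removecb

    sc = SanityChecker()
    # any direct child of a Table that is not a Row gets removed; removecb returns
    # True, so check() restarts on the modified tree
    sc.addRule(ChildrenOf(Table, Allow(Row)), removecb)
    # sc.check(tree)   # tree: an advtree built from a parsed article
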
diff --git a/mwlib/scanfile.py b/mwlib/scanfile.py
new file mode 100755
index 0000000..96519d2
--- /dev/null
+++ b/mwlib/scanfile.py
@@ -0,0 +1,29 @@
+#! /usr/bin/env python
+
+"""used for debugging/testing"""
+
+import sys
+import time
+import mwscan
+
+d=unicode(open(sys.argv[1]).read(), 'utf-8')
+
+stime=time.time()
+r=mwscan.scan(d)
+needed = time.time()-stime
+for x in r:
+ print r.repr(x)
+
+print needed, len(d), len(r)
+
+
+
+# stime=time.time()
+# r=mwscan.compat_scan(d)
+# needed = time.time()-stime
+
+# print "COMPAT:", needed, len(d), len(r)
+
+
+# #mwscan.dump_tokens(d,r)
+# #print needed, len(d), len(r)
diff --git a/mwlib/scanner.py b/mwlib/scanner.py
new file mode 100755
index 0000000..d4d7167
--- /dev/null
+++ b/mwlib/scanner.py
@@ -0,0 +1,6 @@
+#! /usr/bin/env python
+
+if 0:
+ from plexscanner import TagToken, EndTagToken, tokenize
+else:
+ from mwscan import TagToken, EndTagToken, tokenize
diff --git a/mwlib/texmap.py b/mwlib/texmap.py
new file mode 100755
index 0000000..f3071e6
--- /dev/null
+++ b/mwlib/texmap.py
@@ -0,0 +1,95 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import re
+
+def convertSymbols(latexsource):
+ def repl(mo):
+ name=mo.group(0)
+ return symbolMap.get(name, name)
+
+ latexsource = texcmd.sub(repl, latexsource)
+ return latexsource
+
+texcmd = re.compile(r"\\[a-zA-Z]+")
+
+symbolMap = {'\\Bbb': '\\mathbb',
+ '\\Complex': '\\mathbb{C}',
+ '\\Dagger': '\\ddagger',
+ '\\Darr': '\\Downarrow',
+ '\\Harr': '\\Leftrightarrow',
+ '\\Larr': '\\Leftarrow',
+ '\\Lrarr': '\\Leftrightarrow',
+ '\\N': '\\mathbb{N}',
+ '\\O': '\\emptyset',
+ '\\R': '\\mathbb{R}',
+ '\\Rarr': '\\Rightarrow',
+ '\\Reals': '\\mathbb{R}',
+ '\\Uarr': '\\Uparrow',
+ '\\Z': '\\mathbb{Z}',
+ '\\alef': '\\aleph',
+ '\\alefsym': '\\aleph',
+ '\\and': '\\land',
+ '\\ang': '\\angle',
+ '\\arccos': '\\mathop{\\mathrm{arccos}}',
+ '\\arccot': '\\mathop{\\mathrm{arccot}}',
+ '\\arccsc': '\\mathop{\\mathrm{arccsc}}',
+ '\\arcsec': '\\mathop{\\mathrm{arcsec}}',
+ '\\bold': '\\mathbf',
+ '\\bull': '\\bullet',
+ '\\clubs': '\\clubsuit',
+ '\\cnums': '\\mathbb{C}',
+ '\\dArr': '\\Downarrow',
+ '\\darr': '\\downarrow',
+ '\\diamonds': '\\diamondsuit',
+ '\\empty': '\\emptyset',
+ '\\exist': '\\exists',
+ '\\ge': '\\geq',
+ '\\hAar': '\\Leftrightarrow',
+ '\\harr': '\\leftrightarrow',
+ '\\hearts': '\\heartsuit',
+ '\\image': '\\Im',
+ '\\infin': '\\infty',
+ '\\isin': '\\in',
+ '\\lArr': '\\Leftarrow',
+ '\\lang': '\\langle',
+ '\\larr': '\\leftarrow',
+ '\\le': '\\leq',
+ '\\lrArr': '\\Leftrightarrow',
+ '\\lrarr': '\\leftrightarrow',
+ '\\natnums': '\\mathbb{N}',
+ '\\ne': '\\neq',
+ '\\or': '\\lor',
+ '\\part': '\\partial',
+ '\\plusmn': '\\pm',
+ '\\rArr': '\\Rightarrow',
+ '\\rang': '\\rangle',
+ '\\rarr': '\\rightarrow',
+ '\\real': '\\Re',
+ '\\reals': '\\mathbb{R}',
+ '\\sdot': '\\cdot',
+ '\\sect': '\\S',
+ '\\sgn': '\\mathop{\\mathrm{sgn}}',
+ '\\spades': '\\spadesuit',
+ '\\sub': '\\subset',
+ '\\sube': '\\subseteq',
+ '\\supe': '\\supseteq',
+ '\\thetasym': '\\vartheta',
+ '\\uArr': '\\Uparrow',
+ '\\uarr': '\\uparrow',
+ '\\weierp': '\\wp',
+ '\\Alpha': 'A{}',
+ '\\Beta': 'B{}',
+ '\\Epsilon': 'E{}',
+ '\\Zeta': 'Z{}',
+ '\\Eta': 'H{}',
+ '\\Iota': 'I{}',
+ '\\Kappa' : 'K{}',
+ '\\Mu': 'M{}',
+ '\\Nu': 'N{}',
+ '\\Rho': 'P{}',
+ '\\Tau': 'T{}',
+ '\\Chi': 'C{}',
+ }
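For example, convertSymbols rewrites the texvc-style aliases in place and leaves everything else untouched:

    from mwlib.texmap import convertSymbols

    print convertSymbols(r"\R \larr \empty \sgn(x)")
    # -> \mathbb{R} \leftarrow \emptyset \mathop{\mathrm{sgn}}(x)
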
diff --git a/mwlib/timeline.py b/mwlib/timeline.py
new file mode 100755
index 0000000..e85dd84
--- /dev/null
+++ b/mwlib/timeline.py
@@ -0,0 +1,52 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+"""implement http://meta.wikimedia.org/wiki/EasyTimeline
+"""
+
+import os
+import tempfile
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+
+
+def drawTimeline(script, basedir=None):
+ if isinstance(script, unicode):
+ script = script.encode('utf8')
+ if basedir is None:
+ basedir = os.path.join(tempfile.gettempdir(), "timeline-%s" % (os.getuid(),))
+ if not os.path.exists(basedir):
+ os.mkdir(basedir)
+
+ m=md5()
+ m.update(script)
+ ident = m.hexdigest()
+
+ pngfile = os.path.join(basedir, ident+'.png')
+
+ if os.path.exists(pngfile):
+ return pngfile
+
+ scriptfile = os.path.join(basedir, ident+'.txt')
+ open(scriptfile, 'w').write(script)
+ et = os.path.join(os.path.dirname(__file__), "EasyTimeline.pl")
+
+ err = os.system("perl %s -P /usr/bin/ploticus -T /tmp/ -i %s" % (et, scriptfile))
+ if err != 0:
+ return None
+
+ svgfile = os.path.join(basedir, ident+'.svg')
+
+ if os.path.exists(svgfile):
+ os.unlink(svgfile)
+
+ if os.path.exists(pngfile):
+ return pngfile
+
+ return None
+
+
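A hedged usage sketch; the EasyTimeline script below is only illustrative, and the function requires perl, ploticus at /usr/bin/ploticus and the bundled EasyTimeline.pl:

    from mwlib.timeline import drawTimeline

    script = u"\n".join([
        u"ImageSize  = width:200 height:120",
        u"PlotArea   = left:20 right:10 top:10 bottom:20",
        u"Period     = from:1900 till:2000",
        u"TimeAxis   = orientation:horizontal",
        u"ScaleMajor = unit:year increment:20 start:1900",
    ])
    png = drawTimeline(script)   # path to <md5>.png under /tmp/timeline-<uid>/, or None on error
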
diff --git a/mwlib/uparser.py b/mwlib/uparser.py
new file mode 100755
index 0000000..8565c74
--- /dev/null
+++ b/mwlib/uparser.py
@@ -0,0 +1,126 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+"""usable/user parser"""
+
+from mwlib import parser, scanner, expander
+
+def simplify(node):
+ "concatenates textnodes in order to reduce the number of objects"
+ Text = parser.Text
+
+ last = None
+ toremove = []
+ for i,c in enumerate(node.children):
+ if c.__class__ == Text: # would isinstance be safe?
+ if last:
+ last.caption += c.caption
+ toremove.append(i)
+ else:
+ last = c
+ else:
+ simplify(c)
+ last = None
+
+ for i,ii in enumerate(toremove):
+ del node.children[ii-i]
+
+def fixlitags(node):
+ Text = parser.Text
+
+ if not isinstance(node, parser.ItemList):
+ idx = 0
+ while idx < len(node.children):
+ if isinstance(node.children[idx], parser.Item):
+ lst = parser.ItemList()
+ lst.append(node.children[idx])
+ node.children[idx] = lst
+ idx += 1
+ while idx<len(node.children):
+ if isinstance(node.children[idx], parser.Item):
+ lst.append(node.children[idx])
+ del node.children[idx]
+ elif node.children[idx]==Text("\n"):
+ del node.children[idx]
+ else:
+ break
+ else:
+ idx += 1
+
+ for x in node.children:
+ fixlitags(x)
+
+def removeBoilerplate(node):
+ i = 0
+ while i < len(node.children):
+ x = node.children[i]
+ if isinstance(x, parser.TagNode) and x.caption=='div':
+ try:
+ klass = x.values.get('class', '')
+ except AttributeError:
+ klass = ''
+
+ if 'boilerplate' in klass:
+ del node.children[i]
+ continue
+
+ i += 1
+
+ for x in node.children:
+ removeBoilerplate(x)
+
+
+
+
+postprocessors = [removeBoilerplate, simplify, fixlitags]
+
+def parseString(title=None, raw=None, wikidb=None, revision=None):
+ """parse article with title from raw mediawiki text"""
+ assert title is not None
+
+ if raw is None:
+ raw = wikidb.getRawArticle(title, revision=revision)
+ assert raw is not None, "cannot get article %r" % (title,)
+ if wikidb:
+ te = expander.Expander(raw, pagename=title, wikidb=wikidb)
+ input = te.expandTemplates()
+ else:
+ input = raw
+
+ tokens = scanner.tokenize(input, title)
+
+ a = parser.Parser(tokens, title).parse()
+ a.caption = title
+ for x in postprocessors:
+ x(a)
+ return a
+
+
+def simpleparse(raw): # !!! USE FOR DEBUGGING ONLY !!! does not use post processors
+ import sys
+ from mwlib import dummydb
+ db = dummydb.DummyDB()
+
+ tokens = scanner.tokenize(raw)
+ r=parser.Parser(tokens, "unknown").parse()
+ parser.show(sys.stdout, r, 0)
+ return r
+
+def main():
+ from mwlib.dummydb import DummyDB
+
+ import os
+ import sys
+
+ db = DummyDB()
+
+ for x in sys.argv[1:]:
+ input = unicode(open(x).read(), 'utf8')
+ title = unicode(os.path.basename(x))
+ parseString(title, input, db)
+
+if __name__=="__main__":
+ main()
+
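The effect of the postprocessors is easiest to see on a tiny document. A sketch mirroring main() above (DummyDB simply has no templates):

    import sys
    from mwlib.dummydb import DummyDB
    from mwlib.uparser import parseString
    from mwlib import parser

    tree = parseString(title=u"Example",
                       raw=u"''hello'' world\n\n* one\n* two\n",
                       wikidb=DummyDB())
    parser.show(sys.stdout, tree, 0)
    # adjacent Text nodes are already merged (simplify) and stray Item nodes are
    # wrapped in an ItemList (fixlitags)
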
diff --git a/mwlib/utils.py b/mwlib/utils.py
new file mode 100644
index 0000000..4fd6b55
--- /dev/null
+++ b/mwlib/utils.py
@@ -0,0 +1,112 @@
+import os
+import sys
+import errno
+import time
+
+# provide all for python 2.4
+try:
+ from __builtin__ import all
+except ImportError:
+ def all(items):
+ for x in items:
+ if not x:
+ return False
+ return True
+
+def fsescape(s):
+ res = []
+ for x in s:
+ c = ord(x)
+ if c>127:
+ res.append("~%s~" % c)
+ elif c==126: # ord("~")==126
+ res.append("~~")
+ else:
+ res.append(x)
+ return "".join(res)
+
+def start_logging(path):
+ sys.stderr.flush()
+ sys.stdout.flush()
+
+ f = open(path, "a")
+ fd = f.fileno()
+ os.dup2(fd, 1)
+ os.dup2(fd, 2)
+
+ null=os.open('/dev/null', os.O_RDWR)
+ os.dup2(null, 0)
+ os.close(null)
+
+def daemonize(dev_null=False):
+ # See http://www.erlenstar.demon.co.uk/unix/faq_toc.html#TOC16
+ if os.fork(): # launch child and...
+ os._exit(0) # kill off parent
+ os.setsid()
+ if os.fork(): # launch child and...
+ os._exit(0) # kill off parent again.
+ os.umask(077)
+ if dev_null:
+ null=os.open('/dev/null', os.O_RDWR)
+ for i in range(3):
+ try:
+ os.dup2(null, i)
+ except OSError, e:
+ if e.errno != errno.EBADF:
+ raise
+ os.close(null)
+
+def shell_exec(cmd):
+ """Execute cmd in a subshell
+
+ @param cmd: command to execute with os.system(); if given as unicode, it is
+ converted to str using sys.getfilesystemencoding()
+ @type cmd: basestring
+
+ @returns: exit code of command
+ @rtype: int
+ """
+ if isinstance(cmd, unicode):
+ enc = sys.getfilesystemencoding()
+ assert enc is not None, 'no filesystem encoding (set LANG)'
+ cmd = cmd.encode(enc, 'ignore')
+ return os.system(cmd)
+
+
+def get_multipart(filename, data, name):
+ """Build data in format multipart/form-data to be used to POST binary data.
+
+ @param filename: filename to be used in multipart request
+ @type filename: basestring
+
+ @param data: binary data to include
+ @type data: str
+
+ @param name: name to be used in multipart request
+ @type name: basestring
+
+ @returns: tuple containing content-type and body for the request
+ @rtype: (str, str)
+ """
+
+ if isinstance(filename, unicode):
+ filename = filename.encode('utf-8', 'ignore')
+ if isinstance(name, unicode):
+ name = name.encode('utf-8', 'ignore')
+
+ boundary = "-"*20 + ("%f" % time.time()) + "-"*20
+
+ items = []
+ items.append("--" + boundary)
+ items.append('Content-Disposition: form-data; name="%(name)s"; filename="%(filename)s"'\
+ % {'name': name, 'filename': filename})
+ items.append('Content-Type: application/octet-stream')
+ items.append('')
+ items.append(data)
+ items.append('--' + boundary + '--')
+ items.append('')
+
+ body = "\r\n".join(items)
+ content_type = 'multipart/form-data; boundary=%s' % boundary
+
+ return content_type, body
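
get_multipart only builds the request body; posting it is left to the caller. A sketch with the stdlib httplib (host and path are placeholders):

    import httplib
    from mwlib.utils import get_multipart

    data = open('collection.zip', 'rb').read()
    content_type, body = get_multipart('collection.zip', data, 'collection')

    conn = httplib.HTTPConnection('upload.example.com')       # placeholder host
    conn.request('POST', '/upload', body, {'Content-Type': content_type})
    print conn.getresponse().status
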
diff --git a/mwlib/web.py b/mwlib/web.py
new file mode 100755
index 0000000..5e32fb8
--- /dev/null
+++ b/mwlib/web.py
@@ -0,0 +1,122 @@
+#! /usr/bin/env python
+
+"""simple wsgi app for serving mediawiki content
+"""
+
+import os
+import mimetypes
+import StringIO
+from mwlib import uparser, htmlwriter, rendermath
+
+class Pngmath(object):
+ def __init__(self, basedir):
+ self.basedir = basedir
+
+ def __call__(self, env, start_response):
+ pi = env['PATH_INFO']
+ path = pi.split('/', 2)[-1]
+ path = path.strip("/")
+ path = path[:-len(".png")]
+
+ pngfile = os.path.join(self.basedir, path+'.png')
+ if not os.path.exists(pngfile):
+ texfile = os.path.join(self.basedir, path+'.tex')
+ if not os.path.exists(texfile):
+ start_response('404 Not found', [('Content-Type', 'text/plain')])
+ return ["404 not found"]
+
+ r = rendermath.Renderer()
+ r._render_file(path, 'png')
+
+
+ d=open(pngfile, 'rb').read()
+
+
+ start_response('200 Ok', [('Content-Type', 'image/png')])
+ return [d]
+
+class Files(object):
+ def __init__(self, basedir):
+ self.basedir = basedir
+
+ def __call__(self, env, start_response):
+ pi = env['PATH_INFO']
+ path = pi.split('/', 2)[-1]
+ path = path.strip("/")
+ assert ".." not in path, "path must not contain '..'"
+
+ mt, enc = mimetypes.guess_type(path)
+
+ try:
+ f=open(os.path.join(self.basedir, path), 'rb')
+ except (IOError, OSError), err:
+ print "ERROR:", err
+ start_response('404 Not found', [('Content-Type', 'text/plain')])
+ return ["404 not found"]
+
+ send = start_response('200 OK', [('Content-type', mt or 'text/plain; charset=utf-8')])
+ while 1:
+ data=f.read(0x20000)
+ if not data:
+ break
+ send(data)
+ return []
+
+
+class Serve(object):
+ head = """<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset="utf-8"></meta>
+<link rel="stylesheet" href="/resources/pedia.css" />
+</head>
+<body>
+"""
+ def __init__(self, db, images):
+ self.db = db
+ self.images = images
+ from mwlib import resources
+ self.resources = Files(os.path.dirname(resources.__file__)) # FIXME
+ self.image_files = Files(os.path.expanduser("~/images")) # FIXME
+ self.pngmath = Pngmath(os.path.expanduser("~/pngmath")) # FIXME
+ self.timeline = Files(os.path.expanduser("~/timeline")) # FIXME
+
+ def show(self, env, start_response):
+ article = unicode(env['PATH_INFO'], 'utf-8').strip('/').replace("_", " ")
+ article = article[:1].upper()+article[1:] # FIXME: we should redirect instead.
+
+ raw=self.db.getRawArticle(article)
+ if not raw:
+ start_response('404 Not found', [('Content-Type', 'text/plain')])
+ return ["Article %r not found" % (article,)]
+
+ send = start_response('200 OK', [('Content-type', 'text/html; charset=utf-8')])
+ send(self.head)
+
+ out=StringIO.StringIO(u"")
+
+ a=uparser.parseString(article, raw=raw, wikidb=self.db)
+ w=htmlwriter.HTMLWriter(out, self.images)
+ w.write(a)
+
+ return [out.getvalue().encode('utf-8')]
+
+ def __call__(self, env, start_response):
+ path = env['PATH_INFO']
+
+
+ if path.startswith("/resources/"):
+ return self.resources(env, start_response)
+ if path.startswith("/images"):
+ return self.image_files(env, start_response)
+ if path.startswith("/pngmath/"):
+ return self.pngmath(env, start_response)
+ if path.startswith("/timeline/"):
+ return self.timeline(env, start_response)
+
+ return self.show(env, start_response)
+
+
+ start_response('404 Not found', [('Content-Type', 'text/plain')])
+ return ["404 Not found"]
diff --git a/mwlib/wiki.py b/mwlib/wiki.py
new file mode 100755
index 0000000..96378ed
--- /dev/null
+++ b/mwlib/wiki.py
@@ -0,0 +1,135 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+from ConfigParser import ConfigParser
+
+def wiki_mwapi(base_url=None, license=None, template_blacklist=None):
+ from mwlib import mwapidb
+ return mwapidb.WikiDB(base_url, license, template_blacklist)
+
+def wiki_zip(path=None, url=None, name=None):
+ from mwlib import zipwiki
+ return zipwiki.Wiki(path)
+
+def wiki_net(articleurl=None, url=None, name=None, imagedescriptionurls=None,
+ templateurls=None, templateblacklist=None, defaultarticlelicense=None,
+ defaultauthors=None, **kwargs):
+ from mwlib import netdb
+
+ if templateurls:
+ templateurls = [x for x in templateurls.split() if x]
+ else:
+ raise RuntimeError("templateurls parameter for netdb not set in [wiki] section")
+
+ if imagedescriptionurls:
+ imagedescriptionurls = [x for x in imagedescriptionurls.split() if x]
+ else:
+ raise RuntimeError("imagedescriptionurls parameter for netdb not set in [wiki] section")
+
+ if defaultauthors:
+ defaultauthors = [a.strip() for a in defaultauthors.split(',')]
+
+ return netdb.NetDB(articleurl,
+ imagedescriptionurls=imagedescriptionurls,
+ templateurls=templateurls,
+ templateblacklist=templateblacklist,
+ defaultauthors=defaultauthors,
+ )
+
+def wiki_cdb(path=None, **kwargs):
+ from mwlib import cdbwiki
+ path = os.path.expanduser(path)
+ db=cdbwiki.WikiDB(path)
+ return db
+
+def image_mwapi(base_url=None, shared_base_url=None):
+ from mwlib import mwapidb
+ return mwapidb.ImageDB(base_url, shared_base_url)
+
+def image_download(url=None, localpath=None, knownlicenses=None):
+ assert url, "must supply url in [images] section"
+ from mwlib import netdb
+
+ if localpath:
+ localpath = os.path.expanduser(localpath)
+ urls = [x for x in url.split() if x]
+ assert urls
+
+ if knownlicenses:
+ knownlicenses = [x for x in knownlicenses.split() if x]
+ else:
+ knownlicenses = None
+
+ imgdb = netdb.ImageDB(urls, cachedir=localpath, knownLicenses=knownlicenses)
+ return imgdb
+
+def image_zip(path=None):
+ from mwlib import zipwiki
+ return zipwiki.ImageDB(path)
+
+
+
+dispatch = dict(
+ images = dict(mwapi=image_mwapi, download=image_download, zip=image_zip),
+ wiki = dict(mwapi=wiki_mwapi, cdb=wiki_cdb, net=wiki_net, zip=wiki_zip)
+)
+
+def _makewiki(conf):
+ res = {}
+
+ # yes, I really don't want to type this every time
+ wc = os.path.join(conf, "wikiconf.txt")
+ if os.path.exists(wc):
+ conf = wc
+
+ if conf.startswith("http://") or conf.startswith("https://"):
+ res['wiki'] = wiki_mwapi(conf)
+ res['images'] = image_mwapi(conf)
+ return res
+
+
+ if conf.lower().endswith(".zip"):
+ from mwlib import zipwiki
+ res['wiki'] = zipwiki.Wiki(conf)
+ res['images'] = zipwiki.ImageDB(conf)
+ return res
+
+ cp=ConfigParser()
+
+ if not cp.read(conf):
+ raise RuntimeError("could not read config file %r" % (conf,))
+
+
+ for s in ['images', 'wiki']:
+ if not cp.has_section(s):
+ continue
+
+ args = dict(cp.items(s))
+ if "type" not in args:
+ raise RuntimeError("section %r does not have key 'type'" % s)
+ t = args['type']
+ del args['type']
+ try:
+ m = dispatch[s][t]
+ except KeyError:
+ raise RuntimeError("cannot handle type %r in section %r" % (t, s))
+
+ res[s] = m(**args)
+
+ assert "wiki" in res
+ return res
+
+def makewiki(conf):
+ res = _makewiki(conf)
+
+    try:
+        overlaydir = os.environ['MWOVERLAY']
+        assert os.path.isdir(overlaydir)
+        import mwlib.overlay
+        res['wiki'] = mwlib.overlay.OverlayDB(res['wiki'], overlaydir)
+    except (KeyError, AssertionError):
+        # MWOVERLAY is unset or not a directory: leave the wiki unwrapped
+        pass
+ return res
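
makewiki() accepts a MediaWiki API URL, a *.zip collection, a directory containing a wikiconf.txt, or the path of such a config file directly. In the config-file case each of the [wiki] and [images] sections needs a 'type' key selecting a factory from the dispatch table; the section's remaining keys become keyword arguments for that factory. A hypothetical wikiconf.txt for the cdb/download combination could look like this (the paths and URL are placeholders, not values from this patch):

    [wiki]
    type=cdb
    path=~/wikipedia/cdb

    [images]
    type=download
    url=http://upload.wikimedia.org/wikipedia/commons/
    localpath=~/wikipedia/images

and would be consumed with:

    from mwlib import wiki

    res = wiki.makewiki("wikiconf.txt")      # or the directory containing it
    wikidb, imagedb = res['wiki'], res['images']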
diff --git a/mwlib/zipwiki.py b/mwlib/zipwiki.py
new file mode 100755
index 0000000..c6893b8
--- /dev/null
+++ b/mwlib/zipwiki.py
@@ -0,0 +1,167 @@
+#! /usr/bin/env python
+
+# Copyright (c) 2008, PediaPress GmbH
+# See README.txt for additional licensing information.
+
+import os
+import shutil
+import simplejson
+import tempfile
+from zipfile import ZipFile
+
+from mwlib.metabook import MetaBook
+from mwlib import uparser
+
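+# The ZIP files consumed by Wiki and ImageDB below are expected to contain
+# a metabook.json, a content.json with 'articles', 'templates' and 'images'
+# mappings, and the raw image files below images/ (with "'" in file names
+# replaced by "-").
+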
+class Wiki(object):
+ def __init__(self, zipfile):
+ """
+ @type zipfile: basestring or ZipFile
+ """
+
+ if isinstance(zipfile, ZipFile):
+ self.zf = zipfile
+ else:
+ self.zf = ZipFile(zipfile)
+ self.metabook = MetaBook()
+ self.metabook.loadJson(self.zf.read("metabook.json"))
+ content = simplejson.loads(self.zf.read('content.json'))
+ self.articles = content['articles']
+ self.templates = content['templates']
+
+ def _getArticle(self, title, revision=None):
+ try:
+ article = self.articles[title]
+ if revision is None or article['revision'] == revision:
+ return article
+ except KeyError:
+ pass
+ return None
+
+ def getRawArticle(self, title, revision=None):
+ article = self._getArticle(title, revision=revision)
+ if article:
+ return article['content']
+ return None
+
+ def getParsedArticle(self, title, revision=None):
+ raw = self.getRawArticle(title, revision=revision)
+ if raw is None:
+ return None
+ a = uparser.parseString(title=title, raw=raw, wikidb=self)
+ return a
+
+ def getURL(self, title, revision=None):
+ article = self._getArticle(title, revision=revision)
+ if article:
+ return article['url']
+ return None
+
+ def getAuthors(self, title, revision=None):
+ article = self._getArticle(title, revision=revision)
+ if article:
+ return article.get('authors', [])
+ return None
+
+ def getTemplate(self, name, followRedirects=True):
+ try:
+ return self.templates[name]['content']
+ except KeyError:
+ pass
+ return None
+
+
+class ImageDB(object):
+ def __init__(self, zipfile, tmpdir=None):
+ """
+ @type zipfile: basestring or ZipFile
+ """
+
+ if isinstance(zipfile, ZipFile):
+ self.zf = zipfile
+ else:
+ self.zf = ZipFile(zipfile)
+ content = simplejson.loads(self.zf.read('content.json'))
+ self.images = content['images']
+ self._tmpdir = tmpdir
+ self.diskpaths = {}
+
+ @property
+ def tmpdir(self):
+ if self._tmpdir is None:
+ self._tmpdir = unicode(tempfile.mkdtemp())
+ return self._tmpdir
+
+ def getDiskPath(self, name, size=None):
+ try:
+ return self.diskpaths[name]
+ except KeyError:
+ pass
+ try:
+ data = self.zf.read('images/%s' % name.replace("'", '-').encode('utf-8'))
+ except KeyError: # no such file
+ return None
+
+ try:
+ ext = '.' + name.rsplit('.', 1)[1]
+ except IndexError:
+ ext = ''
+ if ext.lower() == '.svg':
+ ext = '.svg.png'
+ elif ext.lower() == '.gif':
+ ext = '.gif.png'
+ res = os.path.join(self.tmpdir, 'image%04d%s' % (len(self.diskpaths), ext))
+ self.diskpaths[name] = res
+ f=open(res, "wb")
+ f.write(data)
+ f.close()
+ return res
+
+ def getLicense(self, name):
+ try:
+ return self.images[name]['license']
+ except KeyError:
+ return None
+
+    def getPath(self):
+        raise NotImplementedError('getPath() does not work with zipwiki.ImageDB!')
+
+ def getURL(self, name):
+ try:
+ return self.images[name]['url']
+ except KeyError:
+ return None
+
+ def clean(self):
+ if self._tmpdir:
+ shutil.rmtree(self._tmpdir, ignore_errors=True)
+
+
+class FakeImageDB(ImageDB):
+
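+    # an 800x480 blank (all-white) palette PNG handed out in place of every
+    # requested image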
+ imagedata = '\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03 \x00\x00\x01\xe0\x01\x03\x00\x00\x00g\xc9\x9b\xb6\x00\x00\x00\x01sRGB\x00\xae\xce\x1c\xe9\x00\x00\x00\x06PLTE\xff\xff\xff\x00\x00\x00U\xc2\xd3~\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00EIDATx\xda\xed\xc1\x01\x01\x00\x00\x00\x82 \xff\xafnH@\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00/\x06\xbd`\x00\x01`<5\x84\x00\x00\x00\x00IEND\xaeB`\x82'
+
+    def __init__(self, tmpdir=None):
+        """
+        @type tmpdir: basestring
+        """
+        self._tmpdir = tmpdir
+
+    def getDiskPath(self, name, size=None):
+        res = os.path.join(self.tmpdir, 'blank.png')
+        if not os.path.exists(res):
+            # write the placeholder PNG in binary mode to keep the data intact
+            f = open(res, "wb")
+            f.write(self.imagedata)
+            f.close()
+        return res
+
+    def getPath(self):
+        raise NotImplementedError('getPath() does not work with zipwiki.FakeImageDB!')
+
+    def getURL(self, name):
+        raise NotImplementedError('getURL() does not work with zipwiki.FakeImageDB!')
+
+    def getLicense(self, name):
+        raise NotImplementedError('getLicense() does not work with zipwiki.FakeImageDB!')
+
+
+
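
Taken together, Wiki answers article, template, URL and author lookups directly from the ZIP, while ImageDB extracts image data into a temporary directory on demand (storing .svg and .gif entries under .svg.png / .gif.png names) and removes that directory again in clean(). A short, hedged usage sketch; "collection.zip" is a placeholder for a collection ZIP produced elsewhere by mwlib, not a file from this patch:

    from mwlib import zipwiki

    w = zipwiki.Wiki("collection.zip")
    images = zipwiki.ImageDB("collection.zip")

    for title in w.articles:
        print title, w.getURL(title)
        tree = w.getParsedArticle(title)       # parse tree built via uparser

    if images.images:
        name = images.images.keys()[0]         # any image named in content.json
        print images.getDiskPath(name)         # extracted below a temp dir
    images.clean()                             # remove the temp dir again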