Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/mwlib/metabook.py
blob: e36e70df1dae70b91440e8aa9034a4864b8d05c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#! /usr/bin/env python
#! -*- coding:utf-8 -*-

import re
import simplejson

"""
See METABOOK.txt for description of Metabook data
"""

class MetaBook(object):
    """Encapsulate meta information about an article collection.

    A collection consists of a title, a subtitle and an ordered list of
    items. Each item is a dict whose 'type' is either 'article' or 'chapter';
    a chapter dict carries its own list of article dicts under 'items'.
    """

    # Class-level defaults. They only become instance attributes (and thus
    # part of the dumpJson() output) once they have been assigned on the
    # instance, e.g. by loadJson() or loadCollectionPage().
    title = u""
    subtitle = u""

    def __init__(self):
        """Create an empty collection."""
        self.type = 'collection'
        self.version = 1
        self.items = []

    def addArticles(self, articleTitles, chapterTitle=None, contentType='text/x-wiki'):
        """Append articles to this collection, optionally grouped in a chapter.

        @param articleTitles: sequence of article titles or dicts containing
            article title (value for key 'title') and optionally display title
            (value for key 'displaytitle').
        @type articleTitles: [unicode|{str: unicode}]
        @param chapterTitle: if given (and non-empty), the articles are wrapped
            in a new chapter item with this title; otherwise they are appended
            directly to the top-level item list.
        @type chapterTitle: unicode
        @param contentType: value stored under 'content-type' for each article
        @type contentType: str
        """

        articles = []
        for title in articleTitles:
            article = {
                'type': 'article',
                'content-type': contentType,
            }
            if isinstance(title, dict):
                # Keys supplied by the caller take precedence over the
                # defaults set above.
                article.update(title)
            else:
                article['title'] = title
            articles.append(article)
        if chapterTitle:
            self.items.append({
                'type': 'chapter',
                'title': chapterTitle,
                'items': articles,
            })
        else:
            self.items.extend(articles)

    def dumpJson(self):
        """Return the instance attributes of this object as a JSON string.

        Only attributes stored on the instance are serialized; the class-level
        defaults for title and subtitle are omitted until they have been
        assigned on the instance.
        """
        return simplejson.dumps(vars(self))

    def loadJson(self, jsonStr):
        """Set one attribute on this object per top-level key of a JSON object.

        @param jsonStr: JSON text whose top-level value is an object
        @type jsonStr: str
        """
        for (var, value) in simplejson.loads(jsonStr).items():
            setattr(self, var, value)

    def readJsonFile(self, filename):
        """Read the named file and load its content with loadJson().

        @param filename: path of a file containing JSON text
        @type filename: str
        """
        # try/finally (rather than "with") closes the file deterministically
        # while staying valid on old interpreters.
        f = open(filename, 'rb')
        try:
            content = f.read()
        finally:
            f.close()
        self.loadJson(content)

    def loadCollectionPage(self, mwcollection):
        """Parse wikitext of a MediaWiki collection page

        Recognized lines (after stripping surrounding whitespace):
          - "== Title =="        sets the collection title
          - "=== Subtitle ==="   sets the collection subtitle
          - ";Chapter"           starts a new chapter
          - ":[[Article]]" or ":[[Article|Display title]]" adds an article
            to the current chapter (or to the top level if no chapter has
            been started yet)
        All other lines are ignored.

        @param mwcollection: wikitext of a MediaWiki collection page as created by
            the Collection extension for MediaWiki
        @type mwcollection: unicode
        """

        # Raw strings: the backslashes belong to the regex, not to Python
        # string escapes.
        titleRe = r'^==\s+(?P<title>.*?)\s+==$'
        subtitleRe = r'^===\s+(?P<subtitle>.*?)\s+===$'
        chapterRe = r'^;(?P<chapter>.*?)$'
        articleRe = r'^:\[\[:?(?P<article>.*?)(?:\|(?P<displaytitle>.*?))?\]\]$'
        alltogetherRe = re.compile("(%s)|(%s)|(%s)|(%s)" % (titleRe, subtitleRe, chapterRe, articleRe))
        chapter = ''
        articles = []
        for line in mwcollection.splitlines():
            res = alltogetherRe.search(line.strip())
            if not res:
                continue
            if res.group('title'):
                self.title = res.group('title')
            elif res.group('subtitle'):
                self.subtitle = res.group('subtitle')
            elif res.group('chapter'):
                # A new chapter begins: store whatever was collected so far.
                if articles or chapter:
                    self.addArticles(articles, chapter)
                articles = []
                chapter = res.group('chapter')
            elif res.group('article'):
                d = {'title': res.group('article')}
                if res.group('displaytitle'):
                    d['displaytitle'] = res.group('displaytitle')
                articles.append(d)

        # Flush the remainder. Checking the pending chapter as well keeps a
        # trailing chapter without articles, just like an empty chapter that
        # is followed by another chapter.
        if articles or chapter:
            self.addArticles(articles, chapter)

    def getArticles(self):
        """Generator that produces a sequence of (title, revision) pairs for
        each article contained in this collection. If no revision is specified,
        None is returned for the revision item.
        """

        for item in self.getItems():
            if item['type'] == 'article':
                yield item['title'], item.get('revision', None)

    def getItems(self):
        """Generator that produces a flattened list of chapters and articles
        in this collection.

        A chapter is yielded first, followed by the entries of its 'items'
        list. Only one level of nesting is flattened; top-level items whose
        type is neither 'article' nor 'chapter' are skipped.
        """

        for item in self.items:
            if item['type'] == 'article':
                yield item
            elif item['type'] == 'chapter':
                yield item
                for article in item.get('items', []):
                    yield article