Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/util/odf/odfmanifest.py
blob: 07754fd2f3429009f26461ab02eac058b09fce2b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
# 
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# 
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#

# This script lists the content of the manifest.xml file
import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
from cStringIO import StringIO

MANIFESTNS="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"

#-----------------------------------------------------------------------------
#
# ODFMANIFESTHANDLER
#
#-----------------------------------------------------------------------------

class ODFManifestHandler(handler.ContentHandler):
    """ The ODFManifestHandler parses a manifest file and produces a list of
        content """

    def __init__(self):
        self.manifest = {}

        # Tags
        # FIXME: Also handle encryption data
        self.elements = {
        (MANIFESTNS, 'file-entry'): (self.s_file_entry, self.donothing),
        }

    def handle_starttag(self, tag, method, attrs):
        method(tag,attrs)

    def handle_endtag(self, tag, method):
        method(tag)

    def startElementNS(self, tag, qname, attrs):
        method = self.elements.get(tag, (None, None))[0]
        if method:
            self.handle_starttag(tag, method, attrs)
        else:
            self.unknown_starttag(tag,attrs)

    def endElementNS(self, tag, qname):
        method = self.elements.get(tag, (None, None))[1]
        if method:
            self.handle_endtag(tag, method)
        else:
            self.unknown_endtag(tag)

    def unknown_starttag(self, tag, attrs):
        pass

    def unknown_endtag(self, tag):
        pass

    def donothing(self, tag, attrs=None):
        pass

    def s_file_entry(self, tag, attrs):
        m = attrs.get((MANIFESTNS, 'media-type'),"application/octet-stream")
        p = attrs.get((MANIFESTNS, 'full-path'))
        self.manifest[p] = { 'media-type':m, 'full-path':p }


#-----------------------------------------------------------------------------
#
# Reading the file
#
#-----------------------------------------------------------------------------

def manifestlist(manifestxml):
    odhandler = ODFManifestHandler()
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    parser.setContentHandler(odhandler)
    parser.setErrorHandler(handler.ErrorHandler())

    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(manifestxml))
    parser.parse(inpsrc)

    return odhandler.manifest

def odfmanifest(odtfile):
    z = zipfile.ZipFile(odtfile)
    manifest = z.read('META-INF/manifest.xml')
    z.close()
    return manifestlist(manifest)

if __name__ == "__main__":
    import sys
    result = odfmanifest(sys.argv[1])
    for file in result.values():
        print "%-40s %-40s" % (file['media-type'], file['full-path'])