From 826855778765d7b013bf4acb2d31709ff707b8dd Mon Sep 17 00:00:00 2001
From: Walter Bender
Date: Wed, 26 Sep 2012 14:35:59 +0000
Subject: new project: datastore analysis scripts

---
 Review revision: file handles are closed via 'with', metadata values lose
 their trailing newline, and print()/items() replace the Python-2-only forms.
 The index blob ids below are those of the original commit.

diff --git a/dsparse.py b/dsparse.py
new file mode 100755
index 0000000..6ed5f57
--- /dev/null
+++ b/dsparse.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#Copyright (c) 2012, Walter Bender
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# You should have received a copy of the GNU General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, 51 Franklin Street, Suite 500 Boston, MA 02110-1335 USA
+
+# Simple parser of datastore backups
+
+import os
+import glob
+import time
+
+
+DIROFINTEREST = 'datastore-current'
+
+
+class ParseMain():
+    ''' Collect metadata for each object in the datastore backups found
+    in the current directory and print one row per object '''
+
+    def __init__(self):
+        # backup directory name -> list of per-object attribute dicts
+        self._dsdict = {}
+
+        abspath = os.path.abspath('.')
+        for path in glob.glob(os.path.join(abspath, '*')):
+            if isdsdir(path):
+                entries = []
+                self._dsdict[os.path.basename(path)] = entries
+                dsobjdirs = glob.glob(
+                    os.path.join(path, DIROFINTEREST, '??'))
+                for dsobjdir in dsobjdirs:
+                    dsobjs = glob.glob(os.path.join(dsobjdir, '*'))
+                    for dsobj in dsobjs:
+                        entry = {}
+                        entries.append(entry)
+                        activity = isactivity(dsobj)
+                        if not activity:
+                            entry['activity'] = 'media object'
+                        else:
+                            entry['activity'] = activity
+                        if activity == 'TurtleArtActivity':
+                            score = hasturtleblocks(dsobj)
+                            if score:
+                                entry['score'] = score
+                        mime_type = hascomponent(dsobj, 'mime_type')
+                        if mime_type:
+                            entry['mime_type'] = mime_type
+                        mtime = hascomponent(dsobj, 'mtime')
+                        if mtime:
+                            entry['mtime'] = mtime
+                        creation_time = hascomponent(dsobj, 'creation_time')
+                        if creation_time:
+                            entry['creation_time'] = time.strftime(
+                                '%Y-%m-%dT%H:%M:%S',
+                                time.gmtime(float(creation_time)))
+                        activity_count = hascomponent(dsobj, 'activity count')
+                        if activity_count:
+                            entry['activity count'] = activity_count
+
+        # items() and print() run unchanged under Python 2 and Python 3
+        for k, v in self._dsdict.items():
+            for i in v:
+                line = '%s, ' % (k)
+                for key in ('activity', 'mime_type', 'activity count',
+                            'creation_time', 'mtime', 'score'):
+                    # a missing attribute still yields an empty column
+                    line += '%s, ' % (i.get(key, ''))
+                print(line)
+
+
+def hascomponent(path, component):
+    ''' Return metadata attribute, if any
+
+    Reads the first line of <path>/metadata/<component> and returns it
+    without its line terminator.  Returns False when the file does not
+    exist or that line is empty. '''
+    filename = os.path.join(path, 'metadata', component)
+    if not os.path.exists(filename):
+        return False
+    with open(filename) as fd:
+        # readline() keeps the line terminator; strip it so callers can
+        # compare the value with == and print it within one output row
+        data = fd.readline().rstrip('\r\n')
+    if not data:
+        return False
+    return data
+
+
+# Block name -> rubric category
+TACAT = {'clean':'forward', 'forward':'forward', 'back':'forward',
+         'left':'forward', 'right':'forward', 'arc': 'arc',
+         'xcor': 'coord', 'ycor': 'coord', 'heading': 'coord',
+         'setxy': 'setxy', 'seth': 'setxy', 'penup': 'pen', 'pendown': 'pen',
+         'setpensize': 'pen', 'setcolor': 'pen', 'pensize': 'pen',
+         'color': 'pen', 'setshade': 'pen', 'setgray': 'pen', 'shade': 'pen',
+         'gray': 'pen', 'fillscreen': 'pen', 'startfill': 'fill',
+         'stopfill': 'fill', 'plus2': 'number', 'minus2': 'number',
+         'product2': 'number', 'division2': 'number', 'remainder2': 'number',
+         'sqrt': 'number', 'identity2': 'number', 'and2': 'boolean',
+         'or2': 'boolean', 'not': 'boolean', 'greater2': 'boolean',
+         'less2': 'boolean', 'equal2': 'boolean', 'random': 'random',
+         'repeat': 'repeat', 'forever': 'repeat', 'if': 'ifthen',
+         'ifelse': 'ifthen', 'while': 'ifthen', 'until': 'ifthen',
+         'hat': 'action', 'stack': 'action', 'storein': 'box', 'box': 'box',
+         'luminance': 'sensor', 'mousex': 'sensor', 'mousey': 'sensor',
+         'mousebutton2': 'sensor', 'keyboard': 'sensor', 'kbinput': 'sensor',
+         'readpixel': 'sensor', 'see': 'sensor', 'time': 'sensor',
+         'sound': 'sensor', 'volume': 'sensor', 'pitch': 'sensor',
+         'resistance': 'sensor', 'voltage': 'sensor', 'video': 'media',
+         'wait': 'media', 'camera': 'media', 'journal': 'media',
+         'audio': 'media', 'show': 'media', 'setscale': 'media',
+         'savepix': 'media', 'savesvg': 'media', 'mediawait': 'media',
+         'mediapause': 'media', 'mediastop': 'media', 'mediaplay': 'media',
+         'speak': 'media', 'sinewave': 'media', 'description': 'media',
+         'push':'extras', 'pop':'extras', 'printheap':'extras',
+         'clearheap':'extras', 'isheapempty2':'extras', 'chr':'extras',
+         'int':'extras', 'myfunction': 'python', 'userdefined': 'python',
+         'loadblock': 'python', 'loadpalette': 'python'}
+# Rubric category -> palette
+TAPAL = {'forward': 'turtlep', 'arc': 'turtlep', 'coord': 'turtlep',
+         'setxy': 'turtlep', 'pen': 'penp', 'fill': 'penp', 'number': 'numberp',
+         'random': 'numberp', 'boolean': 'numberp', 'repeat': 'flowp',
+         'ifthen': 'flowp', 'action': 'boxp', 'box': 'boxp',
+         'sensor': 'sensorp', 'media': 'mediap', 'extras': 'extrasp',
+         'python': 'extrasp'}
+# Rubric points, added once per category and once per palette used
+TASCORE = {'forward': 3, 'arc': 3, 'setxy': 2.5, 'coord': 4, 'turtlep': 5,
+           'pen': 2.5, 'fill': 2.5, 'penp': 5,
+           'number': 2.5, 'boolean': 2.5, 'random': 2.5, 'numberp': 0,
+           'repeat': 2.5, 'ifthen': 7.5, 'flowp': 10,
+           'box': 7.5, 'action': 7.5, 'boxp': 0,
+           'media': 5, 'mediap': 0,
+           'python': 5, 'extras': 5, 'extrasp': 0,
+           'sensor': 5, 'sensorp': 0}
+
+
+def hasturtleblocks(path):
+    ''' Parse turtle block data and generate score based on rubric
+
+    Each category and each palette found in the project counts once.
+    Returns the total as a string, or None if there is no 'data' file. '''
+
+    if not os.path.exists(os.path.join(path, 'data')):
+        return None
+    blocks = []
+    # 'with' closes the file as soon as the blocks have been read
+    with open(os.path.join(path, 'data')) as fd:
+        # block name is second token in each line
+        for line in fd:
+            tokens = line.split(',')
+            if len(tokens) > 1:
+                token = tokens[1].strip('" [')
+                blocks.append(token)
+
+    score = 0
+    cats = []
+    pals = []
+
+    for b in blocks:
+        if b in TACAT:
+            if not TACAT[b] in cats:
+                cats.append(TACAT[b])
+    for c in cats:
+        if c in TAPAL:
+            if not TAPAL[c] in pals:
+                pals.append(TAPAL[c])
+    for c in cats:
+        if c in TASCORE:
+            score += TASCORE[c]
+    for p in pals:
+        if p in TASCORE:
+            score += TASCORE[p]
+
+    return str(score)
+
+
+def isactivity(path):
+    ''' Return activity name
+
+    The name is the last dot-separated part of the 'activity' metadata
+    attribute; returns False when that attribute is missing or empty. '''
+    activity = hascomponent(path, 'activity')
+    if not activity:
+        return False
+    else:
+        return activity.split('.')[-1]
+
+
+def isdsdir(path):
+    ''' Only interested if it is a datastore directory
+
+    Returns True when path is a directory that contains DIROFINTEREST,
+    False otherwise. '''
+    if not os.path.isdir(path):
+        return False
+    if not os.path.exists(os.path.join(path, DIROFINTEREST)):
+        return False
+    return True
+
+
+if __name__ == '__main__':
+    ParseMain()
diff --git a/olpcdsparse.py b/olpcdsparse.py
new file mode 100755
index 0000000..244c7cb
--- /dev/null
+++ b/olpcdsparse.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+#Copyright (c) 2012, Walter Bender
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# You should have received a copy of the GNU General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, 51 Franklin Street, Suite 500 Boston, MA 02110-1335 USA
+
+# Simple parser of datastore backups
+
+import os
+import glob
+import time
+
+
+DIROFINTEREST = 'datastore'
+
+
+class ParseMain():
+    ''' Collect metadata for each object in the datastores found under
+    $HOME/.sugar and print one row per object '''
+
+    def __init__(self):
+        # datastore directory name -> list of per-object attribute dicts
+        self._dsdict = {}
+
+        homepath = os.environ['HOME']
+        for path in glob.glob(os.path.join(homepath, '.sugar', '*')):
+            if isdsdir(path):
+                entries = []
+                self._dsdict[os.path.basename(path)] = entries
+                dsobjdirs = glob.glob(
+                    os.path.join(path, DIROFINTEREST, '??'))
+                for dsobjdir in dsobjdirs:
+                    dsobjs = glob.glob(os.path.join(dsobjdir, '*'))
+                    for dsobj in dsobjs:
+                        entry = {}
+                        entries.append(entry)
+                        activity = isactivity(dsobj)
+                        if not activity:
+                            entry['activity'] = 'media object'
+                        else:
+                            entry['activity'] = activity
+                        if activity == 'TurtleArtActivity':
+                            score = hasturtleblocks(dsobj)
+                            if score:
+                                entry['score'] = score
+                        mime_type = hascomponent(dsobj, 'mime_type')
+                        if mime_type:
+                            entry['mime_type'] = mime_type
+                        mtime = hascomponent(dsobj, 'mtime')
+                        if mtime:
+                            entry['mtime'] = mtime
+                        creation_time = hascomponent(dsobj, 'creation_time')
+                        if creation_time:
+                            entry['creation_time'] = time.strftime(
+                                '%Y-%m-%dT%H:%M:%S',
+                                time.gmtime(float(creation_time)))
+                        activity_count = hascomponent(dsobj, 'activity count')
+                        if activity_count:
+                            entry['activity count'] = activity_count
+
+        # items() and print() run unchanged under Python 2 and Python 3
+        for k, v in self._dsdict.items():
+            for i in v:
+                line = '%s, ' % (k)
+                for key in ('activity', 'mime_type', 'activity count',
+                            'creation_time', 'mtime', 'score'):
+                    # a missing attribute still yields an empty column
+                    line += '%s, ' % (i.get(key, ''))
+                print(line)
+
+
+def hascomponent(path, component):
+    ''' Return metadata attribute, if any
+
+    Reads the first line of <path>/metadata/<component> and returns it
+    without its line terminator.  Returns False when the file does not
+    exist or that line is empty. '''
+    filename = os.path.join(path, 'metadata', component)
+    if not os.path.exists(filename):
+        return False
+    with open(filename) as fd:
+        # readline() keeps the line terminator; strip it so callers can
+        # compare the value with == and print it within one output row
+        data = fd.readline().rstrip('\r\n')
+    if not data:
+        return False
+    return data
+
+
+# Block name -> rubric category
+TACAT = {'clean':'forward', 'forward':'forward', 'back':'forward',
+         'left':'forward', 'right':'forward', 'arc': 'arc',
+         'xcor': 'coord', 'ycor': 'coord', 'heading': 'coord',
+         'setxy': 'setxy', 'seth': 'setxy', 'penup': 'pen', 'pendown': 'pen',
+         'setpensize': 'pen', 'setcolor': 'pen', 'pensize': 'pen',
+         'color': 'pen', 'setshade': 'pen', 'setgray': 'pen', 'shade': 'pen',
+         'gray': 'pen', 'fillscreen': 'pen', 'startfill': 'fill',
+         'stopfill': 'fill', 'plus2': 'number', 'minus2': 'number',
+         'product2': 'number', 'division2': 'number', 'remainder2': 'number',
+         'sqrt': 'number', 'identity2': 'number', 'and2': 'boolean',
+         'or2': 'boolean', 'not': 'boolean', 'greater2': 'boolean',
+         'less2': 'boolean', 'equal2': 'boolean', 'random': 'random',
+         'repeat': 'repeat', 'forever': 'repeat', 'if': 'ifthen',
+         'ifelse': 'ifthen', 'while': 'ifthen', 'until': 'ifthen',
+         'hat': 'action', 'stack': 'action', 'storein': 'box', 'box': 'box',
+         'luminance': 'sensor', 'mousex': 'sensor', 'mousey': 'sensor',
+         'mousebutton2': 'sensor', 'keyboard': 'sensor', 'kbinput': 'sensor',
+         'readpixel': 'sensor', 'see': 'sensor', 'time': 'sensor',
+         'sound': 'sensor', 'volume': 'sensor', 'pitch': 'sensor',
+         'resistance': 'sensor', 'voltage': 'sensor', 'video': 'media',
+         'wait': 'media', 'camera': 'media', 'journal': 'media',
+         'audio': 'media', 'show': 'media', 'setscale': 'media',
+         'savepix': 'media', 'savesvg': 'media', 'mediawait': 'media',
+         'mediapause': 'media', 'mediastop': 'media', 'mediaplay': 'media',
+         'speak': 'media', 'sinewave': 'media', 'description': 'media',
+         'push':'extras', 'pop':'extras', 'printheap':'extras',
+         'clearheap':'extras', 'isheapempty2':'extras', 'chr':'extras',
+         'int':'extras', 'myfunction': 'python', 'userdefined': 'python',
+         'loadblock': 'python', 'loadpalette': 'python'}
+# Rubric category -> palette
+TAPAL = {'forward': 'turtlep', 'arc': 'turtlep', 'coord': 'turtlep',
+         'setxy': 'turtlep', 'pen': 'penp', 'fill': 'penp', 'number': 'numberp',
+         'random': 'numberp', 'boolean': 'numberp', 'repeat': 'flowp',
+         'ifthen': 'flowp', 'action': 'boxp', 'box': 'boxp',
+         'sensor': 'sensorp', 'media': 'mediap', 'extras': 'extrasp',
+         'python': 'extrasp'}
+# Rubric points, added once per category and once per palette used
+TASCORE = {'forward': 3, 'arc': 3, 'setxy': 2.5, 'coord': 4, 'turtlep': 5,
+           'pen': 2.5, 'fill': 2.5, 'penp': 5,
+           'number': 2.5, 'boolean': 2.5, 'random': 2.5, 'numberp': 0,
+           'repeat': 2.5, 'ifthen': 7.5, 'flowp': 10,
+           'box': 7.5, 'action': 7.5, 'boxp': 0,
+           'media': 5, 'mediap': 0,
+           'python': 5, 'extras': 5, 'extrasp': 0,
+           'sensor': 5, 'sensorp': 0}
+
+
+def hasturtleblocks(path):
+    ''' Parse turtle block data and generate score based on rubric
+
+    Each category and each palette found in the project counts once.
+    Returns the total as a string, or None if there is no 'data' file. '''
+
+    if not os.path.exists(os.path.join(path, 'data')):
+        return None
+    blocks = []
+    # 'with' closes the file as soon as the blocks have been read
+    with open(os.path.join(path, 'data')) as fd:
+        # block name is second token in each line
+        for line in fd:
+            tokens = line.split(',')
+            if len(tokens) > 1:
+                token = tokens[1].strip('" [')
+                blocks.append(token)
+
+    score = 0
+    cats = []
+    pals = []
+
+    for b in blocks:
+        if b in TACAT:
+            if not TACAT[b] in cats:
+                cats.append(TACAT[b])
+    for c in cats:
+        if c in TAPAL:
+            if not TAPAL[c] in pals:
+                pals.append(TAPAL[c])
+    for c in cats:
+        if c in TASCORE:
+            score += TASCORE[c]
+    for p in pals:
+        if p in TASCORE:
+            score += TASCORE[p]
+
+    return str(score)
+
+
+def isactivity(path):
+    ''' Return activity name
+
+    The name is the last dot-separated part of the 'activity' metadata
+    attribute; returns False when that attribute is missing or empty. '''
+    activity = hascomponent(path, 'activity')
+    if not activity:
+        return False
+    else:
+        return activity.split('.')[-1]
+
+
+def isdsdir(path):
+    ''' Only interested if it is a datastore directory
+
+    Returns True when path is a directory that contains DIROFINTEREST,
+    False otherwise. '''
+    if not os.path.isdir(path):
+        return False
+    if not os.path.exists(os.path.join(path, DIROFINTEREST)):
+        return False
+    return True
+
+
+if __name__ == '__main__':
+    ParseMain()
diff --git a/olpctaparse.py b/olpctaparse.py
new file mode 100755
index 0000000..421cf96
--- /dev/null
+++ b/olpctaparse.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+#Copyright (c) 2012, Walter Bender
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# You should have received a copy of the GNU General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, 51 Franklin Street, Suite 500 Boston, MA 02110-1335 USA
+
+# Simple parser of datastore backups
+
+import os
+import glob
+import time
+
+
+DIROFINTEREST = 'datastore'
+
+
+class ParseMain():
+    ''' Score each Turtle Art project in the datastores found under
+    $HOME/.sugar and print one row per project '''
+
+    def __init__(self):
+        # datastore directory name -> list of per-object attribute dicts
+        self._dsdict = {}
+
+        homepath = os.environ['HOME']
+        for path in glob.glob(os.path.join(homepath, '.sugar', '*')):
+            if isdsdir(path):
+                entries = []
+                self._dsdict[os.path.basename(path)] = entries
+                dsobjdirs = glob.glob(
+                    os.path.join(path, DIROFINTEREST, '??'))
+                for dsobjdir in dsobjdirs:
+                    dsobjs = glob.glob(os.path.join(dsobjdir, '*'))
+                    for dsobj in dsobjs:
+                        if not isactivity(dsobj) == 'TurtleArtActivity':
+                            continue
+                        mime_type = hascomponent(dsobj, 'mime_type')
+                        if mime_type != 'application/x-turtle-art':
+                            continue
+                        entry = {}
+                        entries.append(entry)
+                        activity = isactivity(dsobj)
+                        entry['activity'] = activity
+                        if activity == 'TurtleArtActivity':
+                            score = hasturtleblocks(dsobj)
+                            if score:
+                                entry['score'] = score
+                        creation_time = hascomponent(dsobj, 'creation_time')
+                        if creation_time:
+                            entry['creation_time'] = time.strftime(
+                                '%Y-%m-%dT%H:%M:%S',
+                                time.gmtime(float(creation_time)))
+                        activity_count = hascomponent(dsobj, 'activity count')
+                        if activity_count:
+                            entry['activity count'] = activity_count
+
+        # items() and print() run unchanged under Python 2 and Python 3
+        for k, v in self._dsdict.items():
+            for i in v:
+                line = '%s, ' % (k)
+                for key in ('activity', 'activity count', 'creation_time'):
+                    # a missing attribute still yields an empty column
+                    line += '%s, ' % (i.get(key, ''))
+                if 'score' in i:
+                    # one column per palette subtotal, in PALS order
+                    for j in i['score']:
+                        line += '%s, ' % (str(j))
+                else:
+                    line += ', '
+                print(line)
+
+
+def hascomponent(path, component):
+    ''' Return metadata attribute, if any
+
+    Reads the first line of <path>/metadata/<component> and returns it
+    without its line terminator.  Returns False when the file does not
+    exist or that line is empty. '''
+    filename = os.path.join(path, 'metadata', component)
+    if not os.path.exists(filename):
+        return False
+    with open(filename) as fd:
+        # readline() keeps the line terminator; strip it so callers can
+        # compare the value with == and print it within one output row
+        data = fd.readline().rstrip('\r\n')
+    if not data:
+        return False
+    return data
+
+
+# Block name -> rubric category
+TACAT = {'clean':'forward', 'forward':'forward', 'back':'forward',
+         'left':'forward', 'right':'forward', 'arc': 'arc',
+         'xcor': 'coord', 'ycor': 'coord', 'heading': 'coord',
+         'setxy': 'setxy', 'seth': 'setxy', 'penup': 'pen', 'pendown': 'pen',
+         'setpensize': 'pen', 'setcolor': 'pen', 'pensize': 'pen',
+         'color': 'pen', 'setshade': 'pen', 'setgray': 'pen', 'shade': 'pen',
+         'gray': 'pen', 'fillscreen': 'pen', 'startfill': 'fill',
+         'stopfill': 'fill', 'plus2': 'number', 'minus2': 'number',
+         'product2': 'number', 'division2': 'number', 'remainder2': 'number',
+         'sqrt': 'number', 'identity2': 'number', 'and2': 'boolean',
+         'or2': 'boolean', 'not': 'boolean', 'greater2': 'boolean',
+         'less2': 'boolean', 'equal2': 'boolean', 'random': 'random',
+         'repeat': 'repeat', 'forever': 'repeat', 'if': 'ifthen',
+         'ifelse': 'ifthen', 'while': 'ifthen', 'until': 'ifthen',
+         'hat': 'action', 'stack': 'action', 'storein': 'box', 'box': 'box',
+         'luminance': 'sensor', 'mousex': 'sensor', 'mousey': 'sensor',
+         'mousebutton2': 'sensor', 'keyboard': 'sensor', 'kbinput': 'sensor',
+         'readpixel': 'sensor', 'see': 'sensor', 'time': 'sensor',
+         'sound': 'sensor', 'volume': 'sensor', 'pitch': 'sensor',
+         'resistance': 'sensor', 'voltage': 'sensor', 'video': 'media',
+         'wait': 'media', 'camera': 'media', 'journal': 'media',
+         'audio': 'media', 'show': 'media', 'setscale': 'media',
+         'savepix': 'media', 'savesvg': 'media', 'mediawait': 'media',
+         'mediapause': 'media', 'mediastop': 'media', 'mediaplay': 'media',
+         'speak': 'media', 'sinewave': 'media', 'description': 'media',
+         'push':'extras', 'pop':'extras', 'printheap':'extras',
+         'clearheap':'extras', 'isheapempty2':'extras', 'chr':'extras',
+         'int':'extras', 'myfunction': 'python', 'userdefined': 'python',
+         'loadblock': 'python', 'loadpalette': 'python'}
+# Rubric category -> palette
+TAPAL = {'forward': 'turtlep', 'arc': 'turtlep', 'coord': 'turtlep',
+         'setxy': 'turtlep', 'pen': 'penp', 'fill': 'penp', 'number': 'numberp',
+         'random': 'numberp', 'boolean': 'numberp', 'repeat': 'flowp',
+         'ifthen': 'flowp', 'action': 'boxp', 'box': 'boxp',
+         'sensor': 'sensorp', 'media': 'mediap', 'extras': 'extrasp',
+         'python': 'extrasp'}
+# Rubric points, added once per category and once per palette used
+TASCORE = {'forward': 3, 'arc': 3, 'setxy': 2.5, 'coord': 4, 'turtlep': 5,
+           'pen': 2.5, 'fill': 2.5, 'penp': 5,
+           'number': 2.5, 'boolean': 2.5, 'random': 2.5, 'numberp': 0,
+           'repeat': 2.5, 'ifthen': 7.5, 'flowp': 10,
+           'box': 7.5, 'action': 7.5, 'boxp': 0,
+           'media': 5, 'mediap': 0,
+           'python': 5, 'extras': 5, 'extrasp': 0,
+           'sensor': 5, 'sensorp': 0}
+# Order of the per-palette subtotals in the score list
+PALS = ['turtlep', 'penp', 'numberp', 'flowp', 'boxp', 'sensorp', 'mediap',
+        'extrasp']
+
+
+def hasturtleblocks(path):
+    ''' Parse turtle block data and generate score based on rubric
+
+    Each category and each palette found in the project counts once.
+    Returns a list with one subtotal per entry of PALS, or None if
+    there is no 'data' file. '''
+
+    if not os.path.exists(os.path.join(path, 'data')):
+        return None
+    blocks = []
+    # 'with' closes the file as soon as the blocks have been read
+    with open(os.path.join(path, 'data')) as fd:
+        # block name is second token in each line
+        for line in fd:
+            tokens = line.split(',')
+            if len(tokens) > 1:
+                token = tokens[1].strip('" [')
+                blocks.append(token)
+
+    score = [0] * len(PALS)
+    cats = []
+    pals = []
+
+    for b in blocks:
+        if b in TACAT:
+            if not TACAT[b] in cats:
+                cats.append(TACAT[b])
+    for c in cats:
+        if c in TAPAL:
+            if not TAPAL[c] in pals:
+                pals.append(TAPAL[c])
+    for c in cats:
+        if c in TASCORE:
+            score[PALS.index(TAPAL[c])] += TASCORE[c]
+    for p in pals:
+        if p in TASCORE:
+            score[PALS.index(p)] += TASCORE[p]
+
+    return score
+
+
+def isactivity(path):
+    ''' Return activity name
+
+    The name is the last dot-separated part of the 'activity' metadata
+    attribute; returns False when that attribute is missing or empty. '''
+    activity = hascomponent(path, 'activity')
+    if not activity:
+        return False
+    else:
+        return activity.split('.')[-1]
+
+
+def isdsdir(path):
+    ''' Only interested if it is a datastore directory
+
+    Returns True when path is a directory that contains DIROFINTEREST,
+    False otherwise. '''
+    if not os.path.isdir(path):
+        return False
+    if not os.path.exists(os.path.join(path, DIROFINTEREST)):
+        return False
+    return True
+
+
+if __name__ == '__main__':
+    ParseMain()
diff --git a/taparse.py b/taparse.py
new file mode 100755
index 0000000..f89cf3d
--- /dev/null
+++ b/taparse.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python
+#Copyright (c) 2012, Walter Bender
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# You should have received a copy of the GNU General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, 51 Franklin Street, Suite 500 Boston, MA 02110-1335 USA
+
+# Simple parser of turtleart backups
+
+import os
+import glob
+import time
+
+
+DIROFINTEREST = 'datastore-current'
+
+
+class ParseMain():
+    ''' Score each Turtle Art object in the datastore backups found in
+    the current directory and print one row per object '''
+
+    def __init__(self):
+        # backup directory name -> list of per-object attribute dicts
+        self._dsdict = {}
+
+        abspath = os.path.abspath('.')
+        for path in glob.glob(os.path.join(abspath, '*')):
+            if isdsdir(path):
+                entries = []
+                self._dsdict[os.path.basename(path)] = entries
+                dsobjdirs = glob.glob(
+                    os.path.join(path, DIROFINTEREST, '??'))
+                for dsobjdir in dsobjdirs:
+                    dsobjs = glob.glob(os.path.join(dsobjdir, '*'))
+                    for dsobj in dsobjs:
+                        if not isactivity(dsobj) == 'TurtleArtActivity':
+                            continue
+                        entry = {}
+                        entries.append(entry)
+                        activity = isactivity(dsobj)
+                        if not activity:
+                            entry['activity'] = 'media object'
+                        else:
+                            entry['activity'] = activity
+                        if activity == 'TurtleArtActivity':
+                            score = hasturtleblocks(dsobj)
+                            if score:
+                                entry['score'] = score
+                        mime_type = hascomponent(dsobj, 'mime_type')
+                        if mime_type:
+                            entry['mime_type'] = mime_type
+                        mtime = hascomponent(dsobj, 'mtime')
+                        if mtime:
+                            entry['mtime'] = mtime
+                        creation_time = hascomponent(dsobj, 'creation_time')
+                        if creation_time:
+                            entry['creation_time'] = time.strftime(
+                                '%Y-%m-%dT%H:%M:%S',
+                                time.gmtime(float(creation_time)))
+                        activity_count = hascomponent(dsobj, 'activity count')
+                        if activity_count:
+                            entry['activity count'] = activity_count
+
+        # items() and print() run unchanged under Python 2 and Python 3
+        for k, v in self._dsdict.items():
+            for i in v:
+                line = '%s, ' % (k)
+                for key in ('activity', 'mime_type', 'activity count',
+                            'creation_time', 'mtime'):
+                    # a missing attribute still yields an empty column
+                    line += '%s, ' % (i.get(key, ''))
+                if 'score' in i:
+                    # one column per palette subtotal, in PALS order
+                    for j in i['score']:
+                        line += '%s, ' % (str(j))
+                else:
+                    line += ', '
+                print(line)
+
+
+def hascomponent(path, component):
+    ''' Return metadata attribute, if any
+
+    Reads the first line of <path>/metadata/<component> and returns it
+    without its line terminator.  Returns False when the file does not
+    exist or that line is empty. '''
+    filename = os.path.join(path, 'metadata', component)
+    if not os.path.exists(filename):
+        return False
+    with open(filename) as fd:
+        # readline() keeps the line terminator; strip it so callers can
+        # compare the value with == and print it within one output row
+        data = fd.readline().rstrip('\r\n')
+    if not data:
+        return False
+    return data
+
+
+# Block name -> rubric category
+TACAT = {'clean':'forward', 'forward':'forward', 'back':'forward',
+         'left':'forward', 'right':'forward', 'arc': 'arc',
+         'xcor': 'coord', 'ycor': 'coord', 'heading': 'coord',
+         'setxy': 'setxy', 'seth': 'setxy', 'penup': 'pen', 'pendown': 'pen',
+         'setpensize': 'pen', 'setcolor': 'pen', 'pensize': 'pen',
+         'color': 'pen', 'setshade': 'pen', 'setgray': 'pen', 'shade': 'pen',
+         'gray': 'pen', 'fillscreen': 'pen', 'startfill': 'fill',
+         'stopfill': 'fill', 'plus2': 'number', 'minus2': 'number',
+         'product2': 'number', 'division2': 'number', 'remainder2': 'number',
+         'sqrt': 'number', 'identity2': 'number', 'and2': 'boolean',
+         'or2': 'boolean', 'not': 'boolean', 'greater2': 'boolean',
+         'less2': 'boolean', 'equal2': 'boolean', 'random': 'random',
+         'repeat': 'repeat', 'forever': 'repeat', 'if': 'ifthen',
+         'ifelse': 'ifthen', 'while': 'ifthen', 'until': 'ifthen',
+         'hat': 'action', 'stack': 'action', 'storein': 'box', 'box': 'box',
+         'luminance': 'sensor', 'mousex': 'sensor', 'mousey': 'sensor',
+         'mousebutton2': 'sensor', 'keyboard': 'sensor', 'kbinput': 'sensor',
+         'readpixel': 'sensor', 'see': 'sensor', 'time': 'sensor',
+         'sound': 'sensor', 'volume': 'sensor', 'pitch': 'sensor',
+         'resistance': 'sensor', 'voltage': 'sensor', 'video': 'media',
+         'wait': 'media', 'camera': 'media', 'journal': 'media',
+         'audio': 'media', 'show': 'media', 'setscale': 'media',
+         'savepix': 'media', 'savesvg': 'media', 'mediawait': 'media',
+         'mediapause': 'media', 'mediastop': 'media', 'mediaplay': 'media',
+         'speak': 'media', 'sinewave': 'media', 'description': 'media',
+         'push':'extras', 'pop':'extras', 'printheap':'extras',
+         'clearheap':'extras', 'isheapempty2':'extras', 'chr':'extras',
+         'int':'extras', 'myfunction': 'python', 'userdefined': 'python',
+         'loadblock': 'python', 'loadpalette': 'python'}
+# Rubric category -> palette
+TAPAL = {'forward': 'turtlep', 'arc': 'turtlep', 'coord': 'turtlep',
+         'setxy': 'turtlep', 'pen': 'penp', 'fill': 'penp', 'number': 'numberp',
+         'random': 'numberp', 'boolean': 'numberp', 'repeat': 'flowp',
+         'ifthen': 'flowp', 'action': 'boxp', 'box': 'boxp',
+         'sensor': 'sensorp', 'media': 'mediap', 'extras': 'extrasp',
+         'python': 'extrasp'}
+# Rubric points, added once per category and once per palette used
+TASCORE = {'forward': 3, 'arc': 3, 'setxy': 2.5, 'coord': 4, 'turtlep': 5,
+           'pen': 2.5, 'fill': 2.5, 'penp': 5,
+           'number': 2.5, 'boolean': 2.5, 'random': 2.5, 'numberp': 0,
+           'repeat': 2.5, 'ifthen': 7.5, 'flowp': 10,
+           'box': 7.5, 'action': 7.5, 'boxp': 0,
+           'media': 5, 'mediap': 0,
+           'python': 5, 'extras': 5, 'extrasp': 0,
+           'sensor': 5, 'sensorp': 0}
+# Order of the per-palette subtotals in the score list
+PALS = ['turtlep', 'penp', 'numberp', 'flowp', 'boxp', 'sensorp', 'mediap',
+        'extrasp']
+
+
+def hasturtleblocks(path):
+    ''' Parse turtle block data and generate score based on rubric
+
+    Each category and each palette found in the project counts once.
+    Returns a list with one subtotal per entry of PALS, or None if
+    there is no 'data' file. '''
+
+    if not os.path.exists(os.path.join(path, 'data')):
+        return None
+    blocks = []
+    # 'with' closes the file as soon as the blocks have been read
+    with open(os.path.join(path, 'data')) as fd:
+        # block name is second token in each line
+        for line in fd:
+            tokens = line.split(',')
+            if len(tokens) > 1:
+                token = tokens[1].strip('" [')
+                blocks.append(token)
+
+    score = [0] * len(PALS)
+    cats = []
+    pals = []
+
+    for b in blocks:
+        if b in TACAT:
+            if not TACAT[b] in cats:
+                cats.append(TACAT[b])
+    for c in cats:
+        if c in TAPAL:
+            if not TAPAL[c] in pals:
+                pals.append(TAPAL[c])
+    for c in cats:
+        if c in TASCORE:
+            score[PALS.index(TAPAL[c])] += TASCORE[c]
+    for p in pals:
+        if p in TASCORE:
+            score[PALS.index(p)] += TASCORE[p]
+
+    return score
+
+
+def isactivity(path):
+    ''' Return activity name
+
+    The name is the last dot-separated part of the 'activity' metadata
+    attribute; returns False when that attribute is missing or empty. '''
+    activity = hascomponent(path, 'activity')
+    if not activity:
+        return False
+    else:
+        return activity.split('.')[-1]
+
+
+def isdsdir(path):
+    ''' Only interested if it is a datastore directory
+
+    Returns True when path is a directory that contains DIROFINTEREST,
+    False otherwise. '''
+    if not os.path.isdir(path):
+        return False
+    if not os.path.exists(os.path.join(path, DIROFINTEREST)):
+        return False
+    return True
+
+
+if __name__ == '__main__':
+    ParseMain()
--
cgit v0.9.1