#!/usr/bin/perl

use strict;
use warnings;
use Data::Dumper;

#
# Remove already created screenshots from a spip xml dump
#
# Copyright (C) 2006 Bruno Coudoin.
#
#  spip_screenshot_remove is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  version 2 published by the Free Software Foundation.
#
#  Intltool is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors: Bruno Coudoin
#
# -----------------------------------------------------------------------------------------
# USAGE:
#  This tool requires the spip xml database as the first parameter.
#  The new base will go on stdout
#------------------------------------------------------------------------------------------

# Date fields compared inside each article. An article whose date fields
# all carry the same value is considered generated and is removed.
my @DATE_FIELDS = qw/date date_redac date_modif/;

# Run as a script only; when the file is loaded with require/do the
# subroutines below are defined without touching @ARGV or STDOUT.
exit main(@ARGV) unless caller;

# main($dump_file)
#
# Command-line entry point: validates the argument, opens the dump and
# streams the filtered result to STDOUT.
# Returns the process exit status (0 on success, 1 on usage error).
# Dies if the file exists but cannot be opened.
sub main {
    my ($dump_file) = @_;

    if (!defined $dump_file || !-f $dump_file) {
        # STDOUT carries the filtered database, so the usage message goes to
        # STDERR, and we stop instead of trying to open a missing file.
        print STDERR "Usage: spip_screenshot_remove <spip_xml_dump>\n";
        return 1;
    }

    open(my $spip, '<', $dump_file)
        or die "Cannot open '$dump_file': $!\n";
    filter_dump($spip, \*STDOUT);
    close($spip);

    return 0;
}

# filter_dump($in_fh, $out_fh)
#
# Copies the dump read from $in_fh to $out_fh line by line, dropping every
# <article>...</article> section whose date fields are all identical.
# Text outside of articles is copied unchanged. Every emitted line is
# terminated with "\n".
sub filter_dump {
    my ($in_fh, $out_fh) = @_;

    my $candidate = 0;     # true while the current article may still be generated
    my $date      = "";    # first date value seen in the current article
    my $pending   = "";    # text buffered until we know whether to emit it
    my $current_article;   # last article id seen; only used by the debug trace

    while (my $line = <$in_fh>) {
        chomp($line);

        if ($line =~ /([0-9]+)<\/id_article>/) {
            $current_article = $1;
        }

        if ($line =~ /<article>/) {
            # Whatever was buffered so far lies outside of any article:
            # emit it now so it is not lost if this article is filtered out.
            if ($pending ne "") {
                print {$out_fh} $pending;
            }
            $candidate = 1;
            $date      = "";
            $pending   = $line . "\n";
        }
        elsif ($line =~ /<\/article>/) {
            if (!$candidate) {
                # It's a valid article
                print {$out_fh} $pending . $line . "\n";
            }
            else {
                #print STDERR "FILTER OUT: $current_article\n";
            }
            $candidate = 0;
            $date      = "";
            $pending   = "";
        }
        else {
            $pending .= $line . "\n";
        }

        next unless $candidate;

        # We are in an article, track the date fields.
        # If all dates are the same, then it's a generated article.
        for my $field (@DATE_FIELDS) {
            next unless $line =~ /<$field>/;

            my ($newdate) = $line =~ /<$field>([0-9\-: ]+)<\/$field>/;

            if (defined $newdate && $date eq "") {
                $date = $newdate;
                next;
            }

            # Dates are compared as strings: a numeric comparison would only
            # look at the leading year. A field that cannot be parsed proves
            # nothing, so the article is kept in that case as well.
            if (!defined $newdate || $newdate ne $date) {
                $candidate = 0;
                $date      = "";
                last;
            }
        }
    }

    # Flush whatever follows the last article (or an unterminated article).
    print {$out_fh} $pending;

    return;
}