#!/usr/bin/perl
# -*- cperl -*-
#
# Fix broken Latin-2 text in OpenOffice.org documents.
#
# Each file named on the command line is treated as a zip archive.  It is
# unpacked into a private temporary directory, every *.xml file found at
# the top level or one directory below is rewritten, the tree is zipped
# up again, and the new archive replaces the original.  The original is
# kept next to it as "<file>~".
#
# Two rewrites are applied to each XML file:
#   1. characters listed in %latin2_for are replaced by their Latin-2
#      counterparts (EVERY occurrence is replaced, including legitimate
#      uses of characters such as the copyright sign);
#   2. a trailing " CE" / " CE<digit>" is stripped from font names in
#      font=, font-name= and font-family= attributes.
#
# Requires the external commands unzip, zip, sh, mv and rm.

use strict;
use warnings;
use File::Temp qw(tempdir);

unless (@ARGV) {
    my $self = $0;
    $self =~ s{^.*/}{};
    print "$self - fix broken Latin-2 encoding in OpenOffice.org\n";
    print " documents imported from M\$ Office\n\n";
    print "May be distributed under GNU General Public Licence (Version 2)\n";
    print "Copyright (c) 2002 by Petr Pajas\n\n";
    print "Usage: $self <file> [<file> ...]\n\n";
    exit 0;
}

# Wrongly decoded character => intended Latin-2 character.
# NOTE: there is deliberately no "use utf8" here.  The literals stay raw
# byte sequences and are matched against the raw bytes read from the XML
# files, so no decoding/encoding layer is needed on the file handles.
my %latin2_for = (
    'È' => 'Č',
    '©' => 'Š',
    'è' => 'č',
    '«' => 'Ť',
    'Ì' => 'Ě',
    '®' => 'Ž',
    'ì' => 'ě',
    'Ï' => 'Ď',
    'ï' => 'ď',
    'Ò' => 'Ň',
    'ò' => 'ň',
    '¹' => 'š',
    'Ø' => 'Ř',
    'Ù' => 'Ů',
    'ø' => 'ř',
    'ù' => 'ů',
    '»' => 'ť',
    '¾' => 'ž',
);

# One alternation matching any key of the map; the capture lets the
# substitution look the replacement up without resorting to $&.
my $misread_alternatives = join '|', map { quotemeta } keys %latin2_for;
my $misread_re           = qr/($misread_alternatives)/;

foreach my $f (@ARGV) {
    print "Fixing $f:\n";

    # Private, unpredictable work area (created with mode 0700).  CLEANUP
    # removes it at program exit even if we die half-way through.
    my $work_dir = tempdir('oo__convert-XXXXXX', TMPDIR => 1, CLEANUP => 1);
    my $tmpdir   = "$work_dir/content";
    my $tmpzip   = "$work_dir/fixed.zip";
    mkdir $tmpdir, 0700 or die "$tmpdir: mkdir failed: $!";

    # List-form system(): the file name never passes through a shell.
    system('unzip', '-d', $tmpdir, $f) == 0
        or die "$f: unzip failed!";

    foreach my $file (glob("$tmpdir/*.xml"), glob("$tmpdir/*/*.xml")) {
        print " fixing XML file $file\n";

        open my $in, '<', $file or die "$file: cannot open for reading: $!";
        my $content = do { local $/; <$in> };
        close $in;
        $content = '' unless defined $content;

        $content =~ s/$misread_re/$latin2_for{$1}/g;

        # Drop the " CE" suffix from quoted font names.  \2 is the opening
        # quote, so the value must be closed by the same quote character;
        # group 1 always participates (possibly empty) so $1 is defined.
        $content =~ s/font((?:-name|-family)?)=(["'])([^"']+) CE\d?\2/font$1=$2$3$2/ig;

        open my $out, '>', $file or die "$file: cannot open for writing: $!";
        print {$out} $content or die "$file: write failed: $!";
        close $out or die "$file: close failed: $!";
    }

    # Build the new archive BEFORE touching the original.  The paths are
    # handed to sh as positional parameters rather than interpolated into
    # the command text; "&&" keeps zip from running if the cd fails.
    system('sh', '-c', 'cd "$1" && zip -r "$2" *', 'sh', $tmpdir, $tmpzip) == 0
        or die "$f: zip failed!";

    rename $f, "$f~" or die "$f: cannot create backup $f~: $!";
    system('mv', $tmpzip, $f) == 0
        or die "$f: cannot install fixed archive (original kept as $f~)";

    # Free the space right away instead of waiting for exit-time cleanup.
    system('rm', '-rf', $work_dir);
}