User:Eira/fix-cp

These scripts convert the Google Groups HTML files from the CP and Fab Five archives into pages whose links are fully qualified (absolute) URLs. Copy each script below into a file with the name given, make both executable, and run fix-cp.sh in a POSIX environment from the directory that contains the archives. (Tested on Cygwin.)

fix-cp.sh:

#!/bin/sh

# Mirror the directory layout of both archives under ./fixed, then run
# fix-html.pl on every *.htm file, writing each result to the mirrored path.
# Run from the directory that contains "CP", "Fab Five" and fix-html.pl.

for archive in "CP" "Fab Five"; do
    # -p creates ./fixed and any parents as needed and makes re-runs harmless.
    # The path is handed to sh as $1 instead of being embedded as "fixed/{}":
    # POSIX only guarantees substitution of a standalone {} argument.
    find "$archive" -type d -exec sh -c 'mkdir -p "fixed/$1"' sh {} \;

    # -iname matches the extension case-insensitively (*.htm, *.HTM, ...).
    find "$archive" -type f -iname '*.htm' \
        -exec sh -c './fix-html.pl "$1" "fixed/$1"' sh {} \;
done

fix-html.pl:

#!/usr/bin/perl

use strict;
use warnings;

# Rewrites site-relative links in a saved Google Groups HTML page so that
# they point at http://groups.google.com/ explicitly.
#
# Usage: fix-html.pl INFILE OUTFILE

# Returns a copy of $line with site-relative references made absolute:
#   "/path     becomes  "http://groups.google.com/path     (quoted attributes)
#   url(/path  becomes  url(http://groups.google.com/path  (CSS url() values)
# A second slash right after the first ("//host/...") marks a
# protocol-relative URL that already names its host, so it is left untouched.
sub absolutize_links {
    my ($line) = @_;

    $line =~ s{"/(?!/)}{"http://groups.google.com/}g;
    $line =~ s{url\(/(?!/)}{url(http://groups.google.com/}g;

    return $line;
}

# Copies INFILE to OUTFILE line by line, absolutizing links on the way.
# Dies with a message if arguments are missing or any file operation fails.
sub main {
    my ($infile, $outfile) = @_;

    die "Usage: $0 INFILE OUTFILE\n"
        unless defined $infile && defined $outfile;

    print "converting: $infile > $outfile\n";

    open(my $in, "<", $infile)
        or die "Can't open $infile for reading: $!";
    open(my $out, ">", $outfile)
        or die "Can't open $outfile for writing: $!";

    # The loop condition must read from the input handle; an empty
    # condition would spin forever without consuming any input.
    while (my $line = <$in>) {
        print {$out} absolutize_links($line)
            or die "Can't write to $outfile: $!";
    }

    close($in);
    # Buffered write errors only surface when the output handle is closed.
    close($out) or die "Can't close $outfile: $!";

    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without touching any files.
main(@ARGV) unless caller;

1;