#! /usr/bin/perl # Fix-camwest-pages.pl # To automate the editing of CAMWEST web pages to # remove the old table-based layout, replace with new CSS based layout. # Also replace old menu with new dynamic CSS menu. # CAMWEST web site is at http://camwest.pps.com.au/ # foreach html file, if there is no .shtml file, # 0. slurp old html file into memory since some tags are multiline. # (no html files on the site challenge RAM in this machine) # 1. read memory string, from the extract: # a. tag and contents # b. all <meta> tags # 2. continue reading memory string, from the body extract: # content, starting by extracting the first heading after # '#BeginEditable "Title"' # then skipping until reach comment '#BeginEditable "Main"' # finishing just before at the last </td> before # '<!--Second Column-->' # 3. Write the .shtml file, using a template file, # substituting the title and meta in the head, # substituting the content in the body. # If there is no '#BeginEditable "Main"' or '<!--Second Column-->' # then we print an error message and stop processing that html file. # Copyright (C) 2006 Nick Urbanik <nicku@nicku.org> # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. use warnings; use strict; use Getopt::Long; use Carp; sub check_file($) { my ( $htmlfile ) = @_; return unless $htmlfile =~ /^(.*)\.html?$/; my $base = $1; return if -e "$base.shtml"; return $base; } sub slurp_file($) { my ( $htmlfile ) = @_; open my $html_fh, '<', $htmlfile or die "unable to open $htmlfile: $!"; return do { local $/; <$html_fh> }; } sub extract_header_info(\$) { my ( $text_ref ) = @_; my ( $title, @meta ); if ( $$text_ref =~ m{(<title>.*?) }xms ) { $title = $1; } while ( $$text_ref =~ m{ () }gxms ) { push @meta, $1; } return unless $title; return ( $title, @meta ); } sub extract_body(\$) { my ( $text_ref ) = @_; my ( $first_header, $content ); if ( $$text_ref =~ m{ \s* (.*?) }xms ) { $first_header = $1; } if ( $$text_ref =~ m{ \s* (\S.*?) (?:)? \s* \s* ]*> \s* }xms ) { $content = $1; $content =~ s{\s*

(?:\ )?\s*

\s*}{}gxms; } return unless $first_header and $content; return ( $first_header, \$content ); } sub write_using_template($$$$\$@) { my ( $output_filename, $template_filename, $title, $first_heading, $content_ref, @meta ) = @_; ### print "TITLE='$title'\n"; ### print "META='@meta'\n"; ### print "FIRST_HEADING='$first_heading'\n"; ### print "CONTENT=\n'$$content_ref'"; die "WAS ABOUT TO CLOBBER '$output_filename'\n" if -e $output_filename; open my $template_fh, '<', $template_filename or die "Unable to open template file '$template_filename'\n: $!"; open my $shtml_fh, '>', $output_filename or die "Unable to write to '$output_filename': $!"; my $meta = join "\n ", @meta; while ( <$template_fh> ) { s{__META__}{$meta}xms; s{__TITLE__}{$title}xms; s{__FIRST_HEADER__}{$first_heading}xms; s{__MAIN_BODY__}{$$content_ref}xms; print $shtml_fh $_; } } sub check_ok($$$) { my ( $name, $var, $file ) = @_; warn "$name is empty in file '$file'\n" unless $var; } sub usage() { ( my $prog = $0 ) =~ s{^.*/}{}xms; print < \$template_filename ) or usage; usage unless $template_filename; HTMLFILE: foreach my $htmlfile ( @ARGV ) { my $base = check_file $htmlfile; check_ok '$base', $base, $htmlfile; next HTMLFILE unless $base; my $text = slurp_file $htmlfile; check_ok '$text', $text, $htmlfile; my ( $title, @meta ) = extract_header_info $text; check_ok '$title', $title, $htmlfile; check_ok '$meta[ 0 ]', $meta[ 0 ], $htmlfile; next HTMLFILE unless $title; my ( $first_heading, $content_ref ) = extract_body $text; warn "Problem with $htmlfile\n" unless $content_ref; next HTMLFILE unless $content_ref; write_using_template "$base.shtml", $template_filename, $title, $first_heading, $$content_ref, @meta; print qx/tidy -modify -indent -asxhtml -clean --doctype strict "$base.shtml"/; } } main();