#!/usr/bin/perl
# Author: Peter R. Wood, http://prwdot.org/
#
# Downloads an RSS feed, fetches the page behind every feed item, reduces
# that page to paragraph-only text and writes the text as a chain of small,
# linked section files:
#
#   $output_dir/<ChannelTitle>/<ItemTitle>.link         link to section 1
#   $output_dir/<ChannelTitle>/sections/<ItemTitle>_N   section N of the item
#
# NOTE(review): the copy of this script that was reviewed had lost every
# piece of HTML markup inside its regexes and string literals.  The tag
# patterns, the link markup and the content delimiters below were
# reconstructed from the surviving comments and from the intact "</p>" /
# "--ENDP--" lines -- confirm them against the upstream script.

use strict;
use warnings;
use LWP::Simple;
use XML::RAI;

# --- Configuration ---------------------------------------------------------

# Directory under which the per-channel directory is created.
my $output_dir = "";

# URL of the RSS feed to process.
my $feed_url = "";

# A section is closed as soon as adding the next line would push it past
# this many characters.
my $max_section_length = 2000;

# Delimiters of the interesting part of a downloaded page.
# NOTE(review): the original delimiters were lost; <body>...</body> is an
# assumption -- adjust for the site being fetched.
my $content_start_re = qr/<body\b[^>]*>/i;
my $content_end_re   = qr/<\/body\s*>/i;

# --- Main ------------------------------------------------------------------

my $content = LWP::Simple::get($feed_url);
die "Couldn't get content for $feed_url: $!" unless $content;

my $rai = XML::RAI->parse($content);

# Echo the channel header so the user can see which feed is being processed.
for my $field (qw(title description link)) {
    my $value = text_or_empty( $rai->channel->$field );
    print "$value\n";
}

# File-system-safe channel name: the title with every non-word char removed.
my $plainchanneltitle = text_or_empty( $rai->channel->title );
$plainchanneltitle =~ s/\W//g;

my $channeldir = "$output_dir/$plainchanneltitle";

# Create each directory independently so that an existing channel directory
# without its "sections" subdirectory is repaired as well.
for my $dir ( $channeldir, "$channeldir/sections" ) {
    next if -d $dir;
    mkdir($dir) or die "Couldn't create directory $dir: $!";
}

foreach my $item ( @{ $rai->items } ) {
    my $title = text_or_empty( $item->title );
    print $title . " ";

    my $plaintitle = $title;
    $plaintitle =~ s/\W//g;

    my $link         = $item->link;
    my $item_content = defined $link ? LWP::Simple::get($link) : undef;
    unless ( defined $item_content ) {
        print "\n";
        warn "Couldn't get content for item '$title', skipping\n";
        next;
    }

    my @sections =
      split_into_sections( html_to_paragraph_text($item_content),
        $max_section_length );
    unless (@sections) {
        print "\n";
        warn "No text found for item '$title', skipping\n";
        next;
    }

    my $section_count = scalar @sections;
    for my $section_num ( 1 .. $section_count ) {
        my $sectionfile =
          "$channeldir/sections/$plaintitle" . "_" . $section_num;

        open( my $section_fh, '>', $sectionfile )
          or die "Couldn't open $sectionfile for writing: $!";
        print {$section_fh} $sections[ $section_num - 1 ] . "\n";

        # Every section except the last links to its successor, which lives
        # in the same directory.
        # NOTE(review): link markup reconstructed -- confirm.
        if ( $section_num < $section_count ) {
            my $next_section = $plaintitle . "_" . ( $section_num + 1 );
            print {$section_fh}
              "<A HREF=\"$next_section\">Continue Reading</A>";
        }

        close($section_fh) or die "Couldn't close $sectionfile: $!";

        # Progress indicator: one number per section written.
        print "$section_num ";
    }
    print "\n";

    # The master file is the entry point of the item; it points at section 1.
    # NOTE(review): link markup reconstructed -- confirm.
    my $masterfile = "$channeldir/$plaintitle.link";
    open( my $master_fh, '>', $masterfile )
      or die "Couldn't open $masterfile for writing: $!";
    print {$master_fh}
      "<A HREF=\"sections/${plaintitle}_1\">Section 1</A>";
    close($master_fh) or die "Couldn't close $masterfile: $!";
}

# --- Helpers ---------------------------------------------------------------

# Returns its argument, or the empty string when the argument is undefined,
# so that optional feed fields can be printed without warnings.
sub text_or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Reduces an HTML page to text in which <P>...</P> is the only markup left.
#   $html - the raw page source
# Returns the reduced text.
sub html_to_paragraph_text {
    my ($html) = @_;

    # Remove all but text: keep only what lies between the content
    # delimiters.  When the delimiters are not found the page is left whole.
    $html =~ s/.*?$content_start_re(.*?)$content_end_re.*/$1/s;

    # Mark P tags for later so they survive the blanket tag removal below.
    $html =~ s/<p\b[^>]*>/--STARTP--/gi;
    $html =~ s/<\/p\s*>/--ENDP--/gi;

    # Remove BR tags, leaving a space so adjacent words stay separated.
    $html =~ s/<br\b[^>]*>/ /gi;

    # Remove every remaining tag, then restore the paragraph markers.
    $html =~ s/<[^>]*>//g;
    $html =~ s/--STARTP--/<P>/g;
    $html =~ s/--ENDP--/<\/P>/g;

    return $html;
}

# Packs the lines of $text into sections of at most $max_length characters.
#   $text       - text to split; lines are separated by \n and/or \r
#   $max_length - maximum length of one section
# Returns the list of sections in order.  Lines are never split, so a single
# line longer than $max_length becomes an oversize section of its own.  Every
# non-empty line ends up in exactly one section, including the line that
# triggers a section break and the final, partially filled section.
sub split_into_sections {
    my ( $text, $max_length ) = @_;

    my @sections;
    my $current = '';
    for my $line ( split /[\n\r]+/, $text ) {
        next if $line eq '';

        # Close the current section when this line would not fit any more.
        if ( $current ne ''
            && length($current) + 1 + length($line) > $max_length )
        {
            push @sections, $current;
            $current = '';
        }
        $current = $current eq '' ? $line : "$current\n$line";
    }
    push @sections, $current if $current ne '';

    return @sections;
}