#!/usr/bin/perl
# Author: Peter R. Wood, http://prwdot.org/
#
# Fetches an RSS feed, downloads the page every item links to, reduces that
# page to paragraph-only text and writes it to disk in sections:
#
#   $output_dir/<channel>/<item>.link           master file for the item
#   $output_dir/<channel>/sections/<item>_<n>   one file per section
#
# <channel> and <item> are the feed/item titles with every non-word
# character removed.

use strict;
use warnings;

use LWP::Simple;
use XML::RAI;

# Configuration: fill these in before running.
my $output_dir = "";
my $feed_url   = "";

# A section is closed as soon as appending the next line of text would push
# it past this many characters.
my $section_limit = 2000;

# Write one section file.
#   $file     - path of the section file to create (truncated if it exists)
#   $title    - item title, written at the top of the section
#   $text     - body text of the section
#   $has_more - true when further sections follow this one
# Dies when the file cannot be opened or closed.
sub write_section {
    my ( $file, $title, $text, $has_more ) = @_;

    open( my $fh, '>', $file ) or die "Couldn't open $file for writing: $!";

    # NOTE(review): the title, the separator and the "Continue Reading" text
    # were probably wrapped in HTML markup (heading / link to the next
    # section) that was lost from the source; only the visible text is
    # reproduced here - confirm against the original script.
    print {$fh} $title;
    print {$fh} "$text\n";
    print {$fh} "\n\n";
    print {$fh} "Continue Reading" if $has_more;

    # Buffered write errors only surface at close time.
    close($fh) or die "Couldn't close $file: $!";
    return;
}

my $content = LWP::Simple::get($feed_url);

# LWP::Simple::get() returns undef on failure and does not set $!, so errno
# is deliberately left out of the message.
die "Couldn't get content for $feed_url"
    unless defined $content && length $content;

my $rai = XML::RAI->parse($content);

print $rai->channel->title . "\n";
print $rai->channel->description . "\n";
print $rai->channel->link . "\n";

my $plainchanneltitle = $rai->channel->title;
$plainchanneltitle =~ s/\W//g;
my $channeldir = "$output_dir/$plainchanneltitle";

# Create each directory on its own so that an existing channel directory
# that lacks "sections" is repaired instead of breaking every later write.
foreach my $dir ( $channeldir, "$channeldir/sections" ) {
    next if -d $dir;
    mkdir($dir) or die "Couldn't create directory $dir: $!";
}

foreach my $item ( @{ $rai->items } ) {
    print $item->title . " ";
    my $plaintitle = $item->title;
    $plaintitle =~ s/\W//g;

    my $link         = $item->link;
    my $item_content = defined $link ? LWP::Simple::get($link) : undef;
    unless ( defined $item_content ) {
        warn "Couldn't get content for item '" . $item->title . "', skipping\n";
        print "\n";
        next;
    }

    # Remove all but text.
    # NOTE(review): the pattern in the source was garbled (as written it
    # deleted the whole string); presumably it kept only the contents of the
    # <body> element - confirm. Pages without a <body> are left untouched.
    $item_content =~ s{.*?<body[^>]*>(.*?)</body>}{$1}sgi;

    # Mark P tags for later. Attributes are tolerated so that an opening tag
    # such as <p class="x"> is not stripped while its </p> survives.
    $item_content =~ s/<p\b[^>]*>/--STARTP--/sgi;
    $item_content =~ s/<\/p>/--ENDP--/sgi;

    # Remove BR tags. (An empty pattern would silently reuse the last
    # successful pattern, so the tag has to be spelled out.)
    $item_content =~ s/<br\b[^>]*>//sgi;

    # Get rid of all other tags
    $item_content =~ s/<.+?>//sgi;

    # Put paragraphs back in
    $item_content =~ s/--STARTP--/<P>/sg;
    $item_content =~ s/--ENDP--/<\/P>/sg;

    # split already removes every CR/LF, so the pieces need no further
    # newline stripping.
    my @sections     = split( /[\n\r]+/, $item_content );
    my $section_text = '';
    my $section_num  = 1;

    foreach my $section (@sections) {

        # Flush the accumulated text before it would exceed the limit. The
        # current line is NOT discarded: it opens the next section. Nothing
        # is flushed while the accumulator is empty, so a single over-long
        # line does not produce an empty section file.
        if ( length($section_text)
            && length( $section_text . $section ) > $section_limit )
        {
            write_section( "$channeldir/sections/$plaintitle" . "_" . $section_num,
                $item->title, $section_text, 1 );
            print "$section_num ";
            $section_num++;
            $section_text = '';
        }

        $section_text =
            length($section_text)
            ? join( "\n", $section_text, $section )
            : $section;
    }

    # Write whatever is left. Section 1 is always written, even when empty,
    # because the master file below refers to it.
    if ( length($section_text) || $section_num == 1 ) {
        write_section( "$channeldir/sections/$plaintitle" . "_" . $section_num,
            $item->title, $section_text, 0 );
        print "$section_num ";
    }
    print "\n";

    my $masterfile = "$channeldir/$plaintitle.link";
    open( my $master, '>', $masterfile )
        or die "Couldn't open $masterfile for writing: $!";

    # NOTE(review): presumably this was a link to the first section file
    # whose markup was lost from the source - confirm.
    print {$master} "Section 1";
    close($master) or die "Couldn't close $masterfile: $!";
}