#!/usr/bin/perl
##############################################################################
# By BumbleBeeWare.com 2006
# Simple RSS XML grabber and parser
# rss.cgi
# Simple program to display RSS from text-based XML files on your web pages.
##############################################################################

use strict;
use warnings;

# The URL of the XML file you want to access; include the http://
our $feedurl = "http://bumblebeeware.com/rssxml/test.xml";

# Maximum number of seconds to wait for the feed.
our $timeout = 30;

# The feed is remote, untrusted input, so its text is HTML-escaped before it
# is written into the page. Set this to 1 only for feeds you trust, to emit
# the description exactly as it appears in the feed (the historical
# behaviour, which lets HTML inside CDATA sections render).
our $allow_feed_html = 0;

# Raw body of the fetched feed; filled in by the main program and read by
# parse_data().
our $tempdata = '';

###################################################
# MAIN PROGRAM
###################################################

$tempdata = fetch_feed($feedurl, $timeout);

if (defined $tempdata) {
    print_page();
}
else {
    # A CGI response without a header is reported by the web server as a
    # 500 error, so always answer with something readable. The reason for
    # the failure has already been written to STDERR by fetch_feed().
    print "Content-Type: text/html\n\n";
    print "<p>The feed is currently unavailable.</p>\n";
}

###################################################
# SUBROUTINES
###################################################

# Download the feed.
#   $url         - address of the feed
#   $max_seconds - timeout handed to the user agent
# Returns the response body on success; returns undef (after logging the
# reason to STDERR) when LWP cannot be loaded or the request fails.
sub fetch_feed {
    my ($url, $max_seconds) = @_;

    # Loaded at run time so that a missing module produces a clean error
    # page instead of a compile-time failure.
    if (!eval { require LWP::UserAgent; 1 }) {
        warn "rss.cgi: cannot load LWP::UserAgent: $@";
        return;
    }

    my $ua = LWP::UserAgent->new;

    # identify this user agent to the website being crawled
    $ua->agent("http://bumblebeeware.com RSS for PERL");
    $ua->timeout($max_seconds);

    my $res = $ua->get($url);
    if (!$res->is_success) {
        warn "rss.cgi: could not fetch $url: " . $res->status_line . "\n";
        return;
    }

    return $res->content;
}

# Emit the CGI header, render every item found in $tempdata, then stop.
sub print_page {
    print "Content-Type: text/html\n\n";
    parse_data();
    exit;
}

# Print one HTML paragraph per <item> found in $tempdata, in feed order.
sub parse_data {
    for my $item (extract_items($tempdata)) {
        print format_item($item);
    }
    return;
}

# Return the inner XML of every <item>...</item> element in $xml, in document
# order. Matching is done on the whole document rather than line by line, so
# the result does not depend on where the feed places its line breaks.
sub extract_items {
    my ($xml) = @_;
    return if !defined $xml;

    my @items;
    while ($xml =~ m{<item\b[^>]*>(.*?)</item\s*>}gis) {
        push @items, $1;
    }
    return @items;
}

# Return the text of the first <$tag> element inside $item, or '' when the
# element is absent. A CDATA wrapper is removed and runs of whitespace
# (including line breaks) are collapsed to a single space.
sub extract_field {
    my ($item, $tag) = @_;

    return '' unless $item =~ m{<\Q$tag\E\b[^>]*>(.*?)</\Q$tag\E\s*>}is;
    my $value = $1;

    # remove any CDATA wrapper used to hold the xml together
    if ($value =~ m{<!\[CDATA\[(.*)\]\]>}s) {
        $value = $1;
    }

    $value =~ s/\s+/ /g;
    $value =~ s/\A\s+|\s+\z//g;
    return $value;
}

# Make $text safe to place in HTML text or inside a double-quoted attribute.
# An ampersand that already starts an entity reference is left alone so that
# entities carried over from the XML are not double-encoded.
sub escape_html {
    my ($text) = @_;

    $text =~ s/&(?!(?:#\d+|#x[0-9a-f]+|\w+);)/&amp;/gi;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# Build the HTML paragraph for one item.
sub format_item {
    my ($item) = @_;

    #####################################################################################
    # you may need to modify these fields to match the xml feed you are accessing
    # these seem to be the most common fields used although anything is possible with xml
    # title link url description image
    #####################################################################################
    my %field = map { $_ => extract_field($item, $_) }
                qw(title link description url image);

    my $title       = escape_html($field{title});
    my $description = $allow_feed_html
                    ? $field{description}
                    : escape_html($field{description});

    # Only http(s) targets are linked; anything else (for example a
    # javascript: URL) is shown as plain, unlinked text.
    my $link = $field{link} =~ m{\Ahttps?://}i ? escape_html($field{link}) : '';

    # change these lines to customize your actual html output
    my $heading = qq{<b><font face="arial">$title</font></b>};
    $heading = qq{<a href="$link">$heading</a>} if $link ne '';

    return qq{<p align=left>$heading<font face="arial"> - $description</font></p>\n};
}