#! /usr/bin/perl -w

# NAME
#     fetch.pl - RSS/Atom tiny aggregator
#
# SYNOPSIS
#
#     fetch.pl [config.conf]
#
# DESCRIPTION
#
#     Fetch RSS/Atom feeds read from config file, and generate HTML.
#     If no argument is supplied, config is read from "fetch.conf".
#     HTML file name is filename minus its extension: so config file
#     "fetch.conf" generates "fetch.html".
#
#     Change '$base' to set another HTML directory, and '$max_entries'
#     to set the maximum number of entries shown per feed.
#
#     Mandatory perl modules are:
#       - Encode
#       - LWP::Simple
#       - XML::RSS::Feed
#       - XML::Atom
#
# AUTHOR
#     oz (@tuxaco dot net)
#     http://oz.tuxaco.net/

use strict;
use warnings;

use Encode;
use LWP::Simple qw ( $ua );
use XML::RSS::Feed;
use XML::Atom::Entry;

# Base conf settings you may change
my $base = '/home/oz/whatever';
my $agent = 'Mozilla 5.0';
my $conf = shift || 'fetch.conf' || die("I need a conf. file.\n");
my $max_entries = 12;
my $encoding = 'utf8';

# -----------------------------------------------------------------------------
my $name = $conf;
$name =~ s/^(.*\/)?(.*)\.conf$/$2/g;
my $files = read_conf($conf);
my $output = "$base/$name.html";
my $now = scalar localtime();
my $out = '';

defined $files or die("No feeds ? $!");

# Begin HMTL file
add(<<"HTML");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"; xml:lang="en" lang="en">
<html>
  <head>
    <title>My tiny aggregator</title>
    <link rel="stylesheet" href="style.css" media="screen" name="default" />
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  </head>
  <body>
    <div id="itemlist">
    <div id="lastupdate">
      Reading: $name<br />
      Last update: $now
    </div>
HTML

status("Start parsing: \n");
$ua->agent($agent);

# Each feed in config file is fetched and parsed
foreach my $feed ( @$files ) {
    my $item_count = 0;
    my $file = $feed->{'url'};
    my $data = undef;

    $file =~ s|.*/||;
    status("    * $feed->{'name'}... ");

    # Get feed
    my $query = $ua->get($feed->{'url'});
    if ($query->is_success) {
        $data = $query->content;
    } else {
        status("Failed (HTTP error).\n");
        next;
    }

    # Parse feed
    my $channel = ($data =~ /<(rss|rdf)/imo) ? rss($data) : atom($data);
    unless ($channel) {
        status("Failed parsing.\n");
        next;
    };

    # Print feed
    my $desc  = encode( $encoding, $channel->{'desc'} );
    my $title = encode( $encoding, $channel->{'title'} );
    add(<<"HTML");
  <div class="item">
HTML

    add(qq|    <h1><a href="$channel->{'link'}" title="$feed->{'name'}">|);
    add(qq|$title</a></h1>\n|);
    add(qq|    <p>$desc</p>\n|);
    add("    <ul>\n");

    # Print feed entries
    foreach my $item ( @{ $channel->{'entries'} } )
    {
        my $title = encode(
            $encoding,
            defined ($item->{'title'}) ? $item->{'title'} : $feed->{'name'}
        );
        add(qq|      <li><a href="$item->{'link'}">$title</a></li>\n|);
        $item_count++;
        last if ($item_count >= $max_entries);
    }

    add(<<HTML);
    </ul>
  </div>
HTML
    status("Ok.\n");
}

# End HTML
add(<<HTML);
    </div>
    </body>
</html>
HTML

umask(0022) or die("Can't change file creation mask: $!");
open (OUT, "> $output") or die("Output file ($output): $!");
print OUT $out;
close OUT;

status("\nAll done, bye.\n");

# -----------------------------------------------------------------------------
# Returns entry list for an RSS feed (in $data);
sub rss
{
    my ($data) = @_;

    # ugly hack 'cause lib-expat often misses the 15 encoding while 
    # the first version remains compatible in 99.99% cases.
    if ( $data =~ /encoding="iso-8859-15"\?>/io ) {
        $data =~ s/encoding="iso-8859-15"/encoding="iso-8859-1"/i;
    }

    my $rss = XML::RSS->new();
    status(" [R] ");
    eval {
        $rss->parse($data);
    } or return undef;
    return { 'link'    => $rss->{'channel'}->{'link'},
             'title'   => $rss->{'channel'}->{'title'},
             'desc'    => $rss->{'channel'}->{'description'},
             'entries' => $rss->{'items'},
    };
}

# Returns entry list for an Atom feed (in $data);
sub atom
{
    my ($data) = @_;
    my $feed;
    my @my_entries;

    status(" [A] ");
    eval {
        $feed = XML::Atom::Feed->new(\$data);
    } or return undef;
    return undef unless $feed;

    map { push @my_entries, {
            'title' => $_->title ? $_->title : 'No title ?',
            'link'  => atom_html_link($_),
            }
        } $feed->entries;
    return {
        'title'   => $feed->title   ? $feed->title   : 'No title ?',
        'desc'    => $feed->tagline ? $feed->tagline : 'No description',
        'link'    => atom_html_link($feed),
        'entries' => \@my_entries,
    };
}

# Get first text/html link from an XML::Atom::Entry link list.
sub atom_html_link
{
    my ($link) = @_;
    my @linklist = $link->link;

    foreach my $link (@linklist) {
        next if ($link->type ne 'text/html'
             or  $link->rel  ne 'alternate');
       return $link->href;
    }
    return '';
}

# Read config.
sub read_conf
{
    my ($file) = @_;
    my @items = ();

    open (CONF, $file) or die("Can't open '$file': $!.\n");
    while (my $line = <CONF>) {
        chomp $line;
        next if $line =~ /^\s*#|^\s*$/;

        my $feed;
        if ( $line =~ /^(.*?)\s*=\s*(.*)$/ ) {
            $feed->{'name'} = $1;
            $feed->{'url'} = $2;
        }
        push @items, $feed;
    }
    close CONF;
    return \@items;
}

# Append args to $out
sub add
{
    $out .= shift;
}

# Print status message on standard error
sub status
{
    my ($str) = @_;
    return unless $str;
    print STDERR $str;
}