#! /usr/bin/env perl
BEGIN{$^W=1}  use strict;

use Data::Dumper;
use LWP::Simple;
use HTML::Entities;
use HTTP::Date qw(time2str str2time);

# Temporary file used as the cache for the generated feed.
my $cache_file = '/tmp/brice.xml';
# Maximum age of the cache, in seconds, before the feed is rebuilt (10 hours).
my $refresh_delay = 10 * 60 * 60;

###########################################
# CGI : Try to use already processed file
###########################################

# Serve the previously generated feed when the cache file exists and is
# younger than $refresh_delay seconds; otherwise fall through so the feed
# is rebuilt below.
if ( -e $cache_file ) {
	# Modification timestamp of the cache file (element 9 of stat).
	my $cache_mtime = ( stat $cache_file )[9];

	# The cache is fresh enough: reuse it.
	if ( defined $cache_mtime && time() - $cache_mtime < $refresh_delay ) {
		# Conditional GET: str2time yields undef for an unparsable date,
		# in which case the full document is sent.
		my $since      = $ENV{'HTTP_IF_MODIFIED_SINCE'};
		my $since_time = $since ? str2time($since) : undef;

		if ( defined $since_time && $since_time >= $cache_mtime ) {
			# The blank line is required to terminate the CGI header block.
			print "Status: 304 Not Modified\n\n";
			exit;
		}

		# Open the cache BEFORE emitting headers, so that an unreadable
		# cache does not produce a header-only response.
		if ( open my $cache_fh, '<', $cache_file ) {
			print "Last-Modified: " . time2str($cache_mtime) . "\n";
#			print "Expires: " . time2str($cache_mtime+$refresh_delay) . "\n";
			print "Content-type: text/xml\n\n";

			while ( my $line = <$cache_fh> ) {
				print $line;
			}
			close $cache_fh;
			exit;
		}
		# The cache could not be read: fall through and regenerate it.
	}
}

#############################################
# Load the main page, parse posts
#############################################
# The remote site implements neither ETag nor Last-Modified, so the whole
# page (about 70k) has to be downloaded on every refresh.
#
# The page is fetched BEFORE any success header is written: otherwise a
# failed download would leave the client with a "200 text/xml" response
# and an empty body.
my $main_page = get('http://brice.the-asw.com/');
unless ($main_page) {
	print "Status: 502 Bad Gateway\n";
	print "Content-type: text/plain\n\n";
	print "Unable to fetch page.\n";
	die "Unable to fetch page.\n\t";
}

# Write the CGI headers for the freshly generated feed.
print "Last-Modified: " . time2str() ."\n";
#print "Expires: " . time2str(time()+$refresh_delay) . "\n";
print "Content-type: text/xml\n\n";

# In void context decode_entities converts the entities in place.
decode_entities($main_page);

# Extract every news entry from the (entity-decoded) page. Each match yields
# the title ($1), the HTML body ($2) and the author ($3); the results are
# collected in @stories as hash references with the keys
# link, summary, title, description and auteur.
my @stories;
while ($main_page =~ m#<div class="news">\s*<div class="date">\s*.{10,200}\s*</div>\s*<h3>(.{1,200})</h3>\s*<div class="texte">\s*(.+?)</div>\s*<div class="auteur">\s*<p>(.{1,200})</p>\s*</div>\s*<hr />\s*</div>#gis) {
	# Copy the captures immediately: the substitutions below reset $1..$3.
	my ($title, $body, $auteur) = ($1, $2, $3);

	# The page exposes no per-story permalink, so every story points to
	# the site's home page.
	my $link = "http://brice.the-asw.com/";

	# The summary starts from the untouched body; its markup is stripped below.
	my $summary = $body;

	# Images: make relative sources absolute. The opening double quote is
	# kept so that it still matches the attribute's closing quote.
	$body =~ s#src="(?!(?:http|ftp|https)://)#src="http://brice.the-asw.com/#gi;
	# Links: same treatment for relative hrefs.
	$body =~ s#href="(?!(?:mailto|http|ftp|https):)#href="http://brice.the-asw.com/#gi;

	# Remove HTML tags from the summary.
	$summary =~ s/<(?:[^>'"]*|(['"]).*?\1)*>//gs;

	# Long summaries are cut at the first whitespace found at or after
	# character 300 (or left whole if there is none), then marked with '...'.
	if (length($summary) > 400) {
		my $cut = 300;
		$cut++ while $cut < length($summary)
			&& substr($summary, $cut, 1) !~ /\s/;
		$summary = substr($summary, 0, $cut) . '...';
	}

	# Every field is XML-escaped: the page was entity-decoded beforehand,
	# so raw '&' or '<' characters may be present, the author included.
	push @stories, {
		link        => $link,
		summary     => HTML::Entities::encode_entities($summary, '<>"\'&'),
		title       => HTML::Entities::encode_entities($title, '<>"\'&'),
		description => HTML::Entities::encode_entities($body, '<>"\'&'),
		auteur      => HTML::Entities::encode_entities($auteur, '<>"\'&'),
	};
}



#############################################
# Output RSS file
#############################################
#my $zero_dir = dirname $0;
# Write the RSS 1.0 (RDF) document describing @stories into the cache file.
# Dies if the file cannot be opened or if the final flush on close fails.
open my $rss_fh, '>', $cache_file
	or die "Unable to write file $cache_file : $!";

# Channel header, up to the opening of the table of contents.
print {$rss_fh} <<"EOF";
<?xml version="1.0" encoding="iso-8859-1"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
>
    <channel rdf:about="http://trolleur.net/cgi-bin/brice.pl">
           <title>The BRICE site web</title>
           <link>http://brice.the-asw.com/</link>
           <description>Complètement bienvenue sur le site web de Brice de Nice !</description>
           <language>fr</language>
		   
		   <items>
		   	<rdf:Seq>
EOF

# Table of contents: one rdf:li per story. The attribute is rdf:resource
# (namespace prefix and colon), not "rdf.resource".
# NOTE(review): every story carries the same link, so the rdf:about values
# of the items below are not unique -- confirm whether feed readers cope.
foreach my $story (@stories) {
	print {$rss_fh} <<"EOF";
				<rdf:li rdf:resource="$story->{link}"/>
EOF
}

print {$rss_fh} <<"EOF";
			 </rdf:Seq>
			</items>
		</channel>
EOF

# One <item> element per story.
foreach my $story (@stories) {
	print {$rss_fh} <<"EOF";
        <item rdf:about="$story->{link}">
                <title>$story->{title}</title>
                <link>$story->{link}</link>
                <description>$story->{summary}</description>
				<dc:creator>$story->{auteur}</dc:creator>
				<content:encoded>$story->{description}</content:encoded>
			</item>
EOF
}
print {$rss_fh} "</rdf:RDF>\n";

# Buffered write errors only surface when the handle is closed.
close $rss_fh
	or die "Unable to finish writing file $cache_file : $!";

# Finally, send the freshly written cache file to the client.
open my $cache_fh, '<', $cache_file
	or die "Unable to read file $cache_file : $!";
while ( my $line = <$cache_fh> ) {
	print $line;
}
close $cache_fh;


