#! /usr/bin/env perl
BEGIN{$^W=1}  use strict;

use Data::Dumper;
use LWP::Simple;
use HTML::Entities;
use HTTP::Date qw(time2str str2time);

# Temporary file used as the on-disk cache for the generated feed.
my $cache_file = '/tmp/guillermito.xml';
# How long (in seconds) a cached copy is considered fresh: two hours.
my $refresh_delay = 2 * 60 * 60;

###########################################
# CGI : Try to use already processed file
###########################################

# Serve the cached feed (or a 304) when the cache file is recent enough.
# Every successful path through this block ends with exit; anything that
# falls out of it continues with the regeneration code further down.
if ( -e $cache_file ) {
    # Element 9 of stat() is the last-modification time in epoch seconds.
    my $fn_time = ( stat $cache_file )[9];

    # Reuse the file only while it is younger than $refresh_delay seconds.
    if ( defined $fn_time && time() - $fn_time < $refresh_delay ) {

        # str2time() returns undef for a date it cannot parse, so the
        # result is checked before the numeric comparison.
        my $since      = $ENV{'HTTP_IF_MODIFIED_SINCE'};
        my $since_time = $since ? str2time($since) : undef;

        if ( defined $since_time && $since_time >= $fn_time ) {
            # The client's copy is current. The empty line is required:
            # it terminates the CGI header block.
            print "Status: 304 Not Modified\n\n";
            exit;
        }

        # Open the cache before emitting any header, so that an unreadable
        # file leads to regeneration instead of an empty 200 response.
        if ( open my $cache_fh, '<', $cache_file ) {
            print "Last-Modified: " . time2str($fn_time) . "\n";
            print "Content-type: text/xml\n\n";
            print while <$cache_fh>;
            close $cache_fh;
            exit;
        }

        warn "Unable to read cache file $cache_file: $!\n";
    }
}

#############################################
# Load the main page, then write CGI headers
#############################################
# Per the original author's note, the remote site offers neither ETag nor
# Last-Modified, so the whole page (about 70k) is downloaded on each refresh.
my $main_page = get('http://www.guillermito2.net/index-tmp.html');

# The download happens before any header is written: if it fails we can
# still answer with an error status instead of an empty "successful" feed.
unless ( defined $main_page && length $main_page ) {
    print "Status: 502 Bad Gateway\n";
    print "Content-type: text/plain\n\n";
    print "Unable to fetch page.\n";
    die "Unable to fetch page.\n\t";
}

# CGI headers for a freshly generated feed.
print "Last-Modified: " . time2str() . "\n";
print "Content-type: text/xml\n\n";

# Replace HTML entities with the characters they stand for (in place),
# so the parsing code below works on plain text and markup.
decode_entities($main_page);

# Cut $text shortly after $scan_from characters when it is longer than
# $max_length: the cut is moved forward to the next whitespace character
# (or the end of the text) and '...' is appended. Shorter texts are
# returned unchanged.
sub _truncate_at_word {
    my ( $text, $max_length, $scan_from ) = @_;

    return $text if length($text) <= $max_length;

    my $cut = $scan_from;
    $cut++ while $cut < length($text) && substr( $text, $cut, 1 ) !~ /\s/;

    return substr( $text, 0, $cut ) . '...';
}

# Each element of @stories is a hash reference with the keys link, title,
# summary and description, all already escaped for inclusion in XML.
my @stories;
while ( $main_page =~ m#<div class=bloc_texte>(.+?)</div>\s*<br>#gis ) {
    my $whole = $1;
    my ( $link, $title, $body );

    # 'article' entries: a bold title, a small grey line holding the
    # "Lien" permalink, then the text.
    if ( $whole =~ m#<B>(.{1,200})</B><br>\s*<FONT color=\#999999 size=-2><B>.{5,30} - \[<a href="(.{1,200})">Lien</a>\]</B></FONT>\s*((?:<BLOCKQUOTE>\s*)?(?:<p>)?(.+?)(?:</p>)?\s*(?:</BLOCKQUOTE>\s*))#is ) {
        ( $title, $link, $body ) = ( $1, $2, $3 );
        $link = "http://www.guillermito2.net/" . $link;
    }
    # 'blog' entries: only a block quote, no title and no permalink, so
    # they all point at the main page.
    elsif ( $whole =~ m#<BLOCKQUOTE>\s*<p>(.+?)\s*(?:</p>\s*)?<\/BLOCKQUOTE>#is ) {
        $body  = $1;
        $link  = "http://www.guillermito2.net/index-tmp.html";
        $title = '';
    }
    else {
        # Neither layout matched: not a story.
        next;
    }

    # Make relative image sources and links absolute.
    $body =~ s#src="(?!(http|ftp|https)://)#src="http://www.guillermito2.net/#gi;
    $body =~ s#href="(?!(mailto|http|ftp|https):)#href="http://www.guillermito2.net/#gi;

    # The summary is the body with its HTML tags removed, shortened to
    # roughly 300 characters when it exceeds 400.
    my $summary = $body;
    $summary =~ s/<(?:[^>'"]*|(['"]).*?\1)*>//gs;
    $summary = _truncate_at_word( $summary, 400, 300 );

    # Entries without a title borrow the summary, shortened even further.
    # length() is used so that a literal "0" still counts as a title.
    if ( !length $title ) {
        $title = _truncate_at_word( $summary, 100, 70 );
    }

    my %text_of = (
        link        => $link,
        summary     => $summary,
        title       => $title,
        description => $body,
    );

    my $story = {};
    for my $field ( keys %text_of ) {
        # Escape the XML markup characters. The link is escaped too: it is
        # interpolated into an attribute and an element of the feed, and
        # decode_entities() has turned any "&amp;" in it back into "&".
        my $escaped = HTML::Entities::encode_entities( $text_of{$field}, '<>"\'&' );

        # decode_entities() may have produced characters above U+00FF
        # (curly quotes, the euro sign, ...). Emit those as numeric
        # character references so the text remains representable in a
        # single-byte output document.
        $escaped =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/ge;

        $story->{$field} = $escaped;
    }

    push @stories, $story;
}


#############################################
# Output RSS file
#############################################
# Build the complete RSS 1.0 document in memory. It is needed twice: once
# for the cache file and once for the response body.
my $rss = <<"EOF";
<?xml version="1.0" encoding="iso-8859-1"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
>
    <channel rdf:about="http://trolleur.net/cgi-bin/guillermito.pl">
           <title>Guillermito Zone</title>
           <link>http://www.guillermito2.net</link>
           <description>Guillermito Zone</description>
           <language>en-us</language>
		   
		   <items>
		   	<rdf:Seq>
EOF

# Table of contents: one rdf:li per story.
for my $story (@stories) {
    $rss .= <<"EOF";
				<rdf:li rdf:resource="$story->{link}"/>
EOF
}

$rss .= <<"EOF";
			 </rdf:Seq>
			</items>
		</channel>
EOF

# The items themselves.
for my $story (@stories) {
    $rss .= <<"EOF";
        <item rdf:about="$story->{link}">
                <title>$story->{title}</title>
                <link>$story->{link}</link>
                <description>$story->{summary}</description>
				<content:encoded>$story->{description}</content:encoded>
			</item>
EOF
}
$rss .= "</rdf:RDF>\n";

# Refresh the cache through a temporary file that is renamed into place,
# so a request running at the same time never reads a half-written feed.
# Failures are only logged: the response headers have already been sent
# and the feed can still be delivered from memory.
my $tmp_file = "$cache_file.$$";
if ( open my $rss_fh, '>', $tmp_file ) {
    my $ok = print {$rss_fh} $rss;
    $ok = close($rss_fh) && $ok;    # buffered write errors surface at close
    $ok &&= rename $tmp_file, $cache_file;

    unless ($ok) {
        warn "Unable to write file $cache_file : $!\n";
        unlink $tmp_file;
    }
}
else {
    warn "Unable to write file $tmp_file : $!\n";
}

# Finally, send the feed to the client.
print $rss;


