#!/usr/local/bin/perl

use strict;
use warnings;
$| = 1;

use lib '/usr/local/gramene/lib/perl/';
#use lib '/usr/local/lib/perl5/site_perl/5.8.6';

use Readonly;

use Gramene::Config;
use WWW::Mechanize::Timed;
use HTML::Lint;
use Mail::Sendmail;
use Getopt::Std;

my %parents         = ();
my %failed          = ();
my %previously_seen = ();

my $cfile   =       Gramene::Config->new;

my %options = ('n' => 0, 'v' => 0, 's' => 1, 'w' =>
$cfile->get('link_checker')->{'site'});
getopts('nvsw:', \%options);
my ($noisy, $verbose, $send_email, $site) = @options{qw(n v s w)};

Readonly my $to     =>  $cfile->get('link_checker')->{'email_to'};
Readonly my $from   =>  $cfile->get('link_checker')->{'email_from'};
my $message         =   undef;
my (@slow, @invalid, @failed);

my @links = grab_links($site);
foreach my $link (@links) {
    next if will_fail($link) || ! traceable_url($link);
    will_grab_link($link);
    push @links, grab_links($link);
}

if (@slow || @invalid || @failed) {
    @slow    = qw('No slow links found') unless @slow;
    @failed  = qw('No failed found') unless @slow;
	$message .= "Slow links:\n===========\n" . join('', @slow) . "\n";
	$message .= "Failed links:\n=============\n" . join('', @failed) . "\n";
	$message .= "Validation errors:\n================\n" . join('', @invalid) . "\n" if @invalid;
}
else {
	$message = "No problems found";
}

#if we want to send the message AND we actually have one to send AND we have somewhere to send it, then send it...
if ($send_email && $message && $to && $from) {
    my %mail = (
        'To'        => $to,
        'From'      => $from,
        'Subject'   => "Gramene broken links on $site at " . localtime,
        'Message'   => $message,
    );

    sendmail(%mail) or die $Mail::Sendmail::error;
}

sub grab_links {
    my $link    = shift or die "Cannot grab links w/o url";

    my $mech = WWW::Mechanize::Timed->new();
    
    $mech->get($link);
    
    my $time = $mech->client_response_receive_time;
    
    link_is_slow($link, $time) if is_slow($time);
    
    if ($mech->status =~ /^2/) {
    
        if ( $mech->is_html ) {
            my $lint = HTML::Lint->new();
            $lint->parse( $mech->content );
            
            validation_errors($link, $lint->errors) if $lint->errors;
            
        }
    
        return () unless traceable_url($link);
            
        my @links = map {$_->URI->abs} $mech->links;
        foreach my $child (@links) {
            push @{$parents{$child}}, $link;
        }
        return @links;
    }
    else {
        $failed{$link} = $mech->status;
        failed_link($link);
        return ();
    }
}

sub will_fail {
    my $link = shift;

    if ($failed{$link}) {
        failed_link($link);
        return 1;
    }
    else {
        return 0;
    }
    
}

# Returns a boolean value whether we think a link is slow or not.

sub is_slow {
    my $time = shift || 0;
    
    return $time > 2;
}

# These are the URLs that we assume we want to follow.

sub traceable_url {
    my $link = shift;

    return 0 if
        $link !~ m!$site!
        || $link =~ /^javascript:/
        || $link !~ /http/
        || $previously_seen{$link}++
    ;
    
    return 1;
}

#the various IO functions
sub will_grab_link {
    my $link = shift;
    
    my $output = "Grabbing $link\n\n";
    
    print $output if $noisy;
    
    #$message .= $output if $verbose;
}

sub link_is_slow {

    my $link        = shift;
    my $time        = shift;
    
    my $output      = "Slow link: $link - took $time seconds\n\n";
    
	push @slow, $output;
    
    print $output if $noisy;
}

sub validation_errors {

    my $page    = shift;
    my @errors  = @_;
    
    my $output  = "Validation errors on page $page\n";
    
	foreach my $error (@errors) {
        $output .= "\t" . $error->as_string . "\n";
    }

    $output .= "\n";
    
    if ($verbose) {
    	push @invalid, $output;
    }
    
    print $output if $noisy && $verbose;
}

sub failed_link {
    my $link    = shift;
    
    my $output  = "FAILED $link: " . $failed{$link} . "\n";
    $output     .= "linked from:\n";
    foreach my $parent (@{$parents{$link}}) {
        $output .= "\t$parent\n";
    }

    $output .= "\n";
    
	push @failed, $output;

    print $output if $noisy;

}

