#!/usr/local/bin/perl

# vim: tw=78: sw=4: ts=4: et: 

# $Id: check-xrefs.pl,v 1.2 2006/11/01 21:37:57 kclark Exp $

use strict;
use warnings;
use English qw( -no_match_vars );
use File::Basename;
use Getopt::Long;
use Gramene::CDBI::Literature;
use Gramene::CDBI::Markers;
use Gramene::CDBI::Ontology;
use Gramene::Utils qw( commify );
use LWP::Simple;
use Pod::Usage;
use Readonly;

Readonly my $EMPTY_STRING => q{};
Readonly my %BAD_CHECK => (
    'Gramene Literature' => qr/the reference id you entered is not valid/,
);
Readonly my $VERSION 
    => sprintf '%d.%02d', qq$Revision: 1.2 $ =~ /(\d+)\.(\d+)/;

my ( $help, $man_page, $show_version, $types );
GetOptions(
    't|type:s' => \$types,
    'help'     => \$help,
    'man'      => \$man_page,
    'version'  => \$show_version,
) or pod2usage(2);

if ( $help || $man_page ) {
    pod2usage({
        -exitval => 0,
        -verbose => $man_page ? 2 : 1
    });
}; 

if ( $show_version ) {
    my $prog = basename( $PROGRAM_NAME );
    print "$prog v$VERSION\n";
    exit 0;
}

my %check = map { $_, 1 } split /,/, $types;;
my $xrefs = Gramene::CDBI::Markers::Xref->retrieve_all;

my ( $i, $num_errors ) = ( 0, 0 );
while ( my $x = $xrefs->next ) {
    my $val        = $x->xref_value;
    my $table_name = $x->table_name;
    my $record_id  = $x->record_id;
    my $xref_type  = $x->xref_type;
    my $url_tmpl   = $xref_type->url_template;

    if ( %check ) {
        next unless $check{ $xref_type->xref_type };
    }

    $i++;

    if ( $url_tmpl =~ m{^/} ) {
        $url_tmpl  = 'http://dev.gramene.org' . $url_tmpl;
    }
    my $url      = sprintf( $url_tmpl, $val);

    my $is_bad   = 0;
    if ( $x->xref_type->xref_type eq 'Gramene Literature' ) {
        my $lit = Gramene::CDBI::Literature::Reference->retrieve( $val );
        $is_bad = $lit ? 0 : 1;
    }
    elsif ( $x->xref_type->xref_type =~ /ontology/ ) {
        my ($ont_term) = Gramene::CDBI::Ontology::Term->search(
            { term_accession => $val }
        );
        $is_bad = $ont_term ? 0 : 1;
    }
    else {
        my $content  = get( $url );
        my $bad_re   = $BAD_CHECK{ $x->xref_type->xref_type };

        if ( !$content ) {
            $is_bad = 1;
        }
        elsif ( $bad_re && $content =~ $bad_re  ) {
            $is_bad = 1;
        }
    }

    if ( $is_bad ) {
        print "$table_name $record_id $url\n";
        $num_errors++;
    }
}

printf "Done, checked %s xref%s, found %s error%s.\n",
    commify($i),
    $i          == 1 ? $EMPTY_STRING : 's',
    commify($num_errors),
    $num_errors == 1 ? $EMPTY_STRING : 's'
;

__END__

# ----------------------------------------------------

=pod

=head1 NAME

check-xrefs.pl - check the validity of xrefs in the markers db

=head1 VERSION

This documentation refers to check-xrefs.pl version $Revision: 1.2 $

=head1 SYNOPSIS

  check-xrefs.pl 

Options:

  -t|--type=Foo[,Bar]  Check only xrefs of defined types
  --help               Show brief help and exit
  --man                Show full documentation
  --version            Show version and exit

=head1 DESCRIPTION

Goes through each xref and checks if it works.

=head1 SEE ALSO

Gramene::CDBI::Markers.

=head1 AUTHOR

Ken Youens-Clark E<lt>kclark@cshl.eduE<gt>.

=head1 COPYRIGHT

Copyright (c) 2006 Cold Spring Harbor Laboratory

This library is free software;  you can redistribute it and/or modify 
it under the same terms as Perl itself.

=cut
