[Bio] / FigKernelScripts / find_new_annotations.pl Repository:
ViewVC logotype

View of /FigKernelScripts/find_new_annotations.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (download) (as text) (annotate)
Sun Feb 24 23:41:33 2013 UTC (6 years, 8 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, HEAD
Changes since 1.5: +42 -41 lines
Fixed to recover from genomes with missing annotation files.

use File::stat;
use Data::Dumper;
use FileHandle;
use strict;
use FIG;
use POSIX;
use Date::Parse;
use Fcntl ':seek';
use Getopt::Long;
use Time::Local;

# Command-line state.
#   --verbose     : report per-genome progress on STDERR.
#   --write-date  : optional name of a file that receives this run's end
#                   timestamp (the value is optional in the option spec).
my ($write_date, $verbose);
my $rc = GetOptions(
    "verbose"      => \$verbose,
    "write-date:s" => \$write_date,
);

# Option parsing must succeed and exactly one positional argument (the
# starting time) must remain.
unless ($rc && @ARGV == 1) {
    die "Usage: find_new_annotations [-v] [--write-date timestamp-file] starting-time\n";
}

# Resolve the single positional argument into $start_time (seconds since the
# epoch) and a printable $start_str. Three input forms are recognised:
#   * a bare integer large enough to be a "recent" epoch timestamp,
#   * the keywords "today" / "yesterday" (local midnight of that day),
#   * anything else is handed to str2time.
my $start_time;
my $arg = shift;

#
# If it's numeric and translates to a "recent" date (post-1989)
# assume it's a seconds-since-the-epoch timestamp.
#
if ($arg =~ /^\d+$/ && $arg > 600000000)
{
    $start_time = $arg;
}
elsif ($arg eq "today" || $arg eq "yesterday")
{
    # Only the calendar date matters; the time of day is forced to 00:00:00.
    my ($mday, $mon, $year) = (localtime(time))[3, 4, 5];

    if ($arg eq "yesterday")
    {
        # Step back 24 hours from local noon rather than from "now": days
        # that gain or lose an hour at a DST change are 23 or 25 hours long,
        # so subtracting 24 hours from a time near midnight can land on the
        # wrong calendar day. Noon minus 24 hours is always inside yesterday.
        my $noon_today = timelocal(0, 0, 12, $mday, $mon, $year);
        ($mday, $mon, $year) = (localtime($noon_today - 60*60*24))[3, 4, 5];
    }

    $start_time = timelocal(0, 0, 0, $mday, $mon, $year);
}
else
{
    $start_time = str2time($arg);
}

if (!$start_time)
{
    die "Cannot parse starting time $arg\n";
}
my $start_str = strftime("%Y-%m-%d %H:%M:%S", localtime $start_time);

# Upper bound (exclusive) of the reporting window: the moment this run began
# its work. Annotations stamped at or after it are left for the next run.
my $end_time = time;

# When --write-date names a file, record $end_time there. Failure to record
# it is reported but is not fatal.
if ($write_date)
{
    # Lexical handle, explicitly closed: buffered write errors only surface
    # at print/close time, so both are checked.
    if (open(my $date_fh, ">", $write_date))
    {
        print {$date_fh} "$end_time\n"
            or warn "Could not write $write_date: $!";
        close($date_fh)
            or warn "Could not write $write_date: $!";
    }
    else
    {
        warn "Could not write $write_date: $!";
    }
}

#print STDERR $start_time, "\n";
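#run this on anno-3, sourcing ~fig/FIGdisk/config/... to get anno-3 annotations
# One argument is required: a starting date and time. All annotations dated at or after it are processed.
# It may be epoch seconds, "today", "yesterday", or a date string that str2time can parse
# (originally documented as yyyy:mm:dd:hh:mm:ss).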


# Handle onto the FIG data store; used for the genome list, the database
# connection and file-number lookups.
my $fig = FIG->new;

# Annotator names whose annotation records are never reported.
my %skip_name = map { $_ => 1 } qw(annotation_repair rapid_propagation rapid_propogation);

# Genome IDs in the order returned by $fig->genomes(), plus a membership
# lookup keyed by the same IDs.
my @genomes = $fig->genomes();
my %genomes = map { $_ => 1 } @genomes;

# Ask the database for every annotation index row dated at or after
# $start_time, then reduce the rows to one work item per genome:
#   $work{$genome} = [ genome ID, annotation file name, seek offset ]
my $res = $fig->db_handle->SQL(qq(SELECT fid, dateof, who, fileno, seek
                                  FROM annotation_seeks
                                  WHERE dateof >= ?
                                  ORDER BY fileno, seek), undef, $start_time);
my %seen;     # file numbers already handled
my %gfile;    # genome ID => the file number its annotations were found in
my %work;     # genome ID => [genome, file, seek]
for my $ent (@$res)
{
    # dateof and who are selected by the query but not needed here.
    my ($fid, undef, undef, $fileno, $seek) = @$ent;
#    print STDERR "$fid $date $who $fileno $seek\n";

    # Rows arrive ordered by (fileno, seek), so only the first row for each
    # file is kept; its offset is where reading of that file will start.
    next if $seen{$fileno}++;

    my $genome = FIG::genome_of($fid);
    next unless $genomes{$genome};

    # Sanity check: a genome's annotations must all come from one file.
    # The mapping is recorded below so that this check can actually fire.
    if (defined($gfile{$genome}) && $gfile{$genome} != $fileno)
    {
        die "Bad genome file mapping $genome $fileno\n";
    }
    $gfile{$genome} = $fileno;

    $work{$genome} = [$genome, $fig->N2file($fileno), $seek];
}


# Walk the genomes that have a work item and print to STDOUT every peg
# annotation record whose timestamp falls in [$start_time, $end_time) and
# whose annotator is not listed in %skip_name. Records are written back out
# in the file's own format, each terminated by "//\n".
GENOME:
for my $genome (@genomes)
{
    my $work = $work{$genome};
    next GENOME unless ref($work);

    # The first element of the work item repeats the genome ID.
    my (undef, $file, $seek) = @$work;
    print STDERR "Work: $genome $file $seek\n" if $verbose;

    my $anno_file = "$FIG_Config::organisms/$genome/annotations";

    # A genome whose annotation file cannot be stat'ed is reported and
    # skipped rather than aborting the whole run.
    my $s = stat($anno_file);
    if (!$s)
    {
        warn "Cannot stat $anno_file: $!\n";
        next GENOME;
    }

    # A file untouched since the start time cannot hold anything new.
    if ($s->mtime < $start_time)
    {
        my $mod_str = strftime("%Y-%m-%d %H:%M:%S", localtime $s->mtime);
        warn "$anno_file was modified $mod_str < $start_str\n" if $verbose;
        next GENOME;
    }

    # Three-argument open keeps the mode separate from the path.
    open(my $fh, "<", $anno_file)
        or die "Cannot open annotations for $genome: $!\n";

    # Jump to the offset recorded in the database. If the seek fails the
    # file is still scanned from wherever the handle is positioned; the
    # timestamp test below decides what is printed.
    seek($fh, $seek, SEEK_SET)
        or warn "Cannot seek to $seek in $anno_file: $!\n";

    # Annotation records are separated by a line containing only "//".
    local $/ = "//\n";

    while (my $ann = <$fh>)
    {
        chomp $ann;

        # Record layout: feature ID, timestamp, annotator, free text.
        # Only peg features are considered.
        next unless $ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*)/s;
        my ($anno_time, $who) = ($2, $3);

        next if $skip_name{$who};

        if ($anno_time >= $start_time && $anno_time < $end_time)
        {
            print $ann, "//\n";
        }
    }
    close($fh);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3