[Bio] / FigKernelScripts / find_new_annotations.pl Repository:
ViewVC logotype

View of /FigKernelScripts/find_new_annotations.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (as text) (annotate)
Thu Oct 20 14:54:49 2011 UTC (8 years, 5 months ago) by disz
Branch: MAIN
CVS Tags: mgrast_release_3_1_2, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_10262011
Changes since 1.4: +4 -1 lines
added yesterday option

use File::stat;
use Data::Dumper;
use FileHandle;
use strict;
use FIG;
use POSIX;
use Date::Parse;
use Fcntl ':seek';
use Getopt::Long;
use Time::Local;

# Command-line switches:
#   --verbose              report progress and skipped files on STDERR
#   --write-date [file]    file that receives this run's end timestamp
my ($write_date, $verbose);
my $rc = GetOptions(
    "verbose"      => \$verbose,
    "write-date:s" => \$write_date,
);

# After the switches are consumed, exactly one positional argument (the
# starting time) must be left; anything else is a usage error.
die "Usage: find_new_annotations [-v] [--write-date timestamp-file] starting-time\n"
    unless $rc && @ARGV == 1;

my $start_time;
my $arg = shift;

#
# Turn the single positional argument into $start_time, expressed in
# seconds since the epoch. Three forms are accepted, tried in this order:
#
#   1. An all-digit value above 600000000 (roughly the start of 1989) is
#      assumed to already be a seconds-since-the-epoch timestamp.
#   2. "today" / "yesterday" mean local midnight at the start of the
#      current / previous calendar day.
#   3. Anything else is handed to str2time.
#
if ($arg =~ /^\d+$/ && $arg > 600000000)
{
    $start_time = $arg;
}
elsif ($arg eq "today" || $arg eq "yesterday")
{
    my $days_back = ($arg eq "yesterday") ? 1 : 0;

    # Only year/month/day of the current local time are used.
    my (undef, undef, undef, $mday, $mon, $year) = localtime(time);

    # Step back by calendar day rather than by 24*60*60 seconds: on a
    # 25-hour (DST fall-back) day, subtracting 86400 seconds during the
    # last hour of the day still lands on the same date. mktime
    # normalises an out-of-range day of month (0 becomes the last day of
    # the previous month), and isdst = -1 asks the C library to work out
    # whether DST applies at that midnight.
    $start_time = mktime(0, 0, 0, $mday - $days_back, $mon, $year, 0, 0, -1);
}
else
{
    $start_time = str2time($arg);
}

# An undefined (unparseable) or zero result is treated as a bad argument.
if (!$start_time)
{
    die "Cannot parse starting time $arg\n";
}
# Human-readable form of the starting time, for diagnostic messages.
my $start_str = strftime("%Y-%m-%d %H:%M:%S", localtime $start_time);

# Upper bound of the reporting window: the moment this run began.
my $end_time = time;

#
# If a timestamp file was requested, record $end_time in it so it can be
# supplied as the starting time of a later run. Failure to write is
# reported but is not fatal.
#
# NOTE(review): the timestamp is written before any annotations are
# scanned, so a run that dies later has still advanced the file.
#
if (defined($write_date) && $write_date ne '')
{
    if (open(my $date_fh, ">", $write_date))
    {
        # Buffered write errors only surface at close, so check both.
        my $ok = print {$date_fh} "$end_time\n";
        $ok = close($date_fh) && $ok;
        warn "Could not write $write_date: $!" unless $ok;
    }
    else
    {
        warn "Could not write $write_date: $!";
    }
}

#print STDERR $start_time, "\n";
#run this on anno-3, sourcing ~fig/FIGdisk/config/... to get anno-3 annotations
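# One argument is required: the starting time. Annotations stamped at or after it are printed.
# It may be epoch seconds, "today", "yesterday", or a date string that str2time can parse
# (this note originally gave the format as yyyy:mm:dd:hh:mm:ss).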


my $fig = FIG->new;

# Annotator names whose annotations are never reported. Both spellings of
# "rapid_propagation" are listed.
my %skip_name;
$skip_name{$_} = 1 foreach qw(annotation_repair rapid_propagation rapid_propogation);

my @genomes = $fig->genomes();

# Set of genome ids returned by $fig->genomes, for membership tests.
my %genomes;
$genomes{$_} = 1 foreach @genomes;

# Index rows for every annotation dated at or after the starting time.
# Ordering by (fileno, seek) puts the lowest offset of each file first.
my $res = $fig->db_handle->SQL(qq(SELECT fid, dateof, who, fileno, seek
				  FROM annotation_seeks
				  WHERE dateof >= ?
				  ORDER BY fileno, seek), undef, $start_time);

my %seen;     # file numbers already handled
my %gfile;    # genome => file number it was first seen with
my %work;     # genome => [genome, file name, first seek offset]

for my $ent (@$res)
{
    # The dateof and who columns are not needed here.
    my ($fid, undef, undef, $fileno, $seek) = @$ent;

    # Only the first (lowest-offset) row of each file matters.
    next if $seen{$fileno}++;

    my $genome = FIG::genome_of($fid);
    next unless $genomes{$genome};

    # A genome is expected to map to a single annotation file. Remember
    # the file number so this check can actually fire: previously %gfile
    # was never filled in, and a second file for the same genome silently
    # replaced the first one's seek offset.
    if (defined($gfile{$genome}) && $gfile{$genome} != $fileno)
    {
        die "Bad genome file mapping $genome $fileno\n";
    }
    $gfile{$genome} = $fileno;

    $work{$genome} = [$genome, $fig->N2file($fileno), $seek];
}


#
# For every genome with pending work, open its annotations file, jump to
# the recorded offset and print each annotation record whose timestamp
# lies in [$start_time, $end_time) and whose annotator is not listed in
# %skip_name. Records are printed to STDOUT with their "//" terminator.
#
for my $genome (@genomes)
{
    my $work = $work{$genome};
    next unless ref($work);

    # Element 0 of the work entry is the genome id we already hold.
    my (undef, $file, $seek) = @$work;
    print STDERR "Work: $genome $file $seek\n" if $verbose;

    my $anno_file = "$FIG_Config::organisms/$genome/annotations";
    my $s = stat($anno_file);
    if (!$s)
    {
        die "Cannot stat $anno_file: $!";
    }

    # A file not modified since the starting time is skipped unread.
    if ($s->mtime < $start_time)
    {
        my $mod_str = strftime("%Y-%m-%d %H:%M:%S", localtime $s->mtime);
        warn "$anno_file was modified $mod_str < $start_str\n" if $verbose;
        next;
    }

    # Three-argument open keeps the path from being read as a mode.
    open(my $fh, "<", $anno_file)
        or die "Cannot open annotations for $genome: $!\n";

    seek($fh, $seek, SEEK_SET)
        or die "Cannot seek to $seek in $anno_file: $!\n";

    # Records are terminated by a line holding only "//".
    local $/ = "//\n";

    while (my $ann = <$fh>)
    {
        chomp $ann;

        # Record layout: peg feature id, epoch timestamp, annotator,
        # then free text. Anything else is ignored.
        next unless $ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*)/s;
        my ($anno_time, $who) = ($2, $3);

        next if $skip_name{$who};

        if ($anno_time >= $start_time && $anno_time < $end_time)
        {
            # chomp removed the terminator; put it back on output.
            print $ann, "//\n";
        }
    }
    close($fh);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3