[Bio] / FigKernelScripts / apply_annotations.pl Repository:
ViewVC logotype

View of /FigKernelScripts/apply_annotations.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (download) (as text) (annotate)
Wed Apr 16 14:27:35 2014 UTC (5 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, HEAD
Changes since 1.6: +1 -1 lines
Fix in the case of an empty skiplist.

use POSIX;
use File::stat;
use Data::Dumper;
use FileHandle;
use strict;
use FIG;
use Getopt::Long;
use YAML::Any;

# Command-line options:
#   --dry-run            still write the log files, but do not call
#                        add_annotation / assign_function on the FIG object
#   --log FILE           tab-separated log of the annotations processed
#   --action-log FILE    YAML dump of every add_annotation / assign_function action
#   --skip-genome ID     (repeatable) features of this genome only receive the
#                        "Skipping ..." intro annotation; nothing else is applied
my $dry_run = 0;
my $log_file;
my $action_file;
my @skip_genome;
my $rc = GetOptions("dry-run" => \$dry_run,
                    "log=s" => \$log_file,
                    "action-log=s" => \$action_file,
                    "skip-genome=s" => \@skip_genome,
                   );

# Alternation matching any feature id that belongs to a skipped genome.
# This is the empty string when no --skip-genome was given, so users of
# $skip_re must check @skip_genome before matching against it.
my $skip_re = join("|", map { my $quoted = quotemeta($_); "(?:fig\\|$quoted\\.[a-z])" } @skip_genome);

if (!$rc || @ARGV != 3)
{
    die "Usage: $0 [--dry-run] [--log file] [--action-log file] [--skip-genome genome-id]... source-seed anno-user annotation-file\n";
}

my $src_seed = shift;
my $anno_user = shift;
my $anno_file = shift;

my $anno_fh;
open($anno_fh, "<", $anno_file) or die "Cannot open $anno_file: $!";

# Run statistics. The scalar counters start at 0 so that the final summary
# prints "0" rather than an empty field when no such event occurred.
my(%stat_annotators, %stat_genomes, %stat_pegs);
my($stat_assignments, $stat_keeps, $stat_annotations) = (0, 0, 0);

# Optional YAML action log; unbuffered so it stays current if the run dies.
my $action_fh;
if ($action_file)
{
    open($action_fh, ">", $action_file) or die "Cannot open action log file $action_file: $!";
    $action_fh->autoflush(1);
}

# Optional tab-separated log; unbuffered for the same reason.
my $log_fh;
if ($log_file)
{
    open($log_fh, ">", $log_file) or die "Cannot open log file $log_file: $!";
    $log_fh->autoflush(1);
}

#be sure to source the seed to apply these to
#
my $fig = FIG->new;
$fig or die "Cannot create fig object\n";

# Records in the annotation file are terminated by a line holding just "//".
local $/ = "//\n";

# Time of this run; $nowstr is embedded in the intro annotations.
my $now = time;
my $nowstr = strftime("%Y-%m-%d %H:%M:%S", localtime $now);

# State of the group currently being accumulated. A group is a run of
# consecutive records for one feature id; a record whose timestamp is more
# than 60 seconds after the group's first record starts a new group.
my $last_peg;
my $last_time;
my @peg_annos;
my $did_assignment = 0;

while (my $record = <$anno_fh>)
{
    # NOTE(review): tell() runs after the read, so this is the file position
    # just past the record, not the position where the record starts.
    my $offset = tell($anno_fh);

    chomp $record;

    # A usable record begins with a feature id line and a numeric timestamp
    # line; anything else is ignored.
    my($this_fid, $this_time) = $record =~ /^(fig\|\S+)\n(\d+)/
        or next;

    my $fid_changed  = ($this_fid ne $last_peg);
    my $gap_exceeded = (defined($last_time) && $this_time - $last_time > 60);
    if ($fid_changed || $gap_exceeded)
    {
        # Flush the finished group before starting the next one.
        handle_annos($fig, $last_peg, $last_time, $did_assignment, \@peg_annos) if @peg_annos;
        @peg_annos = ();
        $did_assignment = 0;
        $last_peg = $this_fid;
        $last_time = $this_time;
    }

    # Only peg features are queued. The first pattern recognises a function
    # assignment ("Set [FIG|master] function to" followed by the function on
    # the next line); the second accepts any other annotation text.
    if (my @assign = $record =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(Set(\s+(FIG|master))?\s+function\s+to[ \t]*\n(.*))/s)
    {
        my($fid, $anno_time, $who, $anno_text, undef, $anno_who, $fn) = @assign;
        $fn =~ s/\n$//;

        # Compare against the function currently recorded for this feature.
        my $fig_fun = scalar($fig->function_of($fid));
        my $what = ($fn ne $fig_fun) ? 'assign_function' : 'no_assign_function';
        $did_assignment++ if $what eq 'assign_function';

        push(@peg_annos, [$offset, $what, $fid, $who, $anno_text, $anno_time, $anno_who, $fn, $fig_fun]);
    }
    elsif (my($fid, $anno_time, $who, $anno_text) = $record =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*)/s)
    {
        # Plain annotations carry no anno_who value.
        push(@peg_annos, [$offset, 'annotation', $fid, $who, $anno_text, $anno_time, undef]);
    }
}

# Flush whatever group was still open when the file ended.
handle_annos($fig, $last_peg, $last_time, $did_assignment, \@peg_annos) if @peg_annos;

# Apply one group of queued annotations for a single feature.
#
#   $fig         FIG object the annotations are applied to
#   $fid         feature id shared by the group
#   $xtime       timestamp of the group's first record
#   $did_assign  true when the group contains at least one function change
#   $annos       array ref of entries
#                [offset, what, fid, who, text, time, anno_who, new_fun, old_fun]
#
# An intro annotation attributed to $anno_user is always added first. If the
# feature matches the skip list nothing further happens; otherwise each entry
# is applied according to its 'what' tag and, when a log file is open, one
# tab-separated line is written per step. Updates the %stat_* / $stat_*
# counters along the way.
sub handle_annos
{
    my($fig, $fid, $xtime, $did_assign, $annos) = @_;

    my $stamp_fmt = "%Y-%m-%d %H:%M:%S";

    $stat_pegs{$fid}++;
    $stat_genomes{ FIG::genome_of($fid) }++;

    my $group_offset = $annos->[0]->[0];
    my $n = @$annos;

    # $skip_re is empty when no genomes were given, hence the @skip_genome guard.
    my $skip = (@skip_genome && $fid =~ /$skip_re/) ? 1 : 0;

    my $intro_txt;
    if ($skip)
    {
        $intro_txt = "Skipping $n annotations for $fid from $src_seed (skipping assignments due to skiplist @skip_genome) at $nowstr";
    }
    elsif ($did_assign)
    {
        $intro_txt = "Migrating $n annotations for $fid from $src_seed (including function assignment) at $nowstr";
    }
    else
    {
        $intro_txt = "Migrating $n annotations for $fid from $src_seed (no function assignment necessary) at $nowstr";
    }

    my_add_annotation($fig, $anno_file, $group_offset, $fid, $anno_user, $intro_txt, $xtime);

    return if $skip;

    if ($log_fh)
    {
        my $xtime_str = strftime($stamp_fmt, localtime $xtime);
        print $log_fh join("\t", "Anno", $fid, $xtime_str, $anno_user, $intro_txt), "\n";
    }

    for my $entry (@$annos)
    {
        my($entry_offset, $what, $entry_fid, $who, $anno_text, $anno_time, $anno_who, $new_fun, $old_fun) = @$entry;

        $stat_annotators{$who}++;
        my $timestr = strftime($stamp_fmt, localtime $anno_time);

        my @log_row;
        if ($what eq 'assign_function')
        {
            # Record the change as an annotation, then make the assignment.
            my_add_annotation($fig, $anno_file, $entry_offset, $entry_fid, $who, "Migrate assignment from $old_fun to $new_fun", $anno_time);
            my_assign_function($fig, $anno_file, $entry_offset, $entry_fid, $who, $new_fun);

            $stat_assignments++;
            @log_row = ("Assign", $entry_fid, $timestr, $who, $old_fun, $new_fun);
        }
        elsif ($what eq 'no_assign_function')
        {
            # The function already matches, so only note that it was kept.
            my_add_annotation($fig, $anno_file, $entry_offset, $entry_fid, $who, "Skipping assignment to $old_fun", $anno_time);

            $stat_keeps++;
            @log_row = ("Keep", $entry_fid, $timestr, $who, $old_fun);
        }
        elsif ($what eq 'annotation')
        {
            my_add_annotation($fig, $anno_file, $entry_offset, $entry_fid, $who, $anno_text, $anno_time);

            $stat_annotations++;

            # Flatten line breaks so the log keeps one line per entry.
            (my $flat_text = $anno_text) =~ s/[\r\n]/ /g;
            @log_row = ("Anno", $entry_fid, $timestr, $who, $flat_text);
        }
        else
        {
            next;
        }

        print $log_fh join("\t", @log_row), "\n" if $log_fh;
    }
}

# Final summary on STDOUT: one "count<TAB>label" line per statistic, with the
# sorted annotator names listed (tab-indented) under the annotator count.
my $ng   = scalar(keys %stat_genomes);
my $np   = scalar(keys %stat_pegs);
my $nwho = scalar(keys %stat_annotators);

for my $row ([$ng, 'genomes'], [$np, 'pegs'], [$nwho, 'annotators'])
{
    print join("\t", @$row), "\n";
}

for my $annotator (sort keys %stat_annotators)
{
    print "\t$annotator\n";
}

for my $row ([$stat_assignments, 'assignments'], [$stat_keeps, 'keeps'], [$stat_annotations, 'annotations'])
{
    print join("\t", @$row), "\n";
}

# Add one annotation to a feature via the FIG object.
#
# The action is first written to the YAML action log when one is open (this
# happens in dry-run mode as well). Unless --dry-run is in effect,
# $fig->add_annotation is then called; any exception it throws is re-raised
# with the annotation file name and offset added for context.
sub my_add_annotation
{
    my($fig, $anno_file, $offset, $fid, $who, $anno_text, $anno_time) = @_;

    if ($action_fh)
    {
        my @action = ('add_annotation', $anno_file, $offset, $fid, $who, $anno_text, $anno_time);
        print $action_fh Dump(\@action);
    }

    if (!$dry_run)
    {
        eval { $fig->add_annotation($fid, $who, $anno_text, $anno_time); };
        die "Error adding annotation $fid $who $anno_text $anno_time at $anno_file:$offset: $@" if $@;
    }
}

# Assign a new function to a feature via the FIG object.
#
# The action is first written to the YAML action log when one is open (this
# happens in dry-run mode as well). Unless --dry-run is in effect,
# $fig->assign_function is then called; any exception it throws is re-raised
# with the annotation file name and offset added for context.
sub my_assign_function
{
    my($fig, $anno_file, $offset, $fid, $who, $new_fun) = @_;

    if ($action_fh)
    {
        my @action = ('assign_function', $anno_file, $offset, $fid, $who, $new_fun);
        print $action_fh Dump(\@action);
    }

    if (!$dry_run)
    {
        eval { $fig->assign_function($fid, $who, $new_fun); };
        die "Error assigning function $fid $who $new_fun at $anno_file:$offset: $@" if $@;
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3