[Bio] / FigKernelScripts / recent_annotations_by_genome.pl Repository:
ViewVC logotype

View of /FigKernelScripts/recent_annotations_by_genome.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu Feb 23 19:07:00 2012 UTC (7 years, 8 months ago) by disz
Branch: MAIN
CVS Tags: rast_rel_2014_0729, mgrast_version_3_2, rast_rel_2014_0912, HEAD
create data for the Daily SEED

use File::stat;
use Data::Dumper;
use FileHandle;
use strict;
use FIG;
use Time::Local;
use POSIX;
use Date::Parse;
use Getopt::Long;

# Command-line options:
#   --verbose             flag, stored in $verbose
#   --write-date[=FILE]   optional string value, stored in $write_date
my $write_date;
my $verbose;
my $rc = GetOptions(
    "verbose"      => \$verbose,
    "write-date:s" => \$write_date,
);

# After option parsing exactly two positional arguments must remain:
# the seed name and the starting time.  The usage text reports the
# running script's own name ($0) and lists the options GetOptions accepts.
if (!$rc || @ARGV != 2) {
    die "Usage: $0 [--verbose] [--write-date file] seed starting-time\n";
}

# Shared accumulators filled in by the main query loop further down.
my %pegs;
my @annos;

# Positional arguments: the seed name, then the starting-time specification.
my $seed = shift @ARGV;

my $start_time;
my $arg = shift @ARGV;

#
# Resolve the starting time to seconds since the epoch:
#   * an all-digit value above 600000000 (a "recent", post-1989 date) is
#     taken as an epoch timestamp as-is;
#   * "today" / "yesterday" mean local midnight of the current / previous day;
#   * anything else is handed to str2time.
#
if ($arg =~ /^\d+$/ && $arg > 600000000) {
    $start_time = $arg;
}
elsif ($arg eq "today" || $arg eq "yesterday") {
    my $offset = ($arg eq "yesterday") ? 60*60*24 : 0;

    # Keep only year/month/day by zeroing seconds, minutes and hours.
    my @lpart = localtime(time() - $offset);
    @lpart[0, 1, 2] = (0, 0, 0);
    $start_time = timelocal(@lpart);
}
else {
    $start_time = str2time($arg);
}

die "Cannot parse starting time $arg\n" unless $start_time;

# The same instant rendered as a local-time string for the SQL query below.
my $start_str = strftime("%Y-%m-%d %H:%M:%S", localtime($start_time));

# Epoch time captured before the annotation query runs.
# NOTE(review): presumably stored so a later run can pass it back as the
# starting-time argument -- confirm against the caller.
my $end_time = time;

# When --write-date names a file, record $end_time in it.  Failures are
# reported with a warning but do not stop the run.
if ($write_date)
{
    if (open(my $date_fh, ">", $write_date))
    {
	print {$date_fh} "$end_time\n";
	# Buffered write errors only surface at close, so check it.
	close($date_fh) or warn "Could not write $write_date: $!";
    }
    else
    {
	warn "Could not write $write_date: $!";
    }
}

#print STDERR $start_time, , $start_str, "\n";
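# Run this on anno-3, sourcing ~fig/FIGdisk/config/... to get anno-3 annotations.
# Two arguments are required: the seed name and a starting time. All annotations
# newer than the starting time are processed. The starting time may be an epoch
# timestamp, "today", "yesterday", or a date string that str2time can parse
# (originally documented as yyyy:mm:dd:hh:mm:ss).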


my $fig = FIG->new;

# made_by names whose function changes are left out of every report.
# NOTE(review): "rapid_propogation" looks like a deliberate match for a
# misspelled made_by value -- confirm before removing it.
my %skip_name = map { ($_ => 1) } qw(annotation_repair rapid_propagation rapid_propogation);

# Produce the weekly per-genome pages first.
make_genome_stats($fig,\%skip_name);

# Fetch every function change recorded at or after the starting time.
my $res = $fig->db_handle->SQL(qq(SELECT prot, was_assigned_function, assigned_function, made_by
				  FROM function_trail
				  WHERE mod_time >= ?
				  ), undef, $start_str);

# %count maps old function -> new function -> genome -> number of changes.
my %count;

foreach my $row (@$res) {
    my ($fid, $was, $now_fn, $who) = @$row;

    next if $skip_name{$who};

    # A false (empty) previous function is reported as "None".
    $was ||= "None";

    my $genome = $fig->genome_of($fid);
    push @annos, [$fid, $genome, $now_fn, $was];
    push @{ $pegs{$was}{$now_fn} }, $fid;
    $count{$was}{$now_fn}{$genome}++;
}

# For every (old function, new function) pair collected in %count:
#   * write a detail page DAILY/<n> that links each changed feature, and
#   * print a summary entry on STDOUT that links to that page and states in
#     how many genomes the change occurred.
# Pairs are visited in sorted order at both levels so page numbers are
# stable for a given set of changes.
# NOTE(review): function strings are inserted into the HTML unescaped.
my $i = 0;
for my $old (sort (keys %count)){
	for my $new (sort (keys %{$count{$old}})) {
		# (development location was /home/disz/FIGdisk/FIG/Tmp/DAILY/$i)
		my $page_path = "/vol/public-pseed/FIGdisk/FIG/Tmp/DAILY/$i";

		# A page that cannot be written is reported and skipped; the
		# summary entry below is still printed, as it was before.
		if (open(my $page_fh, ">", $page_path)) {
			print {$page_fh} "<html><body>\n";
			print {$page_fh} "OLD: $old<br>NEW: $new<br>\n";
			for my $fid (@{$pegs{$old}{$new}}) {
				my $link = make_link($fid, $seed);
				print {$page_fh} "$link<br>\n";
			}
			print {$page_fh} "</body></html>\n";
			close($page_fh) or warn "Could not write $page_path: $!";
		}
		else {
			warn "Could not write $page_path: $!";
		}

		# One key per genome in which this change was seen.
		# $old needs no "None" fallback here: the keys of %count were
		# already normalized when the query rows were collected.
		my $c = scalar(keys %{$count{$old}{$new}});
		my $msg = $c > 1 ? "In $c genomes" : "In $c genome";
		my $page_link = make_page_link($i, $msg);
		print "<center>$page_link</center>\n<B><I>OLD:</I></B>&nbsp;$old<br>\n";
		print "<B><I>NEW:</I></B>&nbsp;$new<br><br>\n";
		$i++;
	}
}
#print "</dl>\n";

# Build an HTML anchor to a generated page under FIG-Tmp/DAILY on pubseed.
#   $page_num - last path component of the page URL
#   $title    - text shown for the link
# Returns the anchor as a string; the link opens in a new window.
sub make_page_link {
    my ($page_num, $title) = @_;
    my $url = 'http://pubseed.theseed.org/FIG-Tmp/DAILY/' . $page_num;
    return sprintf(q{<a href='%s' target='_blank'>%s</a>}, $url, $title);
}
# Build an HTML anchor to linkin.cgi on the given seed host for one id.
#   $id   - value of the id= query parameter, also used as the link text
#   $seed - host name prefix placed in front of ".theseed.org"
# Returns the anchor as a string; the link opens in a new window.
sub make_link {
    my ($id, $seed) = @_;
    my $href = join('', 'http://', $seed, '.theseed.org/linkin.cgi?id=', $id);
    return join('', "<a href='", $href, "' target='_blank'>", $id, "</a>");
}
# Build an HTML anchor to linkin.cgi on the given seed host for one genome.
#   $id   - value of the genome= query parameter
#   $seed - host name prefix placed in front of ".theseed.org"
#   $name - text shown for the link
# Returns the anchor as a string; the link opens in a new window.
sub make_g_link {
    my ($id, $seed, $name) = @_;
    my $url = "http://" . $seed . ".theseed.org/linkin.cgi?genome=" . $id;
    return "<a href='" . $url . "' target='_blank'>" . $name . "</a>";
}

# Write the "annotations this week" pages, grouped by genome.
#
# Queries function_trail for every change made since local midnight seven
# days ago, ignores rows whose made_by is a key of %$skip_name, and tallies
# the remaining changes per genome.  It then writes:
#   * an index page (pubseed_genome_annos) listing genomes in descending
#     order of change count, each with a link to its detail page; and
#   * one detail page per listed genome (DAILY/g<N>, N counting from 0)
#     naming the annotators and linking every changed feature.
#
# Parameters:
#   $fig       - FIG object used for the database query and genome lookups
#   $skip_name - hash ref whose keys are made_by names to ignore
#
# Uses the file-scope $seed when building links.  Returns nothing.
# File problems are reported with warn and never abort the run; if the index
# page cannot be opened, no detail pages are written either.
sub make_genome_stats {
    my ($fig, $skip_name) = @_;

    my $index_path = "/vol/public-pseed/SharedData/DailyStatistics/pubseed_genome_annos";
    # (development location was /home/disz/FIGdisk/FIG/Tmp/DAILY)
    my $page_dir   = "/vol/public-pseed/FIGdisk/FIG/Tmp/DAILY";

    my %g_count;      # genome -> number of changes
    my %g_list;       # genome -> list of changed feature ids
    my %whodunnit;    # genome -> made_by -> number of changes

    # Window start: seven days back, truncated to year/month/day.
    my @lpart = localtime(time() - 60*60*24*7);
    @lpart[0, 1, 2] = (0, 0, 0);
    my $start_time = timelocal(@lpart);
    my $start_str = strftime("%Y-%m-%d %H:%M:%S", localtime $start_time);

    my $res = $fig->db_handle->SQL(qq(SELECT prot, was_assigned_function, assigned_function, made_by
	  FROM function_trail
	  WHERE mod_time >= ?
	  ), undef, $start_str);

    for my $ent (@$res) {
        # Only the feature id and the annotator are needed here.
        my ($fid, undef, undef, $who) = @$ent;
        next if $skip_name->{$who};

        my $genome = $fig->genome_of($fid);
        $g_count{$genome}++;
        push @{$g_list{$genome}}, $fid;
        $whodunnit{$genome}{$who}++;
    }

    my $index_fh;
    if (!open($index_fh, ">", $index_path)) {
        warn "Could not write $index_path: $!";
        return;
    }
    print {$index_fh} "<html><body>\n";

    # Start at 0 so the first detail page is "g0" rather than a bare "g".
    my $i = 0;

    # Busiest genomes first; ties are broken by genome id so the page
    # numbering is stable for a given set of changes.
    for my $g (sort { $g_count{$b} <=> $g_count{$a} || $a cmp $b } keys(%g_count)) {
        my $name = $fig->genus_species($g);
        my $g_link = make_g_link($g, $seed, $name);
        my $linkid = "g" . $i;
        my $page_path = "$page_dir/$linkid";

        # A detail page that cannot be written is reported and skipped; the
        # index entry below is still emitted, as it was before.
        if (open(my $page_fh, ">", $page_path)) {
            print {$page_fh} "<html><body>\n";
            print {$page_fh} "<h4>Annotations This Week In $g_link</h4>\n";
            my $who = join(", ", keys (%{$whodunnit{$g}}));
            print {$page_fh} "<h5>Annotators: $who</h5>";
            for my $fid (@{$g_list{$g}}) {
                my $link = make_link($fid, $seed);
                print {$page_fh} "$link<br>\n";
            }
            print {$page_fh} "</body></html>\n";
            close($page_fh) or warn "Could not write $page_path: $!";
        }
        else {
            warn "Could not write $page_path: $!";
        }

        my $msg = "($g_count{$g})";
        my $page_link = make_page_link($linkid, $msg);
        print {$index_fh} "$name $page_link<br>\n";

        # Stop after the page numbered 51 (at most 52 genomes), or right
        # after the first genome with fewer than two changes, which is
        # itself still listed.
        # NOTE(review): confirm whether a limit of exactly 50 was intended.
        last if $i++ > 50;
        last if $g_count{$g} < 2;
    }

    print {$index_fh} "</body></html>\n";
    close($index_fh) or warn "Could not write $index_path: $!";
    return;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3