[Bio] / FigKernelScripts / get_evaluation_check.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_evaluation_check.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Mon Aug 13 15:58:38 2007 UTC (12 years, 3 months ago) by bartels
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.2: +108 -6 lines
added option -a for showing pegs that do not match via blast

#!/usr/bin/env /home/bartels/FIGdisk/env/cee/bin/perl

use Data::Dumper;
use Carp;
use FIG_Config;
use FIGV;
use Getopt::Std;
use strict;
use warnings;

$ENV{'BLASTMAT'} = "/home/bartels/FIGdisk/BLASTMAT";
$ENV{'FIG_HOME'} = "/home/bartels/FIGdisk";

getopts ( 'aj:r:s:hcnt:z' );

our ( $opt_a, $opt_j, $opt_r, $opt_s, $opt_h, $opt_c, $opt_n, $opt_t, $opt_z );

if ( defined( $opt_h ) ) {
    &usage;
    exit( 0 );
}

if ( !defined( $opt_j ) || !defined( $opt_r ) || !defined( $opt_s ) || !defined( $opt_t ) ) {
    &usage;
    exit( 0 );
}

# Edit login in shebang too!
my $job = $opt_j;
my $rast_org_id = $opt_r;
my $seed_org_id = $opt_s;
my $base_dir = $opt_t;

# static
my $result_dir = $base_dir . "/results";
my $input_orgs_file = $base_dir. "/orgs";
my $input_file = $result_dir . "/$rast_org_id-$seed_org_id";
my $result_file = $result_dir . "/comparison";
my $org_dir_1 = "/vol/48-hour/Jobs.prod.2007-0601/$job/rp/";
my $rast_org_dir = "/vol/48-hour/Jobs.prod.2007-0601/$job/rp/$rast_org_id";

if ( $opt_n ) {
  $org_dir_1 = "/vol/48-hour/Jobs.dev/$job/rp/";
  $rast_org_dir = "/vol/48-hour/Jobs.dev/$job/rp/$rast_org_id";
}

my $org_dir_2 = "/home/bartels/FIGdisk/FIG/Data/Organisms/";

open ( ORGFILE, ">$input_orgs_file" ) or die $!;
print ORGFILE "$rast_org_id\t$seed_org_id";
close ORGFILE;


my $fig = new FIGV($rast_org_dir);
my $fig2 = new FIG;


my $features_rast = $fig->all_features_detailed_fast( $rast_org_id );
my $features_seed = $fig2->all_features_detailed_fast( $seed_org_id );
my $frhash;
my $fshash;
foreach my $f ( @$features_rast ) {
  $frhash->{ $f->[0] } = 0;
}
foreach my $f ( @$features_seed ) {
  $fshash->{ $f->[0] } = 0;
}


print $FIG_Config::bin."/make_peg_maps $org_dir_1 $org_dir_2 $result_dir < $input_orgs_file\n";
system( $FIG_Config::bin."/make_peg_maps $org_dir_1 $org_dir_2 $result_dir < $input_orgs_file" );

open(IN, $input_file) or die "could not open $input_file\n";
open(OUT, ">$result_file") or die "could not open $result_file\n";



while (<IN>) {
  $_ =~ /(.+)\t(.+)/;
  my $id1 = $1;
  $frhash->{ $id1 } = 1;

  my $id2 = $2;
  chomp($id2);

  $fshash->{ $id2 } = 1;


  my $f1 = $fig->function_of($id1, "master");
  my $f2 = $fig2->function_of($id2, "master");

  print OUT $id1;

  if ( !$opt_c ) {
    print OUT "\t" .$f1;
  }

  my @subsystems_id1 = $fig->subsystems_for_peg( $id1 );
  my $unisubsystems_id1 = uniquify( \@subsystems_id1 );
  @subsystems_id1 = @$unisubsystems_id1;

  if ( $opt_z ) {
    my $in = 0;
    print OUT "\t";
    foreach my $ss ( @subsystems_id1 ) {
      if ( $in ) {
	print OUT '~';
      }
      print OUT $ss;
      $in = 1;
    }
    if ( !$in ) {
      print OUT "0";
    }
  }
  else {
    print OUT "\t".scalar( @subsystems_id1 );
  }

  print OUT "\t" . $id2;

  if ( !$opt_c ) {
    print OUT "\t" . $f2;
  }

  my @subsystems_id2 = $fig2->subsystems_for_peg( $id2 );
  my $unisubsystems_id2 = uniquify( \@subsystems_id2 );
  @subsystems_id2 = @$unisubsystems_id2;
  if ( $opt_z ) {
    my $in = 0;
    print OUT "\t";
    foreach my $ss ( @subsystems_id2 ) {
      if ( $in ) {
	print OUT '~';
      }
      print OUT $ss;
      $in = 1;
    }
    if ( !$in ) {
      print OUT "0";
    }
  }
  else {
    print OUT "\t".scalar( @subsystems_id2 );
  }

  my $issame = "different";
  if ( $f1 eq $f2 ) {
    $issame = "same";
  }
  else {
    my ( $sfs ) = SameFunc::same_func_why( $f1, $f2 );
    if ( $sfs ) {
      $issame = "samefunc";
    }
  }
  print OUT "\t$issame\n";
}

if ( $opt_a ) {
  my $counter = 0;
  foreach my $ffhash ( ( $frhash, $fshash ) ) {
    $counter++;
    foreach my $f ( keys %$ffhash ) {
      next if ( $ffhash->{ $f } == 1 );
      print OUT "$f\t";
      
      my $f1 = $fig->function_of( $f, "master" );
      
      if ( !$opt_c ) {
	print OUT $f1."\t";
      }
      
      my @subsystems_f = $fig->subsystems_for_peg( $f );
      my $unisubsystems_f = uniquify( \@subsystems_f );
      @subsystems_f = @$unisubsystems_f;
      
      if ( $opt_z ) {
	my $in = 0;
	foreach my $ss ( @subsystems_f ) {
	  if ( $in ) {
	    print OUT '~';
	  }
	  print OUT $ss;
	  $in = 1;
	}
	if ( !$in ) {
	  print OUT "0";
	}
      }
      else {
	print OUT scalar( @subsystems_f );
      }
      if ( $counter == 1 ) {
	print OUT "\t-\t-\t-\tnot_found_in_seed\n";
      }
      if ( $counter == 2 ) {
	print OUT "\t-\t-\t-\tnot_found_in_rast\n";
      }
    }
  }
    
#   foreach my $f ( keys %$fshash ) {
    
#     next if ( $fshash->{ $f } == 1 );
    
#     print OUT "$f\t";
    
#     my $f1 = $fig->function_of( $f, "master" );
    
#     if ( !$opt_c ) {
#       print OUT $f1."\t";
#     }
    
#     my @subsystems_f = $fig->subsystems_for_peg( $f );
#     my $unisubsystems_f = uniquify( \@subsystems_f );
#     @subsystems_f = @$unisubsystems_f;
    
#     if ( $opt_z ) {
#       my $in = 0;
#       foreach my $ss ( @subsystems_f ) {
# 	if ( $in ) {
# 	  print OUT '~';
# 	}
# 	print OUT $ss;
# 	$in = 1;
#       }
#       if ( !$in ) {
# 	print OUT "0";
#       }
#     }
#     else {
#       print OUT scalar( @subsystems_f );
#     }
#     print OUT "\t-\t-\tnot_found_in_rast\n";
#   }
}

close OUT;
close IN;

sub usage {

  print STDERR "get_evaluation_check.pl\n";
  print STDERR "\t-r <Genome id on the RAST server>\n";
  print STDERR "\t-j <Job id on the RAST server>\n";
  print STDERR "\t-s <Genome id in the SEED>\n";
  print STDERR "\t-t <output target directory>\n";
  print STDERR "\t-z <prints out subsystems names divided by ~ instead of the number of subsystems>\n";
  print STDERR "\t-c flag if you want a 5-column table not including the functions\n";
  print STDERR "\t-n flag if you want to use the development version of the RAST server\n";
  print STDERR "\t-a flag if you want to see the ones that do not match in the list\n\n";

}

sub uniquify {
  my ( $arr ) = @_;

  my $hash;
  foreach my $e ( @$arr ) {
    $hash->{ $e->[0] } = 1;
  }

  my @back = keys %$hash;
  return \@back;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3