[Bio] / FigKernelScripts / get_id_mapping_for_list_of_IDs.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_id_mapping_for_list_of_IDs.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Jul 23 21:20:25 2008 UTC (11 years, 3 months ago) by wilke
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Draft version for id mapping

#use FIG;
use strict;
use warnings;

use vars qw( $opt_d $opt_f $opt_h $opt_o);
use Getopt::Std;

use AnnoClearinghouse;
use FIG;

# Command-line handling.
#   -f FILE   input file; the first tab-separated column of every line is
#             taken as a query ID
#   -o MODE   "all" prints every corresponding ID, any other value
#             (default "fig") prints only the fig| IDs
#   -h        print the usage line and exit
my $usage = "get_id_mapping_for_list_of_IDs -f FILE -o [all|fig]\n";

# getopts returns false for an unknown switch or a missing option argument.
getopts('f:o:h') or die $usage;

if ($opt_h){
  print $usage;
  exit 0;
}

my $file   = $opt_f || "";
my $output = $opt_o || "fig";

# Without a readable input file there is nothing to do, so stop here
# instead of continuing and failing later when the file is opened.
# Diagnostics go to STDERR so they never mix with the table on STDOUT.
unless (-f $file){
  print STDERR "No file $file!\n";
  print STDERR $usage;
  exit 1;
}

my $fig  = FIG->new;
#my $aclh = AnnoClearinghouse->new( "/vol/clearinghouse/v12" , "/vol/clearinghouse/contrib" , 1);

# Query IDs read from the input file. Each key is an ID; the value is a hash
# with the slots "org" and "seq", initialised to 0 here and filled in later.
my $ext_ids        = {} ;
# Second argument handed to $fig->get_corresponding_ids.
my $with_type_info = 0;

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a pipe, and $! reports why the open failed.
open(my $in_fh, '<', $file) or die "Can't open $file: $!\n";

while (my $line = <$in_fh>){
  chomp $line;
  # Only the first tab-separated column is used; the rest is ignored.
  my ($id) = split /\t/ , $line;
  # print $id , "\n";
  if ($id){
    $ext_ids->{$id} = { seq => 0,
                        org => 0,
                      };
  }
}
close($in_fh);

#$ext_ids->{"fig|345073.6.peg.2186 "} = { seq => 0,
#					 org => 0,
#				       };

# For every query ID, store two array references in its entry:
#   org - the list returned by $fig->get_corresponding_ids
#   seq - the first element of every array ref returned by
#         $fig->mapped_prot_ids
for my $query_id (keys %{$ext_ids}){
  my $entry = $ext_ids->{$query_id};

  my @by_org = $fig->get_corresponding_ids($query_id, $with_type_info);
  my @by_seq = map { $_->[0] } $fig->mapped_prot_ids($query_id);

  #  print "ACH:\n";
  #    foreach my $line (  $aclh->lookup_id( $query_id) ){
  #      foreach my $entry (@$line){
  #        print scalar @$entry , "\n";
  #        print join "\t" , @$entry , "\n" if (scalar @$entry);
  #      }
  #    }

  $entry->{org} = \@by_org;
  $entry->{seq} = \@by_seq;
}
 

# Emit the result table on STDOUT.
#   "all"  - one row per query ID with three tab-separated columns: the ID,
#            its "org" list and its "seq" list, each list joined with ";"
#   other  - delegate to print_figs, which prints only the fig| IDs
if ($output eq "all"){
  print "Query ID\tID Correspondence by Organism\tID Correspondence by Sequence\n";
  # Sorted so that repeated runs produce rows in the same order.
  foreach my $ext_id (sort keys %$ext_ids){
    my $entry = $ext_ids->{ $ext_id };
    # join() is parenthesised so that the tab and newline separators are
    # printed after the joined list rather than being joined into it
    # (which would leave a stray ";" at the end of each column).
    print join("\t",
               $ext_id,
               join(";", @{ $entry->{ org } }),
               join(";", @{ $entry->{ seq } })), "\n";
  }
}
else{
  print_figs( $ext_ids );
}


# print_figs( $ext_ids )
#
# Print a three-column, tab-separated table to the currently selected output
# handle: the query ID, the fig| IDs found in its "org" list and the fig| IDs
# found in its "seq" list. Several IDs in one column are joined with ";".
#
# $ext_ids is a hash reference: query ID => { org => [ ... ], seq => [ ... ] }.
#
# Every row has exactly three columns, also when a list is empty or holds no
# fig| ID, so the output always lines up with the header. Rows are printed in
# sorted key order. Returns nothing.
sub print_figs{
  my ($ext_ids) = @_;
  print "Query ID\tID Correspondence\tSequence Correspondence\n";
  foreach my $ext_id (sort keys %$ext_ids){
    my $entry = $ext_ids->{ $ext_id };

    # Keep only the fig|... part of entries that contain one; entries
    # without a fig| ID contribute nothing.
    my @org = map { /(fig\|[^;\s]+)/ ? $1 : () } @{ $entry->{ org } };
    my @seq = map { /(fig\|[^;\s]+)/ ? $1 : () } @{ $entry->{ seq } };

    # join() is parenthesised so the separators are not joined into the lists.
    print join("\t", $ext_id, join(";", @org), join(";", @seq)), "\n";
  }
  return;
}

# sub get_synonyms_for_complete_genome{
#        my ($genome) =@_;

#        $genome=~s/fig\|//g;  # strip any prefix fig| part of the ID as the downstream methods choke on them

#        # init clearinghouse (this really should be done in config not in code!)
     

#        # the version number needs to be split from the genome ID
#        $genome =~ s/\.\d+//g;

#        print "retrieving from ACH for $genome \n" if defined $verbose;

#        foreach my $entry ($aclh->get_synonyms_for_prefix($genome) ){
#                #my ($fid,$ref)=(@line=split (/:/, $entry))[4,3];
#                my @line=split (/:/, $entry);
#                my ($fid,$ref)=@line[4,3];

#                next if ($ref eq $fid ) ; # exclude reference to $fid itself
#                my $list=$Synonyms{$fid};
#                $list.=",".$ref;
#                $Synonyms{$fid}=$list;

# #       print "ID: $fid:: ".$Synonyms{$fid}."\n" if defined $verbose;
#        }
# }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3