[Bio] / FigKernelScripts / get_neighbors_and_corr_to_ref.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/get_neighbors_and_corr_to_ref.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Thu Jan 7 23:55:55 2010 UTC revision 1.6, Fri Feb 19 19:38:21 2010 UTC
# Line 1  Line 1 
1  ########################################################################  ########################################################################
2    
3    # This is a SAS Component
4    
5  use SeedHTML;  use SeedHTML;
6  use strict;  use strict;
7  use SeedEnv;  use SeedEnv;
8  use ProtSims;  use ProtSims;
9  use gjoseqlib;  use gjoseqlib;
10    use Data::Dumper;
11    
12  my $usage = "usage: get_neighbors_and_corr_to_ref GenomeDir";  my $usage = "usage: get_neighbors_and_corr_to_ref GenomeDir";
13  my $gdir;  my $gdir;
14    
15    $| = 1;
16    
17    my $sapO = SAPserver->new;
18    
19  ($gdir   =  shift @ARGV)  ($gdir   =  shift @ARGV)
20      || die $usage;      || die $usage;
21  ($gdir =~ /(\d+\.\d+)$/) || die "Invalid Genome Directory: $gdir";  ($gdir =~ /(\d+\.\d+)$/) || die "Invalid Genome Directory: $gdir";
# Line 18  Line 25 
25  my %id2seqH = map { ($_->[2] && (length($_->[2]) > 30)) ? ($_->[0] => $_->[2]) : () } @fasta;  my %id2seqH = map { ($_->[2] && (length($_->[2]) > 30)) ? ($_->[0] => $_->[2]) : () } @fasta;
26    
27  &SeedUtils::verify_dir("$gdir/CorrToReferenceGenomes");  &SeedUtils::verify_dir("$gdir/CorrToReferenceGenomes");
28    print "Finding neighbors\n";
29  my @poss_pegs = &prioritize_pegs_used_to_find_neighbors($gdir);  my @poss_pegs = &prioritize_pegs_used_to_find_neighbors($gdir);
30    print "found " . scalar(@poss_pegs) . " poss_pegs\n";
31  my %counts;  my %counts;
32  my $best  = 0;  my $best  = 0;
33  my $tuple;  my $tuple;
# Line 35  Line 43 
43  my @reference = sort { $counts{$b} <=> $counts{$a} } keys(%counts);  my @reference = sort { $counts{$b} <=> $counts{$a} } keys(%counts);
44  if (@reference > 30) { $#reference = 29 }  if (@reference > 30) { $#reference = 29 }
45    
 my $sapO = SAPserver->new;  
46  my $genomesH  = $sapO->all_genomes(-complete => 1);  my $genomesH  = $sapO->all_genomes(-complete => 1);
47  open(CLOSE,">$gdir/closest.genomes") || die "could not open closest.genomes";  open(CLOSE,">$gdir/closest.genomes") || die "could not open closest.genomes";
48    print "Generating correspondences for these genomes:\n";
49    print "\t$_\n" for @reference;
50  foreach my $g2 (@reference)  foreach my $g2 (@reference)
51  {  {
52      if ($g2 ne $gdir_id)      if ($g2 ne $gdir_id)
53      {      {
54            print "Generating correspondences for $g2...\n";
55          &generate_correspondence_table($g2,$gdir);          &generate_correspondence_table($g2,$gdir);
56            print "Generating correspondences for $g2...done\n";
57          print CLOSE join("\t",($g2,$genomesH->{$g2})),"\n";          print CLOSE join("\t",($g2,$genomesH->{$g2})),"\n";
58      }      }
59  }  }
# Line 101  Line 112 
112      my($tuple,$id2seqH,$counts,$bestP) = @_;      my($tuple,$id2seqH,$counts,$bestP) = @_;
113    
114      my($role,$peg) = @$tuple;      my($role,$peg) = @$tuple;
115        print "Get figfam pegs for $role\n";
116      my $figfam_pegs = &figfam_pegs_for_role($role);      my $figfam_pegs = &figfam_pegs_for_role($role);
117    
118        print "Compute sims\n";
119      my @sims        = &ProtSims::blastP([[$peg,'',$id2seqH->{$peg}]],$figfam_pegs,10);      my @sims        = &ProtSims::blastP([[$peg,'',$id2seqH->{$peg}]],$figfam_pegs,10);
120        print "Computed " . scalar(@sims) . " sims\n";
121      my $i;      my $i;
122      for ($i=0; (($i < @sims) && ($i < 50)); $i++)      for ($i=0; (($i < @sims) && ($i < 50)); $i++)
123      {      {
# Line 114  Line 127 
127      }      }
128  }  }
129    
   
130  sub figfam_pegs_for_role {  sub figfam_pegs_for_role {
131      my($role) = @_;      my($role) = @_;
132    
133      my %figfams;      my %figfams;
134      foreach $_ (`cat $FIG_Config::FigfamsData/family.functions`)  
135      {      my $res = $sapO->all_figfams(-roles => $role);
136          if ((index($_,$role) >= 0) && ($_ =~ /^(FIG\d{6})/))      my @pegs;
137        for my $ff (keys %$res)
138          {          {
139              $figfams{$1} = 1;          my $fids = $sapO->figfam_fids(-id => $ff);
140            push(@pegs, @$fids);
141          }          }
142      }  
143      my $sapO = SAPserver->new;      my $idsH = $sapO->ids_to_sequences(-ids => \@pegs, -protein => 1);
144      my $genomesH  = $sapO->all_genomes(-complete => 1);  
145      my @ids =  grep { $genomesH->{&SeedUtils::genome_of($_)} }      return [map { my $seq = $idsH->{$_}; $seq ? [$_,'',$seq] : () } keys(%$idsH)];
                map { (($_ =~ /^(\S+)\t(\S+)/) && $figfams{$1}) ? $2 : () }  
               `cat $FIG_Config::FigfamsData/families.2c`;  
     my $idsH = $sapO->ids_to_sequences(-ids => \@ids, -protein => 1);  
     return [map { my $seq = $idsH->{$_}; ($seq && (length($seq) > 30)) ? [$_,'',$seq] : () } keys(%$idsH)];  
146  }  }

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3