[Bio] / FigKernelScripts / get_neighbors_and_corr_to_ref.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/get_neighbors_and_corr_to_ref.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3, Mon Dec 28 21:34:23 2009 UTC revision 1.4, Thu Jan 7 23:38:01 2010 UTC
# Line 13  Line 13 
13      || die $usage;      || die $usage;
14    
15  my @fasta = &gjoseqlib::read_fasta("$gdir/Features/peg/fasta");  my @fasta = &gjoseqlib::read_fasta("$gdir/Features/peg/fasta");
16  my %id2seqH = map { $_->[0] => $_->[2] } @fasta;  my %id2seqH = map { ($_->[2] && (length($_->[2]) > 30)) ? ($_->[0] => $_->[2]) : () } @fasta;
17    
18  &SeedUtils::verify_dir("$gdir/CorrToReferenceGenomes");  &SeedUtils::verify_dir("$gdir/CorrToReferenceGenomes");
19  my @poss_pegs = &prioritize_pegs_used_to_find_neighbors($gdir);  my @poss_pegs = &prioritize_pegs_used_to_find_neighbors($gdir);
# Line 23  Line 23 
23  my $tuple;  my $tuple;
24  while (($best < 500) && ($tuple = shift @poss_pegs))  while (($best < 500) && ($tuple = shift @poss_pegs))
25  {  {
26        my($role,$peg) = @$tuple;
27        if ($id2seqH{$peg} && (length($id2seqH{$peg}) > 30))
28        {
29      &compute_hits_and_set_best($tuple,\%id2seqH,\%counts,\$best);      &compute_hits_and_set_best($tuple,\%id2seqH,\%counts,\$best);
30  }  }
31    }
32  if ($best == 0) { die "$gdir describes a genome without enough RAST-called genes to identify neighbors" }  if ($best == 0) { die "$gdir describes a genome without enough RAST-called genes to identify neighbors" }
33  my @reference = sort { $counts{$b} <=> $counts{$a} } keys(%counts);  my @reference = sort { $counts{$b} <=> $counts{$a} } keys(%counts);
34  if (@reference > 30) { $#reference = 29 }  if (@reference > 30) { $#reference = 29 }
# Line 53  Line 57 
57      my %by_func;      my %by_func;
58      (-s "$gdir/assigned_functions") || die "$gdir contains no assigned_functions";      (-s "$gdir/assigned_functions") || die "$gdir contains no assigned_functions";
59    
60        my %uniqH;
61      foreach my $line (`cat $gdir/assigned_functions`)      foreach my $line (`cat $gdir/assigned_functions`)
62      {      {
63          if ($line =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S[^\#]+\S)/)          if ($line =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S[^\#]+\S)/)
64          {          {
65              my($peg,$func) = ($1,$2);              $uniqH{$1} = $2;
             $func =~ s/\s*\#$//;  
             push(@{$by_func{$2}},$1);  
66          }          }
67      }      }
68    
69        foreach my $peg (keys(%uniqH))
70        {
71            my $func = $uniqH{$peg};
72            $func =~ s/\s*\#.*$//;
73            push(@{$by_func{$func}},$peg);
74        }
75    
76      my @synthetases        = map {[$_,$by_func{$_}->[0]] } grep { @{$by_func{$_}} == 1 } grep { $_ =~ /tRNA synthetase/ }   keys(%by_func);      my @synthetases        = map {[$_,$by_func{$_}->[0]] } grep { @{$by_func{$_}} == 1 } grep { $_ =~ /tRNA synthetase/ }   keys(%by_func);
77      my @ribosomal_proteins = map {[$_,$by_func{$_}->[0]] } grep { @{$by_func{$_}} == 1 } grep { $_ =~ /ribosomal protein/ } keys(%by_func);      my @ribosomal_proteins = map {[$_,$by_func{$_}->[0]] } grep { @{$by_func{$_}} == 1 } grep { $_ =~ /ribosomal protein/ } keys(%by_func);
78      my @ok_pegs            = map {[$_,$by_func{$_}->[0]] } grep { @{$by_func{$_}} == 1 }                                    keys(%by_func);      my @ok_pegs            = map {[$_,$by_func{$_}->[0]] } grep { @{$by_func{$_}} == 1 }                                    keys(%by_func);
# Line 82  Line 93 
93      my($tuple,$id2seqH,$counts,$bestP) = @_;      my($tuple,$id2seqH,$counts,$bestP) = @_;
94    
95      my($role,$peg) = @$tuple;      my($role,$peg) = @$tuple;
96    
97      my $figfam_pegs = &figfam_pegs_for_role($role);      my $figfam_pegs = &figfam_pegs_for_role($role);
98    
99      my @sims        = &ProtSims::blastP([[$peg,'',$id2seqH->{$peg}]],$figfam_pegs,10);      my @sims        = &ProtSims::blastP([[$peg,'',$id2seqH->{$peg}]],$figfam_pegs,10);
# Line 94  Line 106 
106      }      }
107  }  }
108    
109  #### MUST update ###  
110  sub figfam_pegs_for_role {  sub figfam_pegs_for_role {
111      my($role) = @_;      my($role) = @_;
112    
# Line 112  Line 124 
124                 map { (($_ =~ /^(\S+)\t(\S+)/) && $figfams{$1}) ? $2 : () }                 map { (($_ =~ /^(\S+)\t(\S+)/) && $figfams{$1}) ? $2 : () }
125                `cat $FIG_Config::FigfamsData/families.2c`;                `cat $FIG_Config::FigfamsData/families.2c`;
126      my $idsH = $sapO->ids_to_sequences(-ids => \@ids, -protein => 1);      my $idsH = $sapO->ids_to_sequences(-ids => \@ids, -protein => 1);
127      return [map { my $seq = $idsH->{$_}; $seq ? [$_,'',$seq] : () } keys(%$idsH)];      return [map { my $seq = $idsH->{$_}; ($seq && (length($seq) > 30)) ? [$_,'',$seq] : () } keys(%$idsH)];
128  }  }

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3