[Bio] / FigKernelPackages / FIG.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIG.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.747, Fri Jan 30 12:42:15 2009 UTC revision 1.748, Mon Feb 2 12:50:20 2009 UTC
# Line 3025  Line 3025 
3025      #get lineage      #get lineage
3026      my $lineage = "";      my $lineage = "";
3027      if ($content =~ /\&lt\;Lineage\&gt\;cellular organisms; (.*)\&lt\;\/Lineage\&gt\;/) {      if ($content =~ /\&lt\;Lineage\&gt\;cellular organisms; (.*)\&lt\;\/Lineage\&gt\;/) {
3028          $lineage = &decode_html_chars($1);      $lineage = $1;
3029          $lineage =~ s/^\s*//o;      $overview->{ lineage } = &decode_html_chars($lineage);
         $lineage =~ s/\s*$//o;  
         $overview->{ lineage } = $lineage;  
3030      }      }
3031    
3032      # set genus      # set genus
3033      my $genus = "";      my $genus = "";
3034      if ($content =~ /\&lt\;ScientificName\&gt\;(.*)\&lt\;\/ScientificName\&gt\;(\W*)\&lt\;Rank\&gt\;genus\&lt\;\/Rank\&gt\;/) {      if ($content =~ /\&lt\;ScientificName\&gt\;(.*)\&lt\;\/ScientificName\&gt\;(\W*)\&lt\;Rank\&gt\;genus\&lt\;\/Rank\&gt\;/) {
3035          $genus = &decode_html_chars($1);          $genus = &decode_html_chars($1);
         $genus =~ s/^\s*//o;  
         $genus =~ s/^Candidatus\s+//o;  
         $genus =~ s/\s*$//o;  
3036          $overview->{ genus } = $genus ;          $overview->{ genus } = $genus ;
3037      }      }
3038    
# Line 3048  Line 3043 
3043          $species =~ s/$genus\s//;          $species =~ s/$genus\s//;
3044          #$species =~ s/ii$/i/;          #$species =~ s/ii$/i/;
3045          #$species =~ s/ae$/a/;          #$species =~ s/ae$/a/;
3046          $species =~ s/^\s*//o;  
         $species =~ s/\s*$//o;  
3047          $overview->{ species } = $species ;          $overview->{ species } = $species ;
3048      }      }
3049    
# Line 3057  Line 3051 
3051      my $strain = "";      my $strain = "";
3052      if ($content =~ /\&lt\;ScientificName\&gt\;(.*)\&lt\;\/ScientificName\&gt\;(\W*)\&lt\;OtherNames\&gt\;/) {      if ($content =~ /\&lt\;ScientificName\&gt\;(.*)\&lt\;\/ScientificName\&gt\;(\W*)\&lt\;OtherNames\&gt\;/) {
3053          $strain = &decode_html_chars($1);          $strain = &decode_html_chars($1);
3054          $strain =~ s/^s*//o;  
3055          $strain =~ s/^Candidatus\s+//o;      $strain =~ s/$genus\s//;
3056          $strain =~ s/^$genus\s+//;      $strain =~ s/$species\s//;
3057          $strain =~ s/^$species\s+//;  
         $strain =~ s/\s+/ /go;  
         $strain =~ s/\s*$//o;  
3058          $overview->{ strain } = $strain;          $overview->{ strain } = $strain;
3059      }      }
3060    
# Line 3070  Line 3062 
3062      my $scientific_name = "";      my $scientific_name = "";
3063      my $names = {};      my $names = {};
3064      foreach my $line (split ("\n", $content) )  {      foreach my $line (split ("\n", $content) )  {
3065          if ( $line =~ /\&lt\;Synonym\&gt\;(.*)\&lt\;\/Synonym\&gt\;/) {      $names->{ &decode_html_chars($1) } = 1 if ( $line =~ /\&lt\;Synonym\&gt\;(.*)\&lt\;\/Synonym\&gt\;/);
3066              $names->{ &decode_html_chars($1) } = 1;      $names->{ &decode_html_chars($1) } = 1 if ( $line =~ /\&lt\;EquivalentName\&gt\;(.*)\&lt\;\/EquivalentName\&gt\;/);
3067          }      $scientific_name = &decode_html_chars($1) if ($content =~ /\&lt\;ScientificName\&gt\;(.*)\&lt\;\/ScientificName\&gt\;/ and !$scientific_name );
   
         if ( $line =~ /\&lt\;EquivalentName\&gt\;(.*)\&lt\;\/EquivalentName\&gt\;/) {  
             $names->{ &decode_html_chars($1) } = 1;  
         }  
   
         if ($content =~ /\&lt\;ScientificName\&gt\;(.*)\&lt\;\/ScientificName\&gt\;/ and !$scientific_name ) {  
             $scientific_name = &decode_html_chars($1);  
         }  
3068      }      }
3069    
3070      $overview->{ synonyms } = $names;      $overview->{ synonyms } = $names;
3071      $overview->{ scientific_name } = $scientific_name;      $overview->{ scientific_name } = $scientific_name;
3072    
3073    
3074    
3075      return $overview;      return $overview;
3076  }  }
3077    
# Line 4825  Line 4811 
4811          }          }
4812      }      }
4813    
4814      $ans =~ s/^\s*//o;      $ans =~ s/^\s+//o;
4815      $ans =~ s/^Candidatus\s+//o;      $ans =~ s/^Candidatus\s*//o;
     $ans =~ s/\s+/ /og;  
     $ans =~ s/\s*$//o;  
4816    
4817      return $ans;      return $ans;
4818  }  }
# Line 8122  Line 8106 
8106    
8107                              #...Build BLAST nucleotide database for extracted DNA region,                              #...Build BLAST nucleotide database for extracted DNA region,
8108                              #   and TBLASTN $peg2 against the DNA:                              #   and TBLASTN $peg2 against the DNA:
8109                              &FIG::run("$FIG_Config::ext_bin/formatdb -i $tmp_dna -pF");                              &FIG::run("formatdb -i $tmp_dna -pF");
8110                              open(BLAST,"$FIG_Config::ext_bin/blastall -i $tmp_prot -d $tmp_dna -p tblastn -FF -e 1.0e-20 |")                              open(BLAST,"blastall -i $tmp_prot -d $tmp_dna -p tblastn -FF -e 1.0e-20 |")
8111                                  || die "could not blast";                                  || die "could not blast";
8112    
8113                              #...Parse the TBLASTN output; find and sort HSPs by left boundary:                              #...Parse the TBLASTN output; find and sort HSPs by left boundary:
# Line 8470  Line 8454 
8454    
8455  sub net_bbhs {  sub net_bbhs {
8456      my ($self, $peg, $cutoff) = @_;      my ($self, $peg, $cutoff) = @_;
8457      my $retVal = FIGRules::BBHData($peg, $cutoff);      return FIGRules::BBHData($peg, $cutoff);
     return @$retVal;  
8458  }  }
8459    
8460    
# Line 9662  Line 9645 
9645    
9646          # create a new subsystem object #          # create a new subsystem object #
9647          my $subsystem = new Subsystem( $ssn, $fig, 0 );          my $subsystem = new Subsystem( $ssn, $fig, 0 );
9648            if ( !defined( $subsystem ) ) {
9649                print STDERR "Could not get Subsystem Object for $ssn\n";
9650                next;
9651            }
9652    
9653    
9654          # print "Change role in $ssn\n";          # print "Change role in $ssn\n";
9655          my ( $succ, $error ) = $subsystem->change_role( $role, $newname );          my ( $succ, $error ) = $subsystem->change_role( $role, $newname );
# Line 16354  Line 16342 
16342    
16343      $ans = $taxonomy->{$genome};      $ans = $taxonomy->{$genome};
16344    
16345      if (!defined($ans)) {      if (!defined($ans))
16346          if (keys(%$taxonomy) == 0) {      {
16347            if (keys(%$taxonomy) == 0)
16348            {
16349              my $rdbH = $self->db_handle;              my $rdbH = $self->db_handle;
16350              my $relational_db_response = $rdbH->SQL("SELECT genome,taxonomy  FROM genome");              my $relational_db_response = $rdbH->SQL("SELECT genome,taxonomy  FROM genome");
16351              foreach my $pair (@$relational_db_response) {              my $pair;
16352                  my ($db_genome, $db_taxonomy) = @$pair;              foreach $pair (@$relational_db_response)
16353                  $db_taxonomy =~ s/^\s*//o;              {
16354                  $db_taxonomy =~ s/Candidatus\s*//og;                  $taxonomy->{$pair->[0]} = $pair->[1];
                 $db_taxonomy =~ s/\s+/ /og;  
                 $db_taxonomy =~ s/\s*$//o;  
                 $taxonomy->{$db_genome} = $db_taxonomy;  
16355              }              }
16356              $ans = $taxonomy->{$genome};              $ans = $taxonomy->{$genome};
16357          }          }
16358      }      }
16359        if (!$ans)
16360      if (!$ans) {      {
16361          warn "No taxonomy found for $genome\n";          warn "No taxonomy found for $genome\n";
16362      }      }
16363    
16364      $ans =~ s/^\s*//o;      $ans =~ s/^\s*//o;
     $ans =~ s/Candidatus\s*//og;  
     $ans =~ s/\s+/ /o;  
     $ans =~ s/\s*$//o;  
16365    
16366      return $ans;      return $ans;
16367  }  }
# Line 16447  Line 16431 
16431      my($self) = @_;      my($self) = @_;
16432      my $taxonomy = $self->cached('_taxonomy');      my $taxonomy = $self->cached('_taxonomy');
16433    
16434      if (keys(%$taxonomy) == 0) {      if (keys(%$taxonomy) == 0)
16435        {
16436          my $rdbH = $self->db_handle;          my $rdbH = $self->db_handle;
16437          my $relational_db_response = $rdbH->SQL("SELECT genome,taxonomy  FROM genome");          my $relational_db_response = $rdbH->SQL("SELECT genome,taxonomy  FROM genome");
16438          foreach my $pair (@$relational_db_response) {          my $pair;
16439              my ($db_genome, $db_taxonomy) = @$pair;          foreach $pair (@$relational_db_response)
16440              $db_taxonomy =~ s/^\s*//o;          {
16441              $db_taxonomy =~ s/Candidatus\s*//og;              $taxonomy->{$pair->[0]} = $pair->[1];
             $db_taxonomy =~ s/\s+/ /og;  
             $db_taxonomy =~ s/\s*$//o;  
             $taxonomy->{$db_genome} = $db_taxonomy;  
16442          }          }
16443      }      }
16444      return $taxonomy;      return $taxonomy;
# Line 16475  Line 16457 
16457  sub is_bacterial :Scalar {  sub is_bacterial :Scalar {
16458      my($self,$genome) = @_;      my($self,$genome) = @_;
16459    
16460      return ($self->taxonomy_of($genome) =~ /^Bacteria/o) ? 1 : 0;      return ($self->taxonomy_of($genome) =~ /^Bacteria/) ? 1 : 0;
16461  }  }
16462    
16463    
# Line 16490  Line 16472 
16472  sub is_archaeal :Scalar {  sub is_archaeal :Scalar {
16473      my($self,$genome) = @_;      my($self,$genome) = @_;
16474    
16475      return ($self->taxonomy_of($genome) =~ /^Archaea/o) ? 1 : 0;      return ($self->taxonomy_of($genome) =~ /^Archaea/) ? 1 : 0;
16476  }  }
16477    
16478    
# Line 16505  Line 16487 
16487  sub is_prokaryotic :Scalar {  sub is_prokaryotic :Scalar {
16488      my($self,$genome) = @_;      my($self,$genome) = @_;
16489    
16490      return ($self->taxonomy_of($genome) =~ /^(Archaea|Bacteria)/o) ? 1 : 0;      return ($self->taxonomy_of($genome) =~ /^(Archaea|Bacteria)/) ? 1 : 0;
16491  }  }
16492    
16493    
# Line 16520  Line 16502 
16502  sub is_eukaryotic :Scalar {  sub is_eukaryotic :Scalar {
16503      my($self,$genome) = @_;      my($self,$genome) = @_;
16504    
16505      return ($self->taxonomy_of($genome) =~ /^Eukaryota/o) ? 1 : 0;      return ($self->taxonomy_of($genome) =~ /^Eukaryota/) ? 1 : 0;
16506  }  }
16507    
16508    
# Line 16535  Line 16517 
16517  sub is_viral :Scalar {  sub is_viral :Scalar {
16518      my($self,$genome) = @_;      my($self,$genome) = @_;
16519    
16520      return ($self->taxonomy_of($genome) =~ /^Vir/o) ? 1 : 0;      return ($self->taxonomy_of($genome) =~ /^Vir/) ? 1 : 0;
16521  }  }
16522    
16523    
# Line 16549  Line 16531 
16531    
16532  sub is_environmental :Scalar {  sub is_environmental :Scalar {
16533      my($self,$genome) = @_;      my($self,$genome) = @_;
16534      return ($self->taxonomy_of($genome) =~ /environmental samples/io) ? 1 : 0;      return ($self->taxonomy_of($genome) =~ /environmental samples/) ? 1 : 0;
16535  }  }
16536    
16537    
# Line 16611  Line 16593 
16593          my @tax1 = split(/\s*;\s*/,$self->taxonomy_of($genome1));          my @tax1 = split(/\s*;\s*/,$self->taxonomy_of($genome1));
16594          my @tax2 = split(/\s*;\s*/,$self->taxonomy_of($genome2));          my @tax2 = split(/\s*;\s*/,$self->taxonomy_of($genome2));
16595    
16596          $d = FIGRules::CrudeDistanceFormula(\@tax1, \@tax2);          $d = 1;
16597          for ($i=0, $v=0.5; ($i < @tax1) && ($i < @tax2) && ($tax1[$i] eq $tax2[$i]); $i++, $v = $v/2)          for ($i=0, $v=0.5; ($i < @tax1) && ($i < @tax2) && ($tax1[$i] eq $tax2[$i]); $i++, $v = $v/2)
16598          {          {
16599              $d -= $v;              $d -= $v;
# Line 17197  Line 17179 
17179      return 1;      return 1;
17180  }  }
17181    
 sub is_cluster_based_subsystem {  
     my ( $self, $sub ) = @_;  
   
     my $class = $self->subsystem_classification($sub);  
     return ($class && ($class->[0] =~ /cluster/i));  
 }  
   
17182  sub is_exchangable_subsystem :Scalar {  sub is_exchangable_subsystem :Scalar {
17183      shift if UNIVERSAL::isa($_[0],__PACKAGE__);      shift if UNIVERSAL::isa($_[0],__PACKAGE__);
17184      my $ssa = (@_ == 1) ? $_[0] : $_[1];      my $ssa = (@_ == 1) ? $_[0] : $_[1];
# Line 18084  Line 18059 
18059  sub get_subsystem :Scalar  sub get_subsystem :Scalar
18060  {  {
18061      my($self, $subsystem, $force_load) = @_;      my($self, $subsystem, $force_load) = @_;
     my $ent;  
18062      my $sub;      my $sub;
18063    
     my $ss_cache_size = $FIG_Config::subsystem_cache_size;  
     $ss_cache_size = 30 unless $ss_cache_size =~ /^\d+$/;  
   
   
     my $now = time;  
18064      $subsystem =~ s/[ \/]/_/g;      $subsystem =~ s/[ \/]/_/g;
18065      my $cache = $self->cached('_Subsystems');      my $cache = $self->cached('_Subsystems');
18066        if ($force_load || !($sub = $cache->{$subsystem}))
     if ($force_load || !($ent = $cache->{$subsystem}))  
18067      {      {
18068          $sub = new Subsystem($subsystem, $self);          $sub = new Subsystem($subsystem, $self);
18069          if ($sub)          $cache->{$subsystem} = $sub if $sub;
         {  
             $ent = $cache->{$subsystem} = [$sub, $now];  
             if (scalar(keys(%$cache)) > $ss_cache_size)  
             {  
                 my @sorted = sort { $cache->{$a}->[1] <=> $cache->{$b}->[1] } keys %$cache;  
                 my $remove = $sorted[0];  
                 #warn "Removing ss $remove from cache\n";  
                 #warn Dumper(\@sorted);  
                 delete $cache->{$remove};  
             }  
         }  
     }  
   
     if ($ent)  
     {  
         $ent->[1] = $now;  
         return $ent->[0];  
     }  
     else  
     {  
         return undef;  
18070      }      }
18071        return $sub;
18072  }  }
18073    
18074  =head3 subsystem_to_roles  =head3 subsystem_to_roles
# Line 18164  Line 18112 
18112    
18113  sub pegs_in_subsystem_cell  sub pegs_in_subsystem_cell
18114  {  {
18115      my($self, $subsystem, $genome, $role, $ignore_inactive) = @_;      my($self, $subsystem, $genome, $role) = @_;
18116      $subsystem =~ s/[ \/]/_/g;      $subsystem =~ s/[ \/]/_/g;
18117    
18118      my $subsys = $self->get_subsystem($subsystem);      my $sub = $self->get_subsystem($subsystem);
     return undef unless $subsys;  
     my @pegs =  grep { ! $self->is_deleted_fid($_) } $subsys->get_pegs_from_cell($genome, $role);  
   
     if (! $ignore_inactive)  
     {  
         return @pegs;  
     }  
     my @pegs1 = grep { &active_peg($subsys,$_) } @pegs;  
     return @pegs1;  
 }  
   
 sub active_peg {  
     my($subsys,$peg) = @_;  
18119    
18120      my $variant = $subsys->get_variant_code_for_genome(&FIG::genome_of($peg));      return undef unless $sub;
18121      return ($variant !~ /0|-1|\*0|\*-1/);      return grep { ! $self->is_deleted_fid($_) } $sub->get_pegs_from_cell($genome, $role);
18122  }  }
18123    
18124  sub get_clearinghouse :Scalar  sub get_clearinghouse :Scalar
# Line 18389  Line 18324 
18324    
18325      if ($self->is_deleted_fid($peg)) { return () }      if ($self->is_deleted_fid($peg)) { return () }
18326    
18327      ($peg =~ /^fig\|\d+\.\d+\.\w+\.\d+$/) or return;      ($peg =~ /^fig\|\d+\.\d+\.peg\.\d+$/) or return;
18328    
18329      my $rdbH = $self->db_handle;      my $rdbH = $self->db_handle;
18330    
# Line 18447  Line 18382 
18382    
18383      if ($self->is_deleted_fid($peg)) { return () }      if ($self->is_deleted_fid($peg)) { return () }
18384    
18385      ($peg =~ /^fig\|\d+\.\d+\.\w+\.\d+$/) or return;      ($peg =~ /^fig\|\d+\.\d+\.peg\.\d+$/) or return;
18386    
18387      my $rdbH = $self->db_handle;      my $rdbH = $self->db_handle;
18388    
# Line 18477  Line 18412 
18412      }      }
18413  }  }
18414    
 =head3 subsystems_for_pegs_complete  
   
  Return the list of subsystems, roles and variants that the pegs appear in.  
  Returns a hash keyed by peg. Each item in the hash is a reference to a tuple  
  of subsystem, role and variant. If the last argument ($include_aux)  
  is "true", also roles playing auxiliary roles will be returned.  
   
 =cut  
   
 sub subsystems_for_pegs_complete {  
     my ($self, $pegs, $include_aux) = @_;  
     my $rdbH = $self->db_handle;  
     my %results;  
   
     my $q = "SELECT subsystem, role, variant, protein FROM (SELECT subsystem, role, variant, protein FROM subsystem_index WHERE protein IN ('".join("', '", @$pegs)."')) AS t1 LEFT JOIN deleted_fids ON t1.protein=deleted_fids.fid WHERE deleted_fids.fid IS NULL";  
     unless ($include_aux) {  
         $q = "SELECT t2.subsystem, t2.role, t2.variant, t2.protein FROM (".$q.") AS t2 LEFT JOIN aux_roles ON (t2.subsystem=aux_roles.subsystem AND t2.role=aux_roles.role) WHERE aux_roles.role IS NULL";  
     }  
     my $ret = $rdbH->SQL($q);  
   
     foreach my $row (@$ret) {  
         if (exists($results{$row->[3]})) {  
             push(@{$results{$row->[3]}}, [ $row->[0], $row->[1], $row->[2] ]);  
         } else {  
             $results{$row->[3]} = [ [ $row->[0], $row->[1], $row->[2] ] ];  
         }  
     }  
   
     return %results;  
 }  
   
18415  =head3 subsystems_for_peg  =head3 subsystems_for_peg
18416    
18417   Return the list of subsystems and roles that this peg appears in.   Return the list of subsystems and roles that this peg appears in.
# Line 21010  Line 20914 
20914      return $resp->result;      return $resp->result;
20915  }  }
20916    
 =head3 clearinghouse_ping  
   
     $fig->clearinghouse_ping($timeout);  
   
 Send a ping message to the clearinghouse. If it doesn't respond within the timeout,  
 the clearinghouse is not available for use.  
   
 =cut  
   
 sub clearinghouse_ping  
 {  
     my($self, $timeout) = @_;  
   
     my $ch_url = "http://clearinghouse.theseed.org/Clearinghouse/clearinghouse_services.cgi";  
     my $proxy = SOAP::Lite->uri("http://www.soaplite.com/Scripts")->proxy($ch_url, timeout => $ timeout);  
   
     my $resp;  
     eval {  
         $resp = $proxy->ping();  
     };  
     if ($@)  
     {  
         warn "Error on proxy call: $@\n";  
         return undef;  
     }  
     if ($resp->fault)  
     {  
         warn "Failure on ping(): " .$resp->faultcode . ": " . $resp->faultstring . "\n";  
         return undef;  
     }  
   
     return $resp->result;  
 }  
   
   
20917  =head3 clearinghouse_next_feature_id  =head3 clearinghouse_next_feature_id
20918    
20919      my $val = $fig->clearinghouse_next_feature_id($genome, $type)      my $val = $fig->clearinghouse_next_feature_id($genome, $type)

Legend:
Removed from v.1.747  
changed lines
  Added in v.1.748

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3