[Bio] / FigKernelPackages / FIG.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIG.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.711, Mon Jul 28 15:27:11 2008 UTC revision 1.712, Wed Aug 6 14:03:51 2008 UTC
# Line 12948  Line 12948 
12948    
12949  =item groupName  =item groupName
12950    
12951  Name of the group whose keys are desired.  Name of the group whose keys are desired. If omitted, then all keys will be returned. This
12952    could be expensive, but when it's necessary, it's necessary.
12953    
12954  =item RETURN  =item RETURN
12955    
# Line 12962  Line 12963 
12963  sub get_group_key_info {  sub get_group_key_info {
12964      # Get the parameters.      # Get the parameters.
12965      my ($self, $groupName) = @_;      my ($self, $groupName) = @_;
12966      # Get the key list.      # Declare the return variable.
12967      my %retVal = $self->{_ca}->GetAttributeData('group', $groupName);      my %retVal;
12968        # Check the parameter.
12969        if (defined $groupName) {
12970            # Get all keys in the group.
12971            %retVal = $self->{_ca}->GetAttributeData(group => $groupName);
12972        } else {
12973            # Get all the keys.
12974            %retVal = $self->{_ca}->GetAttributeData(name => '');
12975        }
12976      # Return the results.      # Return the results.
12977      return %retVal;      return %retVal;
12978  }  }
# Line 21737  Line 21746 
21746      return $retVal;      return $retVal;
21747  }  }
21748    
21749  #new functions and support to add structured english  =head3 to_structured_english
 sub to_structured_english {  
     my($self,$peg, $escaped) = @_;  
     my $fig = $self;  
   
 #1) With dlits:    "The characterization of essentially identical proteins has been discussed in  
 #pubmed1 [, pubmed2,... and pubmedn]"  Where the pubmed IDs are links  
   
 #2) With ilits:     "The characterization of proteins implementing this function was done in  
 #GenusSpecies1 [, GenusSpecies2, ... and GenusSpecies3].  We believe that this protein is an  
 #isofunctional homolog of these characterized proteins."  
   
 # GenusSpeciesn should not be the whole string returned by $fig->genus_species($genome) -- use only the first two words.  
   
     my @ev_codes = &evidence_codes($fig,$peg);  
     if (!@ev_codes) {return ("", "", "");}  
     my $by_sub = {};  
     my $ilit = {};  
     my $dlit = {};  
   
     # for testing  
     #push (@ev_codes, "dlit(8332479);gj");  
     #push (@ev_codes, "dlit(1646786);gj");  
     #push (@ev_codes, "ilit(1646786);fig|224308.1.peg.1063");  
     #push (@ev_codes, "ilit(8332479);fig|279010.5.peg.2147");  
     #push (@ev_codes, "ilit(1646787);fig|224308.1.peg.2273");  
     #push (@ev_codes, "ilit(1646787);fig|192222.1.peg.543");  
   
     foreach my $code (@ev_codes)  
     {  
         if ($code =~ /^isu;(\S.*\S)/)                { $by_sub->{$1}->{'isu'} = 1  }  
         if ($code =~ /^icw\((\d+)\);(\S.*\S)/)       { $by_sub->{$2}->{'icw'} = $1 }  
         if ($code =~ /^ilit\((\d+)\);(\S.*\S)/)       {  
                 my $gs = &get_gs($fig, $2);  
                 print STDERR "GS = $gs\n";  
                 unless (exists $ilit->{$gs}) { $ilit->{$gs} = [];}  
                 push(@{$ilit->{$gs}}, $1);  
         }  
         if ($code =~ /^dlit\((\d+)\);(\S.*\S)/)        { $dlit->{$1} = 1 }  
     }  
   
     my @insubs = grep { $fig->usable_subsystem($_,1) } $fig->peg_to_subsystems($peg,1);  
     my %subs = map { $_ => 1 } @insubs;  
     my $funcSeed = $fig->function_of($peg,undef,1);  
     if (@insubs < 1) { return ("", "", "") }  
   
     my $pieces = [];  
     &add_func_assertion($pieces,$funcSeed);  
     &add_in_subs($pieces,\@insubs);  
     my @sub_numbers;  
   
     foreach my $sub (@insubs)  
     {  
         &add_clustering_and_dup($pieces,$by_sub->{$sub},$sub);  
         push(@sub_numbers, "SS:".$fig->clearinghouse_register_subsystem_id($sub));  
     }  
   
      my @keys =  keys(%$dlit);  
      if (@keys) {  
         make_dlit_text($pieces, @keys);  
     }  
     if (keys(%$ilit)) {  
             make_ilit_text($pieces, $ilit);  
     }  
   
     return join(",", @ev_codes), join(",", @sub_numbers), &render($pieces, $escaped);  
 }  
   
 sub get_gs {  
         my ($fig, $peg) = @_;  
   
         $peg =~ /^fig\|(\d+\.\d+)\.peg\.\d+$/;  
         my $gs = $fig->genus_species($1);  
         my @words = split /\s+/, $gs;  
         if (@words)  {  
                 $gs = $words[0];  
                 if (@words > 1)  {  
                         $gs .= " $words[1]";  
                 }  
         }  
         return($gs);  
 }  
   
 sub render {  
     my $cgi = new CGI;  
     my($pieces, $escaped) = @_;  
   
     my @lines = ();  
     my $curr  = "";  
     foreach my $piece (@$pieces)  
     {  
         $piece = "$piece  ";  
         $curr = $curr . $piece;  
   
         while (length($curr) > 100)  
         {  
             my($p1,$p2) = &split_piece($curr,100);  
             $p1 =~ s/^\s+//;  
             push(@lines, $p1);  
             $curr = $p2;  
         }  
     }  
     if ($curr)  
     {  
         $curr =~ s/^\s+//;  
         push(@lines,$curr) ;  
     }  
   
     if ($escaped) {  
         return  $cgi->escape(join("\n",@lines) . "\n");  
     } else {  
         return (join("\n",@lines) . "\n");  
   
     }  
 }  
   
 sub split_piece {  
     my($piece,$n) = @_;  
   
     my $i;  
     for ($i = $n; ($i > 0) && (substr($piece,$i,1) ne " "); $i--) {}  
     if ($i)  
     {  
         return (substr($piece,0,$i+1),substr($piece,$i+1));  
     }  
     else  
     {  
         return ($piece,"");  
     }  
 }  
21750    
21751  sub make_dlit_text {      my ($ev_code_list, $subsys_list, $english_string) = $fig->to_structured_english($fig, $peg, $escape_flag);
         my ($pieces, @dlit) = @_;  
21752    
21753          #my $text = "The characterization of essentially identical proteins has been discussed in ".&make_pubmed_link($dlit[0]);  Create a structured English description of the evidence codes for a PEG,
21754          my $text = "The function of this gene is asserted in ".&make_pubmed_link($dlit[0]);  in either HTML or text format. In addition to the structured text, we
21755          shift(@dlit);  also return the subsystems and evidence codes for the PEG in list form.
         if (@dlit) {  
                 my $size = @dlit;  
21756    
21757                  while (--$size) {  =over 4
                         my $p = shift(@dlit);  
                         $text = $text.", ".&make_pubmed_link($p);  
                 }  
                 if (@dlit) {  
                         $text = $text." and ".&make_pubmed_link($dlit[0]);  
                 }  
         }  
         $text .= ".";  
         push (@$pieces, $text);  
 }  
   
   
 sub make_ilit_text {  
         my ($pieces, $ilit) = @_;  
   
        my  @keys =  keys(%$ilit);  
         my $filler = "";  
         #my $text = "The characterization of proteins implementing this function was done in ";  
         my $text = "The function of genes we believe play the same functional roles have been described in ";  
         my $key = shift(@keys);  
         print STDERR Dumper($ilit->{$key}), $key, "\n";  
         $text .= $key.&make_pubmed_list($ilit->{$key});  
   
         if (@keys) {  
                 $filler.="these are homologous proteins which implement";  
                 my $size =  @keys;  
                 while(--$size) {  
                         $key = shift(@keys);  
                         $text .= ", ".$key.&make_pubmed_list($ilit->{$key});  
                 }  
                 if (@keys) {  
                         $key = shift(@keys);  
                         $text = $text." and ".$key.&make_pubmed_list($ilit->{$key});  
                 }  
         } else {  
                 $filler.="this is a homologous protein which implements";  
         }  
   
   
         $text = $text.".  We believe that $filler the same function.";  
         push (@$pieces, $text);  
   
 }  
   
 sub make_pubmed_list {  
   
         my ($plst) = @_;  
         print STDERR Dumper($plst);  
   
         my $text = " (";  
         foreach my $pub (@$plst) {  
                 print STDERR $pub, "\n";  
                 $text .= make_pubmed_link($pub).", ";  
         }  
         $text =~s/, $/)/;  
         return($text);  
 }  
   
   
 sub make_pubmed_link {  
         my ($pubmed) = @_;  
         return "<a href='http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=$pubmed&dopt=AbstractPlus' target='_blank'>$pubmed</a>";  
 }  
   
 sub add_clustering_and_dup {  
     my($pieces,$by_sub_entry,$sub) = @_;  
   
     if ($by_sub_entry)  
     {  
         if ($by_sub_entry->{isu} || $by_sub_entry->{icw})  
         {  
             my $fixed_sub = &fix_sub_name($sub);  
             push(@$pieces,"In $fixed_sub, " . &isu_and_icw($by_sub_entry->{isu},$by_sub_entry->{icw}));  
         }  
     }  
 }  
   
 sub isu_and_icw {  
     my($isu,$icw) = @_;  
21758    
21759      if ($isu && $icw) { return "it appears to play a functional role that we have not associated with any other gene, and it occurs in close proximity on the chromosome with " . (($icw == 1) ? "another gene from the same subsystem." : "$icw other genes from the same subsystem.") }  =item peg
     if ($isu)         { return "it appears to play a functional role that we have not associated with any other gene." }  
     if ($icw)         { "It occurs in close proximity on the chromosome with " . (($icw == 1) ? "another gene from the same subsystem." : "$icw other genes from the same subsystem.") }  
 }  
21760    
21761  sub add_func_assertion {  ID of the protein or feature whose evidence is desired.
     my($pieces,$funcSeed) = @_;  
21762    
21763      push(@$pieces,"We currently believe that the function of the encoded protein is \"$funcSeed\".");  =item escape_flag
     return;  
 }  
21764    
21765  sub add_in_subs {  TRUE if the output text should be HTML, else FALSE
     my($pieces,$insubs) = @_;  
21766    
21767      if (@$insubs > 0)  =item RETURN
     {  
         my $n = @$insubs;  
         if ($n > 0)  
         {  
             my $in_sub_state = "The protein occurs in " .  
                                (($n == 1) ? "1 subsystem" : "$n subsystems") . ': ' . &subs($insubs) . ".";  
             push(@$pieces,$in_sub_state);  
         }  
     }  
 }  
21768    
21769  sub subs {  Returns a three-element list. The first element is a reference to a list of evidence codes,
21770      my($subs) = @_;  the second is a list of the subsystem containing the peg, and the third is the readable
21771    text description of the evidence.
21772    
21773      if (@$subs == 1) { return &fix_sub_name($subs->[0]) }  =back
     my @subL = map { &fix_sub_name($_) } @$subs;  
     $subL[$#subL] = "and $subL[$#subL]";  
     return join(", ",@subL);  
 }  
21774    
21775  sub fix_sub_name {  =cut
     my($x) = @_;  
21776    
21777      $x =~ s/_/ /g;  sub to_structured_english {
21778      return "\"$x\"";      my ($self, $peg, $escape_flag) = @_;
21779        return FIGRules::to_structured_english($self, $peg, $escape_flag);
21780  }  }
21781    
 sub evidence_codes {  
     my($fig,$peg) = @_;  
   
     if ($peg !~ /^fig\|\d+\.\d+\.peg\.\d+$/) { return "" }  
   
     my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($peg);  
     return map { $_->[2] } @codes;  
 }  
21782  =head3 GetUserDataDirectory  =head3 GetUserDataDirectory
21783    
21784      my $directoryName = FIG::_GetUserDataDirectory($userName);      my $directoryName = FIG::_GetUserDataDirectory($userName);

Legend:
Removed from v.1.711  
changed lines
  Added in v.1.712

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3