[Bio] / FigKernelPackages / FIG.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIG.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.695, Thu Jun 12 17:16:35 2008 UTC revision 1.696, Fri Jun 20 20:27:49 2008 UTC
# Line 1043  Line 1043 
1043              warn "Genome $genome that replaces $replaces is missing a peg_maps file: $!";              warn "Genome $genome that replaces $replaces is missing a peg_maps file: $!";
1044          }          }
1045      }      }
1046    print "Done Salvaging\n";
1047    
1048      #      #
1049      # Make sure that the features are registered for this genome. We assume here that      # Make sure that the features are registered for this genome. We assume here that
# Line 1077  Line 1077 
1077          }          }
1078          close($fh);          close($fh);
1079    
1080            print "Done registering features\n";
1081          #          #
1082          # See what the clearinghouse has, and register features if they are not there.          # See what the clearinghouse has, and register features if they are not there.
1083          #          #
# Line 21105  Line 21106 
21106      return $retVal;      return $retVal;
21107  }  }
21108    
21109    #new functions and support to add structured english
21110    sub to_structured_english {
21111        my($self, $fig,$peg) = @_;
21112    
21113    # ilit(1646786);fig|224308.1.peg.1063
21114    #
21115    # dlit(8332479);gj
21116    
21117    #1) With dlits:    "The characterization of essentially identical proteins has been discussed in
21118    #pubmed1 [, pubmed2,... and pubmedn]"  Where the pubmed IDs are links
21119    
21120    #2) With ilits:     "The characterization of proteins implementing this function was done in
21121    #GenusSpecies1 [, GenusSpecies2, ... and GenusSpecies3].  We believe that this protein is an
21122    #isofunctional homolog of these characterized proteins."
21123    
21124    # GenusSpeciesn should not be the whole string returned by $fig->genus_species($genome) -- use only the first two words.
21125    
21126        my @ev_codes = &evidence_codes($fig,$peg);
21127        my $by_sub = {};
21128        #my @ilit;
21129        #my @dlit;
21130        my $ilit = {};
21131        my $dlit = {};
21132    
21133        push (@ev_codes, "dlit(8332479);gj");
21134        push (@ev_codes, "dlit(1646786);gj");
21135        push (@ev_codes, "ilit(1646786);fig|224308.1.peg.1063");
21136        push (@ev_codes, "ilit(8332479);fig|279010.5.peg.2147");
21137        push (@ev_codes, "ilit(1646787);fig|224308.1.peg.1063");
21138    
21139        foreach my $code (@ev_codes)
21140        {
21141            if ($code =~ /^isu;(\S.*\S)/)                { $by_sub->{$1}->{'isu'} = 1  }
21142            if ($code =~ /^icw\((\d+)\);(\S.*\S)/)       { $by_sub->{$2}->{'icw'} = $1 }
21143            #if ($code =~ /^ilit\((\d+)\);(\S.*\S)/)       { push (@ilit, ($2))}
21144            #if ($code =~ /^dlit\((\d+)\);(\S.*\S)/)        { push (@dlit, $1)}
21145            if ($code =~ /^ilit\((\d+)\);(\S.*\S)/)       { $ilit->{$2} = $1 }
21146            if ($code =~ /^dlit\((\d+)\);(\S.*\S)/)        { $dlit->{$1} = 1 }
21147    
21148        }
21149    
21150        my @insubs = grep { $fig->usable_subsystem($_,1) } $fig->peg_to_subsystems($peg,1);
21151        my %subs = map { $_ => 1 } @insubs;
21152        my $funcSeed = $fig->function_of($peg,undef,1);
21153        if (@insubs < 1) { return "" }
21154    
21155        my $pieces = [];
21156        &add_func_assertion($pieces,$funcSeed);
21157        &add_in_subs($pieces,\@insubs);
21158        my @sub_numbers;
21159    
21160        foreach my $sub (@insubs)
21161        {
21162            &add_clustering_and_dup($pieces,$by_sub->{$sub},$sub);
21163            push(@sub_numbers, "SS:".$fig->clearinghouse_register_subsystem_id($sub));
21164        }
21165    
21166         my @keys =  keys(%$dlit);
21167         if (@keys) {
21168            make_dlit_text($pieces, @keys);
21169        }
21170    
21171    
21172         my @keys =  keys(%$ilit);
21173         if (@keys) {
21174            make_ilit_text($fig, $pieces, $ilit, @keys);
21175        }
21176    
21177    
21178        return join(",", @ev_codes), join(",", @sub_numbers), &render($pieces);
21179        #return join(",", @ev_codes), join(",", @insubs), &render($pieces);
21180    }
21181    
21182    sub render {
21183        my $cgi = new CGI;
21184        my($pieces) = @_;
21185    
21186        my @lines = ();
21187        my $curr  = "";
21188        foreach my $piece (@$pieces)
21189        {
21190            $piece = "$piece  ";
21191            $curr = $curr . $piece;
21192    
21193            while (length($curr) > 100)
21194            {
21195                my($p1,$p2) = &split_piece($curr,100);
21196                $p1 =~ s/^\s+//;
21197                push(@lines, $p1);
21198                $curr = $p2;
21199            }
21200        }
21201        if ($curr)
21202        {
21203            $curr =~ s/^\s+//;
21204            push(@lines,$curr) ;
21205        }
21206    
21207        my $encoded = $cgi->escape(join("\n",@lines) . "\n");
21208    #    print &Dumper($pieces,$encoded);
21209        return $encoded;
21210    }
21211    
21212    sub split_piece {
21213        my($piece,$n) = @_;
21214    
21215        my $i;
21216        for ($i = $n; ($i > 0) && (substr($piece,$i,1) ne " "); $i--) {}
21217        if ($i)
21218        {
21219            return (substr($piece,0,$i+1),substr($piece,$i+1));
21220        }
21221        else
21222        {
21223            return ($piece,"");
21224        }
21225    }
21226    
21227    sub make_dlit_text {
21228            my ($pieces, @dlit) = @_;
21229    
21230            my $text = "The characterization of essentially identical proteins has been discussed in ".&make_pubmed_link($dlit[0]);
21231            print STDERR "MAKE_DLIT $text, \n";
21232            shift(@dlit);
21233            if (@dlit) {
21234                    my $size = @dlit;
21235    
21236                    while (--$size) {
21237                            my $p = shift(@dlit);
21238                            $text = $text.", ".&make_pubmed_link($p);
21239                    }
21240                    if (@dlit) {
21241                            $text = $text." and ".&make_pubmed_link($dlit[0]);
21242                    }
21243            }
21244            $text .= ".";
21245            push (@$pieces, $text);
21246    }
21247    
21248    
21249    sub make_ilit_text {
21250            my ($fig, $pieces, $ilit_hash, @ilit) = @_;
21251    
21252            my $filler = "";
21253            my $text = "The characterization of proteins implementing this function was done in ".make_ilit_gs($fig, $ilit[0], $ilit_hash);
21254            shift(@ilit);
21255            if (@ilit) {
21256                    $filler.="these are homologous proteins which implement";
21257                    my $size =  @ilit;
21258                    while(--$size) {
21259                            my $peg = shift(@ilit);
21260                            $text = $text.", ".&make_ilit_gs($fig, $peg, $ilit_hash);
21261                    }
21262                    if (@ilit) {
21263                            $text = $text." and ".&make_ilit_gs($fig, $ilit[0], $ilit_hash);
21264                    }
21265            } else {
21266    
21267                    $filler.="this is a homologous protein which implements";
21268            }
21269    
21270            $text = $text.".  We believe that $filler the same function.";
21271            push (@$pieces, $text);
21272    
21273    }
21274    
21275    sub make_ilit_gs {
21276    
21277            my ($fig, $peg, $ilit_hash) = @_;
21278    
21279    
21280            $peg =~ /^fig\|(\d+\.\d+)\.peg\.\d+$/;
21281            my $gs = $fig->genus_species($1);
21282            my @words = split /\s+/, $gs;
21283            if (@words) {
21284                    if (@words > 1) {
21285                            return $words[0]." ".$words[1]." ".make_pubmed_link($ilit_hash->{$peg});
21286                    } else {
21287                            return $words[0]." ".make_pubmed_link($ilit_hash->{$peg});
21288                    }
21289            } else {
21290                    return "";
21291            }
21292    }
21293    
21294    sub make_pubmed_link {
21295            my ($pubmed) = @_;
21296            return "<a href='http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&db=PubMed&list_uids=$pubmed&dopt=AbstractPlus' target='_blank'>$pubmed</a>";
21297    }
21298    
21299    sub add_clustering_and_dup {
21300        my($pieces,$by_sub_entry,$sub) = @_;
21301    
21302        if ($by_sub_entry)
21303        {
21304            if ($by_sub_entry->{isu} || $by_sub_entry->{icw})
21305            {
21306                my $fixed_sub = &fix_sub_name($sub);
21307                push(@$pieces,"In $fixed_sub, " . &isu_and_icw($by_sub_entry->{isu},$by_sub_entry->{icw}));
21308            }
21309        }
21310    }
21311    
21312    sub isu_and_icw {
21313        my($isu,$icw) = @_;
21314    
21315        if ($isu && $icw) { return "it appears to play a functional role that we have not associated with any other gene, and it occurs in close proximity on the chromosome with " . (($icw == 1) ? "another gene from the same subsystem." : "$icw other genes from the same subsystem.") }
21316        if ($isu)         { return "it appears to play a functional role that we have not associated with any other gene." }
21317        if ($icw)         { "It occurs in close proximity on the chromosome with " . (($icw == 1) ? "another gene from the same subsystem." : "$icw other genes from the same subsystem.") }
21318    }
21319    
21320    sub add_func_assertion {
21321        my($pieces,$funcSeed) = @_;
21322    
21323        push(@$pieces,"We currently believe that the function of the encoded protein is \"$funcSeed\".");
21324        return;
21325    }
21326    
21327    sub add_in_subs {
21328        my($pieces,$insubs) = @_;
21329    
21330        if (@$insubs > 0)
21331        {
21332            my $n = @$insubs;
21333            if ($n > 0)
21334            {
21335                my $in_sub_state = "The protein occurs in " .
21336                                   (($n == 1) ? "1 subsystem" : "$n subsystems") . ': ' . &subs($insubs) . ".";
21337                push(@$pieces,$in_sub_state);
21338            }
21339        }
21340    }
21341    
21342    sub subs {
21343        my($subs) = @_;
21344    
21345        if (@$subs == 1) { return &fix_sub_name($subs->[0]) }
21346        my @subL = map { &fix_sub_name($_) } @$subs;
21347        $subL[$#subL] = "and $subL[$#subL]";
21348        return join(", ",@subL);
21349    }
21350    
21351    sub fix_sub_name {
21352        my($x) = @_;
21353    
21354        $x =~ s/_/ /g;
21355        return "\"$x\"";
21356    }
21357    
21358    sub evidence_codes {
21359        my($fig,$peg) = @_;
21360    
21361        if ($peg !~ /^fig\|\d+\.\d+\.peg\.\d+$/) { return "" }
21362    
21363        my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes($peg);
21364        return map { $_->[2] } @codes;
21365    }
21366  =head3 GetUserDataDirectory  =head3 GetUserDataDirectory
21367    
21368      my $directoryName = FIG::_GetUserDataDirectory($userName);      my $directoryName = FIG::_GetUserDataDirectory($userName);

Legend:
Removed from v.1.695  
changed lines
  Added in v.1.696

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3