[Bio] / FigKernelPackages / RC.pm Repository:
ViewVC logotype

View of /FigKernelPackages/RC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Dec 5 19:06:30 2005 UTC (13 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.1: +17 -0 lines
Added license words.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

package RC;

use Carp;

#
# show_page($cgi, $html)
#
# Prints the result of $cgi->header followed by every element of the
# array referenced by $html to the currently selected output handle.
#
sub show_page {
    my $cgi  = shift;
    my $html = shift;

    # header is called in list context, exactly as a direct
    # "print $cgi->header" would call it.
    my @header = $cgi->header;

    print @header;
    print @{$html};
}
    

#
# make_table($col_hdrs, $tab, $title)
#
# Builds an HTML table as a single string.
#
#   $col_hdrs  reference to a list of column header strings
#   $tab       reference to a list of rows; each row is a reference
#              to a list of cell strings
#   $title     text placed (in bold) in the table caption
#
# Values are inserted into the markup as-is; no escaping is done here.
#
sub make_table {
    my($col_hdrs,$tab,$title) = @_;

    my $html = "<table border><caption><b>$title</b></caption>\n";

    # One header row ...
    $html .= "<tr><th>" . join("</th><th>",@$col_hdrs) . "</th></tr>\n";

    # ... then one <tr> per data row.
    $html .= join("",
                  map { "<tr><td>" . join("</td><td>",@$_) . "</td></tr>\n" }
                  @$tab);

    $html .= "</table>\n";
    return $html;
}

#
# same_func($func1, $func2)
#
# Decides whether two function strings should be treated as the same
# function.  Returns a true value if so, a false value otherwise.
#
# Rules, applied in order:
#   1. Both arguments must be defined, otherwise confess() is called.
#   2. If hypo($func1) is true, the result is hypo($func2).
#      If only hypo($func2) is true, the result is 0.
#   3. If both normalize()d strings are non-empty and one contains the
#      other, the result is 1.
#   4. Otherwise every EC-style number (four dot-separated integers)
#      found in one string must also occur in the other string; the
#      result is 1 only if $func1 contains at least one such number.
#
sub same_func {
    my($func1,$func2) = @_;

    if (! (defined($func1) && defined($func2)))
    {
        # Show undefined arguments explicitly rather than interpolating undef.
        my $shown1 = defined($func1) ? $func1 : '<undef>';
        my $shown2 = defined($func2) ? $func2 : '<undef>';
        confess "func1=$shown1 func2=$shown2";
    }

    if (hypo($func1))
    {
        # Caller's context is passed through to hypo(), as before.
        return hypo($func2);
    }
    elsif (hypo($func2))
    {
        return 0;
    }

    my $func1n = normalize($func1);
    my $func2n = normalize($func2);

    if ($func1n && $func2n &&
        ((index($func1n,$func2n) > -1) ||
         (index($func2n,$func1n) > -1)))
    {
        return 1;
    }

    # \Q...\E keeps the dots in an EC number literal.  Without it each "."
    # is a regex wildcard, so "1.2.3.4" would also match e.g. "1x2x3x4".
    my @ecs1 = ($func1 =~ /\d+\.\d+\.\d+\.\d+/g);
    foreach my $ec (@ecs1)
    {
        return 0 if ($func2 !~ /\b\Q$ec\E\b/);
    }

    my @ecs2 = ($func2 =~ /\d+\.\d+\.\d+\.\d+/g);
    foreach my $ec (@ecs2)
    {
        return 0 if ($func1 !~ /\b\Q$ec\E\b/);
    }

    # Both EC sets agree; that only counts as a match if there were any.
    return 1 if (@ecs1);

    return 0;
}

#
# normalize($func)
#
# Returns a simplified, upper-cased copy of a function string for
# loose comparison:
#   - the characters , . ( ) [ ] ' are removed,
#   - the text is upper-cased,
#   - a fixed series of one-time substitutions is applied in order
#     (subunit designations, one qualifier word such as PROBABLE or
#     PUTATIVE, transcription-regulator collapsing, SINGLE-STRANDED),
#   - runs of spaces are collapsed and surrounding whitespace trimmed.
#
# Note that the transcription-regulator rule replaces the whole string
# with the lower-case text "transcription regulator".
#
sub normalize {
    my($func) = @_;

    $func =~ s/[,\.\(\)\[\]\']//g;
    $func = uc($func);

    # Each rule replaces only the first match; order matters.
    my @rewrites = (
        [ qr/SUBUNIT \S+/,                                              ''                        ],
        [ qr/\d+ kda? SUBUNIT/i,                                        ''                        ],
        [ qr/ SUBUNIT$/i,                                               ''                        ],
        [ qr/(PROBABLE|PUTATIVE|PRECURSOR|HOMOLOG|IMPORTED|PRIME)/i,    ''                        ],
        [ qr/^.*TRANSCRIPTION.*REGULAT.*$/,                             'transcription regulator' ],
        [ qr/SINGLE-STRANDED/i,                                         'SINGLE-STRAND'           ],
    );

    foreach my $rule (@rewrites)
    {
        my($pattern,$replacement) = @$rule;
        $func =~ s/$pattern/$replacement/;
    }

    # Whitespace clean-up.
    $func =~ s/  */ /g;
    $func =~ s/^\s+//;
    $func =~ s/\s+$//;

    return $func;
}

#
# hypo($x)   or   hypo($any, $x)
#
# Returns a true value when $x is empty/false or matches any of the
# patterns listed below, and a false value otherwise.  When called with
# a single argument that argument is tested; otherwise the second
# argument is tested (so the sub also works when invoked as a method).
#
# NOTE(review): the patterns appear to target "hypothetical" or
# otherwise uninformative annotation text (gene ids, ORF names,
# database cross references, ...) -- confirm with the callers.
#
sub hypo {
    my $x = (@_ == 1) ? $_[0] : $_[1];

    return 1 if (! $x);

    # Tested in this order; the first match wins.
    my @patterns = (
        qr/hypoth/i,
        qr/,.*genes/i,
        qr/gene \d/i,
        qr/\d{3}.pep/i,
        qr/\bFROM\b/i,
        qr/\bA\.L/i,
        qr/\bA\d\d/i,
        qr/^C$/i,
        qr/^\([A-Z]+\d+\)$/,
        qr/putative/i,
        qr/tentative/i,
        qr/predicted/i,
        # qr/homolog/i,
        qr/dna fragment/i,
        qr/conserved protein\b/,
        qr/^[XY]\d\S+/i,
        qr/^[Yy][a-z]{2}[A-Z]/,
        qr/^[Yy][A-Z]{3}\b/,
        qr/weak similarity/i,
        qr/similar to/i,
        qr/gene product/i,
        qr/ORF_/,
        qr/NO SWISS-PROT/,
        qr/predicted coding/i,
        qr/predicted by/i,
        qr/pct identical/i,
        qr/\borf\d+/i,
        qr/\bcosmid\d+\b/i,
        qr/^[a-zA-Z0-9]+\d+[a-z]?$/i,
        qr/^[a-zA-Z0-9]+[\.-]\d+[a-z]?$/i,
        qr/^[a-zA-Z0-9]+[\.-]\d+[a-z]?\s+PROTEIN$/i,
        qr/^cosmid\s+\S+$/i,
        qr/^\([A-Z0-9]+\) [A-Z][a-z]{2}[a-zA-Z] \[\S+ \S+\]\s*$/,
        qr/region orf/i,
        qr/UNNAMED PROTEIN PRODUCT/,
        qr/HYDROPHOBIC PROTEIN/,
        qr/\bORF\b/i,
        qr/protein similarity/,
        qr/Uncharacterized/,
        qr/UNIDENTIFIED/,
        qr/belongs to the family/,
        qr/predicted protein/,
        qr/1-EVIDENCE=PREDICTED BY MATCH/,
        qr/INTERGENIC REGION/,
        qr/NO SWISS-PROT SIMILARITIES/,
        qr/no known similarities/,
        qr/alternate gene name/,
        qr/alternate open reading frame/,
        qr/similar to GenBank Accession Number/,
        qr/family with/,
        qr/No definition/,
        qr/id:/i,
        qr/cDNA/,
        qr/SP:/,
        qr/COMPLETE CDS/,
        qr/GENE CLUSTER/,
        qr/\dp,Lp/,
        qr/3\' END/,
        qr/START CODON/,
        qr/_\S+_/,
        qr/GTG START/i,
        qr/TTG START/i,
        qr/chain length determinant/i,
        qr/f135/i,
        qr/KDA PROTEIN/i,
        qr/yole/i,
        qr/\bMAP\b/,
        qr/\(\d+-\d+\)/i,
        qr/D9719.36p/i,
        qr/THYMOCYTE PROTEIN CTHY28KD/i,
        qr/PHAC1, PHAC2 AND PHAD GENES/i,
        qr/OR23peptide/i,
        qr/\(AE/i,
        qr/Bem3p,Lph12p/i,
        qr/Rlm1p,Lpg19p/i,
    );

    foreach my $pattern (@patterns)
    {
        return 1 if ($x =~ $pattern);
    }

    # The final test is returned directly so that the result in the
    # caller's context (scalar or list) is that of a plain pattern match.
    return ($x =~ /unknown/i);
}

#
# check_sum($file)
#
# Runs the external "cksum" program on $file and returns the first two
# numeric fields of its output (separated by the original whitespace)
# as one string.
#
# Returns undef if $file is undefined, if cksum cannot be started, or
# if its output does not begin with two numbers (for example because
# the file does not exist).
#
sub check_sum {
    my($file) = @_;

    # A single undef (not an empty list) is returned on failure so that
    # the sub always yields exactly one value, as it did before.
    return undef unless defined($file);

    # List-form pipe open runs cksum without a shell, so spaces and shell
    # metacharacters in $file are passed through literally.  "--" stops a
    # leading "-" in the name from being read as an option.
    my $pipe;
    open($pipe, '-|', 'cksum', '--', $file) or return undef;
    my $line = <$pipe>;
    close($pipe);

    # Only trust $1 after a successful match; otherwise it would still
    # hold the capture from some earlier, unrelated match.
    if (defined($line) && $line =~ /^(\d+\s+\d+)/)
    {
        return $1;
    }
    return undef;
}

1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3