[Bio] / FigKernelPackages / RC.pm Repository:
ViewVC logotype

View of /FigKernelPackages/RC.pm

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Dec 1 16:54:26 2003 UTC (16 years, 6 months ago) by efrank
Branch: MAIN
Branch point for: initial
Initial revision

package RC;

use Carp;

# Emit a complete page on the currently selected output handle.
#
#   $cgi  - object providing a header() method (its result is printed first)
#   $html - reference to an array of page fragments, printed in order
#
# The header and the body are printed with two separate print calls.
sub show_page {
    my $cgi  = shift;
    my $html = shift;

    print $cgi->header();
    print @{$html};
}
    

# Build an HTML table and return it as a single string.
#
#   $col_hdrs - reference to an array of column header strings
#   $tab      - reference to an array of rows, each row an array reference
#               of cell strings
#   $title    - text placed (in bold) in the table caption
#
# Cell, header and title text is inserted verbatim; nothing is HTML-escaped.
sub make_table {
    my($col_hdrs,$tab,$title) = @_;

    my $caption    = "<table border><caption><b>$title</b></caption>\n";
    my $header_row = "<tr><th>" . join("</th><th>", @{$col_hdrs}) . "</th></tr>\n";
    my @body_rows  = map { "<tr><td>" . join("</td><td>", @{$_}) . "</td></tr>\n" } @{$tab};

    return join("", $caption, $header_row, @body_rows, "</table>\n");
}

# Decide whether two function descriptions denote the same function.
#
#   $func1, $func2 - the two description strings; both must be defined,
#                    otherwise confess() is raised with a stack trace.
#
# Returns a true value when the descriptions are considered the same and a
# false value otherwise.  The rules, applied in order:
#
#   1. If $func1 is judged "hypothetical" by hypo(), the result is whatever
#      hypo($func2) returns (two hypotheticals are the same).  If only
#      $func2 is hypothetical the result is 0.
#   2. If both normalize()d forms are non-empty and one contains the other,
#      the result is 1.
#   3. Otherwise the strings are compared by their four-part dotted numbers
#      (EC-number style, e.g. 1.2.3.4): every such number found in one
#      string must occur in the other, and $func1 must contain at least one.
sub same_func {
    my($func1,$func2) = @_;

    (defined($func1) && defined($func2)) || confess "func1=$func1 func2=$func2";

    if (hypo($func1))
    {
        return hypo($func2);
    }
    elsif (hypo($func2))
    {
        return 0;
    }

    my $func1n = normalize($func1);
    my $func2n = normalize($func2);

    if ($func1n && $func2n &&
        ((index($func1n,$func2n) > -1) ||
         (index($func2n,$func1n) > -1)))
    {
        return 1;
    }

    # The numbers are matched literally (\Q...\E).  Without quoting, each
    # "." in the number is a regex wildcard, so "1.2.3.4" would also be
    # found in a string such as "1.213.4".
    my @ecs1 = ($func1 =~ /\d+\.\d+\.\d+\.\d+/g);
    foreach my $ec (@ecs1)
    {
        return 0 if ($func2 !~ /\b\Q$ec\E\b/);
    }

    my @ecs2 = ($func2 =~ /\d+\.\d+\.\d+\.\d+/g);
    foreach my $ec (@ecs2)
    {
        return 0 if ($func1 !~ /\b\Q$ec\E\b/);
    }

    # Both directions agree; that only counts as a match when there was at
    # least one number to compare.
    if (@ecs1) { return 1; }

    return 0;
}

# Reduce a function description to a canonical form for loose comparison.
#
#   $func - the description string
#
# Returns the rewritten string: punctuation (, . ( ) [ ] ') removed,
# upper-cased, subunit designations and a qualifier word such as
# PROBABLE/PUTATIVE dropped (first occurrence only), runs of spaces
# collapsed and surrounding whitespace trimmed.  Any description mentioning
# TRANSCRIPTION followed by REGULAT collapses to the literal lower-case
# string "transcription regulator".
sub normalize {
    my($func) = @_;

    my $text = $func;
    $text =~ s/[,\.\(\)\[\]\']//g;
    $text = uc($text);

    # Each rule rewrites only the first match, and the rules run in order.
    my @first_match_rules = (
        [ qr/SUBUNIT \S+/,                                              '' ],
        [ qr/\d+ kda? SUBUNIT/i,                                        '' ],
        [ qr/ SUBUNIT$/i,                                               '' ],
        [ qr/(PROBABLE|PUTATIVE|PRECURSOR|HOMOLOG|IMPORTED|PRIME)/i,    '' ],
        [ qr/^.*TRANSCRIPTION.*REGULAT.*$/,        'transcription regulator' ],
        [ qr/SINGLE-STRANDED/i,                               'SINGLE-STRAND' ],
    );
    foreach my $rule (@first_match_rules)
    {
        my($pattern,$replacement) = @{$rule};
        $text =~ s/$pattern/$replacement/;
    }

    # Whitespace clean-up: squeeze runs of spaces, then trim both ends.
    $text =~ s/  */ /g;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    return $text;
}

# Decide whether a function description is uninformative ("hypothetical").
#
# May be called with one argument (the description) or with two, in which
# case the second argument is the description and the first is ignored.
#
# Returns 1 as soon as the description is empty/false or matches any of the
# patterns below; otherwise returns the (false) result of the final match.
# The patterns are a flat list of alternatives, so their order does not
# affect the outcome.
sub hypo {
    my $x;
    if (@_ == 1) { $x = $_[0]; }
    else         { $x = $_[1]; }

    return 1 if ! $x;
    return 1 if $x =~ /hypoth/i;
    return 1 if $x =~ /,.*genes/i;
    return 1 if $x =~ /gene \d/i;
    return 1 if $x =~ /\d{3}.pep/i;
    return 1 if $x =~ /\bFROM\b/i;
    return 1 if $x =~ /\bA\.L/i;
    return 1 if $x =~ /\bA\d\d/i;
    return 1 if $x =~ /^C$/i;
    return 1 if $x =~ /^\([A-Z]+\d+\)$/;
    return 1 if $x =~ /putative/i;
    return 1 if $x =~ /tentative/i;
    return 1 if $x =~ /predicted/i;
    # return 1 if $x =~ /homolog/i;
    return 1 if $x =~ /dna fragment/i;
    return 1 if $x =~ /conserved protein\b/;
    return 1 if $x =~ /^[XY]\d\S+/i;
    return 1 if $x =~ /^[Yy][a-z]{2}[A-Z]/;
    return 1 if $x =~ /^[Yy][A-Z]{3}\b/;
    return 1 if $x =~ /weak similarity/i;
    return 1 if $x =~ /similar to/i;
    return 1 if $x =~ /gene product/i;
    return 1 if $x =~ /ORF_/;
    return 1 if $x =~ /NO SWISS-PROT/;
    return 1 if $x =~ /predicted coding/i;
    return 1 if $x =~ /predicted by/i;
    return 1 if $x =~ /pct identical/i;
    return 1 if $x =~ /\borf\d+/i;
    return 1 if $x =~ /\bcosmid\d+\b/i;
    return 1 if $x =~ /^[a-zA-Z0-9]+\d+[a-z]?$/i;
    return 1 if $x =~ /^[a-zA-Z0-9]+[\.-]\d+[a-z]?$/i;
    return 1 if $x =~ /^[a-zA-Z0-9]+[\.-]\d+[a-z]?\s+PROTEIN$/i;
    return 1 if $x =~ /^cosmid\s+\S+$/i;
    return 1 if $x =~ /^\([A-Z0-9]+\) [A-Z][a-z]{2}[a-zA-Z] \[\S+ \S+\]\s*$/;
    return 1 if $x =~ /region orf/i;
    return 1 if $x =~ /UNNAMED PROTEIN PRODUCT/;
    return 1 if $x =~ /HYDROPHOBIC PROTEIN/;
    return 1 if $x =~ /\bORF\b/i;
    return 1 if $x =~ /protein similarity/;
    return 1 if $x =~ /Uncharacterized/;
    return 1 if $x =~ /UNIDENTIFIED/;
    return 1 if $x =~ /belongs to the family/;
    return 1 if $x =~ /predicted protein/;
    return 1 if $x =~ /1-EVIDENCE=PREDICTED BY MATCH/;
    return 1 if $x =~ /INTERGENIC REGION/;
    return 1 if $x =~ /NO SWISS-PROT SIMILARITIES/;
    return 1 if $x =~ /no known similarities/;
    return 1 if $x =~ /alternate gene name/;
    return 1 if $x =~ /alternate open reading frame/;
    return 1 if $x =~ /similar to GenBank Accession Number/;
    return 1 if $x =~ /family with/;
    return 1 if $x =~ /No definition/;
    return 1 if $x =~ /id:/i;
    return 1 if $x =~ /cDNA/;
    return 1 if $x =~ /SP:/;
    return 1 if $x =~ /COMPLETE CDS/;
    return 1 if $x =~ /GENE CLUSTER/;
    return 1 if $x =~ /\dp,Lp/;
    return 1 if $x =~ /3\' END/;
    return 1 if $x =~ /START CODON/;
    return 1 if $x =~ /_\S+_/;
    return 1 if $x =~ /GTG START/i;
    return 1 if $x =~ /TTG START/i;
    return 1 if $x =~ /chain length determinant/i;
    return 1 if $x =~ /f135/i;
    return 1 if $x =~ /KDA PROTEIN/i;
    return 1 if $x =~ /yole/i;
    return 1 if $x =~ /\bMAP\b/;
    return 1 if $x =~ /\(\d+-\d+\)/i;
    return 1 if $x =~ /D9719.36p/i;
    return 1 if $x =~ /THYMOCYTE PROTEIN CTHY28KD/i;
    return 1 if $x =~ /PHAC1, PHAC2 AND PHAD GENES/i;
    return 1 if $x =~ /OR23peptide/i;
    return 1 if $x =~ /\(AE/i;
    return 1 if $x =~ /Bem3p,Lph12p/i;
    return 1 if $x =~ /Rlm1p,Lpg19p/i;

    # The last alternative is returned directly so that the "no match"
    # value is the plain result of a failed pattern match.
    return ($x =~ /unknown/i);
}

# Return the checksum of a file as reported by the external cksum utility.
#
#   $file - path of the file to checksum
#
# Returns the leading "<number> <number>" portion of the first line that
# cksum prints, or undef when $file is undefined, cksum cannot be run, or
# its output does not have the expected form.
#
# cksum is started without a shell (list-form pipe open), so spaces and
# shell metacharacters in $file are passed through as a single argument
# instead of being interpreted.
sub check_sum {
    my($file) = @_;

    # An explicit undef (rather than a bare return) is used throughout so
    # the sub yields exactly one value in list context.
    return undef unless defined($file);

    my $pid = open(my $fh, '-|', 'cksum', $file);
    return undef unless $pid;

    my @lines = <$fh>;
    close($fh);

    # Only trust $1 after a successful match; otherwise it would still hold
    # the capture from some earlier, unrelated match.
    if (@lines && $lines[0] =~ /^(\d+\s+\d+)/)
    {
        return $1;
    }
    return undef;
}

# A file loaded with require/use must end by evaluating to a true value.
1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3