[Bio] / FigKernelScripts / to_prodom.pl Repository:
ViewVC logotype

View of /FigKernelScripts/to_prodom.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (download) (as text) (annotate)
Fri Sep 8 18:59:21 2006 UTC (13 years, 3 months ago) by hwang
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_12_18, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2009_03_26, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.5: +3 -1 lines
Change url for prodom's post

#!/usr/bin/perl -w

use strict;
#use CGI qw(:standard);
#use CGI::Carp qw(warningsToBrowser fatalsToBrowser);
use Data::Dumper;
use LWP;
use HTTP::Request::Common;
use FIG;
use HTML;

my $fig = new FIG;
my $user_agent = LWP::UserAgent->new;
my $usage = "usage: to_prodom [peg\tpegID]"; 
my @arguments_in;

if (@ARGV > 0)
{
    @arguments_in = @ARGV;
}
else
{
    die $usage;
}

my %arg_pairs = ();

foreach (@arguments_in)
{
    my ($name,$val) = split(/\t/,$_); 
    $arg_pairs{$name} = $val;
}

my @keys = keys %arg_pairs;

my $peg = $arg_pairs{'peg'};

$peg =~ s/\%7C/\|/g;

my $seq = $fig->get_translation($peg);
my @aliases=$fig->feature_aliases($peg);

my @sp_ids = grep {/.*sp.*/} @aliases;
my @tr_ids = grep {/.*tr.*/} @aliases;
my @uni_ids = grep {/.*uni.*/} @aliases;

# Putting the ids in an array. 
# Allow the program to process each one till there is a ProDom webpage
# Order of importance is sp, tr, uni

my @all_ids;
&add_to_array (\@uni_ids);
&add_to_array (\@tr_ids);
&add_to_array (\@sp_ids);


foreach (@all_ids) {

    &to_prodom_by_id($_);
}


# If made it this far, use sequence to get the Prodom website
&to_prodom_by_seq;

###############
# Subroutines
###############

 
sub add_to_array {
    my ($x) = @_;
    for (my $i=0; $i <@$x; $i++){
	push @all_ids, $x->[$i];
    }
}


sub to_prodom_by_id {

    my $id = $_;
    
    if ( $id ne "") {
	# Remove all the identifier before | 
	
	my $url_id = $id;
	$url_id =~ s/sp\|//g;
	$url_id =~ s/uni\|//g;
	$url_id =~ s/tr\|//g;
	
	my $url = "http://protein.toulouse.inra.fr/prodom/current/cgi-bin/request.pl?question=SPTR&query=$url_id&bool_operator=OR";


	# Pre-Check to see if there's no entry in ProDom. If there are no entry for the specified id,
	# then it will go to the next id;
	
	my $response = $user_agent->get( $url );
	die "Can't get $url -- ", $response->status_line
	    unless $response->is_success;

	if(! ($response->content =~ m/Sorry/i) ) {
	    my $result = $response->content;
	    # Replace relative paths with absolute paths
	    $result =~ s/\.\./http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current/g;
	    $result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current\/html\/home\.php/g;
	   
	    $result =~ s/<\!DOCTYPE HTML PUBLIC \"-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN\"//;
	    $result =~ s/"http\:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd\">//;
            $result =~ s/<HTML>//; 
            $result =~ s/<HEAD>//;
            $result =~ s/<\/HEAD>//;
            $result =~ s/<\/HTML>//;   
	    #Get rid of css reference because it breaks the FIG header
	    $result =~ s/<link rel=.*?>//g;
	    print $result;
	    exit;
	}     
    }
}

sub to_prodom_by_seq {

    my $url = "http://prodom.prabi.fr/prodom/current/cgi-bin/ProDomBlast3.pl"; 
    my $request = POST( $url,
			 Content_Type => 'form-data',
			 Content =>  [ 'matrice' => 'BLOSUM62',	   
				     'program' => 'ncbi-blastp',
				     'typebd' => 'multiple alignments',
				     'expect' => '.01',
				     'filtre' => 'seq', 
				     'nom_seq' => '',
				     'sequence' => $seq,
				    ]
				   );

    
    my $response = $user_agent->request($request);
    my $result = $response->content;
    
    $result =~ s/<\!DOCTYPE HTML PUBLIC \"-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN\"//;
    $result =~ s/"http\:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd\">//;
    $result =~ s/<HTML>//;
    $result =~ s/<HEAD>//;
    $result =~ s/<\/HEAD>//;
    $result =~ s/<\/HTML>//;
        
    # Replace relative paths with absolute paths
    $result =~ s/\.\./http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current/g;
    $result =~ s/<link rel=.*?>//g;
    $result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current\/html\/home\.php/g;
    $result =~ s/\"request\.pl/\"http\:\/\/prodomweb\.univ\-lyon1\.fr\/prodom\/current\/cgi\-bin\/request\.pl\?/g;

    print $result;
    
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3