[Bio] / FigKernelScripts / to_prodom.pl Repository:
ViewVC logotype

View of /FigKernelScripts/to_prodom.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (download) (as text) (annotate)
Thu Aug 27 21:08:37 2009 UTC (10 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.6: +8 -3 lines
fix updated URL

#!/usr/bin/perl -w

use strict;
#use CGI qw(:standard);
#use CGI::Carp qw(warningsToBrowser fatalsToBrowser);
use Data::Dumper;
use LWP;
use HTTP::Request::Common;
use FIG;
use HTML;

my $fig = new FIG;
my $user_agent = LWP::UserAgent->new;
my $usage = "usage: to_prodom [peg\tpegID]"; 
my @arguments_in;

if (@ARGV > 0)
{
    @arguments_in = @ARGV;
}
else
{
    die $usage;
}

my %arg_pairs = ();

foreach (@arguments_in)
{
    my ($name,$val) = split(/\t/,$_); 
    $arg_pairs{$name} = $val;
}

my @keys = keys %arg_pairs;

my $peg = $arg_pairs{'peg'};

$peg =~ s/\%7C/\|/g;

my $seq = $fig->get_translation($peg);
my @aliases=$fig->feature_aliases($peg);

my @sp_ids = grep {/.*sp.*/} @aliases;
my @tr_ids = grep {/.*tr.*/} @aliases;
my @uni_ids = grep {/.*uni.*/} @aliases;

# Putting the ids in an array. 
# Allow the program to process each one till there is a ProDom webpage
# Order of importance is sp, tr, uni

my @all_ids;
&add_to_array (\@uni_ids);
&add_to_array (\@tr_ids);
&add_to_array (\@sp_ids);


foreach (@all_ids) {

    &to_prodom_by_id($_);
}


# If made it this far, use sequence to get the Prodom website
&to_prodom_by_seq;

###############
# Subroutines
###############

 
sub add_to_array {
    my ($x) = @_;
    for (my $i=0; $i <@$x; $i++){
	push @all_ids, $x->[$i];
    }
}


sub to_prodom_by_id {

    my $id = $_;
    
    if ( $id ne "") {
	# Remove all the identifier before | 
	
	my $url_id = $id;
	$url_id =~ s/sp\|//g;
	$url_id =~ s/uni\|//g;
	$url_id =~ s/tr\|//g;
	
	my $url = "http://prodom.prabi.fr/prodom/current/cgi-bin/request.pl?question=SPTR&query=$url_id&bool_operator=OR";
	# my $url = "http://protein.toulouse.inra.fr/prodom/current/cgi-bin/request.pl?question=SPTR&query=$url_id&bool_operator=OR";


	# Pre-Check to see if there's no entry in ProDom. If there are no entry for the specified id,
	# then it will go to the next id;
	
	my $response = $user_agent->get( $url );
	die "Can't get $url -- ", $response->status_line
	    unless $response->is_success;

	if(! ($response->content =~ m/Sorry/i) ) {
	    my $result = $response->content;
	    # Replace relative paths with absolute paths

            $result =~ s/\.\./http:\/\/prodom\.prabi\.fr\/prodom\/current/g;
	    $result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodom\.prabi\.fr\/prodom\/current\/html\/home\.php/g;
	    
	    #$result =~ s/\.\./http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current/g;
	    #$result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current\/html\/home\.php/g;
	   
	    $result =~ s/<\!DOCTYPE HTML PUBLIC \"-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN\"//;
	    $result =~ s/"http\:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd\">//;
            $result =~ s/<HTML>//; 
            $result =~ s/<HEAD>//;
            $result =~ s/<\/HEAD>//;
            $result =~ s/<\/HTML>//;   
	    #Get rid of css reference because it breaks the FIG header
	    $result =~ s/<link rel=.*?>//g;
	    print $result;
	    exit;
	}     
    }
}

sub to_prodom_by_seq {

    my $url = "http://prodom.prabi.fr/prodom/current/cgi-bin/ProDomBlast3.pl"; 
    my $request = POST( $url,
			 Content_Type => 'form-data',
			 Content =>  [ 'matrice' => 'BLOSUM62',	   
				     'program' => 'ncbi-blastp',
				     'typebd' => 'multiple alignments',
				     'expect' => '.01',
				     'filtre' => 'seq', 
				     'nom_seq' => '',
				     'sequence' => $seq,
				    ]
				   );

    
    my $response = $user_agent->request($request);
    my $result = $response->content;
    
    $result =~ s/<\!DOCTYPE HTML PUBLIC \"-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/EN\"//;
    $result =~ s/"http\:\/\/www\.w3\.org\/TR\/REC-html40\/loose\.dtd\">//;
    $result =~ s/<HTML>//;
    $result =~ s/<HEAD>//;
    $result =~ s/<\/HEAD>//;
    $result =~ s/<\/HTML>//;
        
    # Replace relative paths with absolute paths
    $result =~ s/\.\./http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current/g;
    $result =~ s/<link rel=.*?>//g;
    $result =~ s/href\=\"\/prodom\.html\"/http:\/\/prodes\.toulouse\.inra\.fr\/prodom\/current\/html\/home\.php/g;
    $result =~ s/\"request\.pl/\"http\:\/\/prodomweb\.univ\-lyon1\.fr\/prodom\/current\/cgi\-bin\/request\.pl\?/g;

    print $result;
    
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3