[Bio] / FigKernelScripts / to_arabidopsis.pl Repository:
ViewVC logotype

View of /FigKernelScripts/to_arabidopsis.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Jan 23 23:25:56 2007 UTC (12 years, 9 months ago) by hwang
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
This script parses arabidopsis.org to get the AT aliases output

#!/usr/bin/perl -w
use lib "/home/khwang/MODULES/FramesReady/lib";

use strict;
use Data::Dumper;
use LWP;
use LWP::UserAgent;
use HTTP::Request::Common;
use FIG;
use HTML;
use HTML::LinkExtor;
use URI::URL;

my $fig = new FIG;
my $user_agent = LWP::UserAgent->new;
my $usage = "usage: to_jcsg [key1\tvalue1\nkey2\tvalue2]";
my @arguments_in;
my @imgs = ();
my $result;
my $base;

if (@ARGV > 0)
{
    @arguments_in = @ARGV;
}
else
{
    die $usage;
}

my %pairs = ();

foreach (@arguments_in)
{
    my ($name,$val) = split(/\t/,$_); 
    $pairs{$name} = $val;
}

my @keys = keys %pairs;

my $peg = $pairs{'peg'};
my $alias_filter = $pairs{'alias'};
my $url = $pairs{'url'};
my $comment = $pairs{'comment'};
my $url_end = $pairs{'url_end'};

$peg =~ s/\%7C/\|/g;

my $seq = $fig->get_translation($peg);
my @aliases=$fig->feature_aliases($peg);

my @url_id = grep {/.*$alias_filter.*/} @aliases;

#Clean the ids;

my $clean_id = $url_id[0];
$clean_id=~ s/.*://;
$clean_id=~ s/.*\|//;
&to_url_by_id($clean_id);


###############
# Subroutines
###############

sub to_url_by_id {

    my $id = $_[0];
    if ( $id ne "") {
	my $url_id = $id;
	my $complete_url;

	if ($url_end ne "") {
	    
	    $complete_url = $url.$url_id.$url_end;
	}
	else {
	    $complete_url = $url.$url_id;
	}

	my $p = HTML::LinkExtor->new(\&callback);
	
	my $response = $user_agent->request(HTTP::Request->new(GET => $complete_url),
				    sub {$p->parse($_[0])});
	print "The $complete_url can not be accessed now. <p> "
	    unless $response->is_success;

	my $response_html = $user_agent->get( $complete_url );

	if(! ($response_html->content =~ m/Sorry/i) ) {
	   
	    $result = $response_html->content;
	    $result =~ s/<link rel=.*?>//g;

	    #Relative urls needs to be changed to absolute urls
	    # If LinksToTools specify that the relative link is yes then put all the links in a hash.
	    # Key is relative url. 
	    # Value is absolute url
	    $base = $response->base;

	    if ( $pairs{'rel_links'} eq 'yes') {

		&fix_url;
	    }
		
	}

	   my $css_header = " <head><title>Search</title>
                              <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.arabidopsis.org/css/main.css\" />
                              <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.arabidopsis.org/css/search.css\" />
                              <link rel=\"stylesheet\" type=\"text/css\" href=\"http://www.arabidopsis.org/css/base.css\" />
                              <style type=\"text/css\">
                              </style> ";
	
	my $old = "<script type=\"text\/javascript\">";
	$result =~ s/$old/$css_header$old/g;
	print $result;
	exit;
	}
   
    else {

	print $comment;
    }

}

sub callback {
    my($tag, %attr) = @_; 
    return if $tag ne 'img'; 
    push(@imgs, values %attr);
}


sub fix_url {

    my %url=();
    
    foreach my $rel_url(@imgs) {
	$url{$rel_url} = url($rel_url, $base)->abs;
    }


    #Replace the relative urls with absolute url
    while ( my ($key, $value) = each %url) {
	$result =~ s/$key/$value/;
    }
		
    #Some links can't be replace with a simple subsitution
    #In LinksToTools, these are the problem links

    my @problem_links = split(/\s+/, $pairs{problem_links});
    foreach (@problem_links) {
	$result =~ s/$_/$pairs{problem_links_base}$_/g;
    }
		
    my @home_dir_links = split(/\s+/, $pairs{home_dir});
    foreach (@home_dir_links) {
	my ($dot, $dir) = split(/\//, $_);
	$result =~ s/$_/$pairs{home_dir_base}$dir/g;
    
    }

    #Append the base to the links that starts with /
    #Did not put these in LinksToTools because other cases will be incorrectly
    #overwritten
    my $base = $pairs{'base'};
    my @rel_before = split(/\s+/,$pairs{'append_before_base'});
    my @rel_after = split(/\s+/,$pairs{'append_after_base'});
    

    foreach (@rel_before) {
	if ($result=~ m/$_/) {
	    my $abs = "$_$base";
	    $result=~ s/$_/$abs/g;
	}
    }
    
    foreach (@rel_after) {
	if ($result=~ m/$_/) {
	    $_=~ s/\"//g;
	    my $abs = "$base$_";
	    $result=~ s/$_/$abs/g;
	}
    }


}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3