[Bio] / DrugTargets / gen_dt_table.pl Repository:
ViewVC logotype

View of /DrugTargets/gen_dt_table.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Fri Aug 26 16:58:56 2005 UTC (14 years, 3 months ago) by fangfang
Branch: MAIN
CVS Tags: HEAD
Changes since 1.7: +10 -7 lines
Added 'use strict;' and modified codes accordingly.

use strict;

my $usage = "gen_dt_table [options] gene_list_file1 gene_list_file2 ...\n\n" .
    "       options: [-i]                 Ignore comments in gene_list_file. \n" .
    "                [-a]                 Output rows with no peg ID hit as well. \n" .
    "                [-s]                 Output single hit for one search key. \n" .
    "                [-html]              Output as an HTML file. \n" .
    "                [-htmltable]         Output an HTML table (to be embedded in an HTML file).\n" .
    "                [-nolink]            Output in plain text, without embedded html links.\n" .
    "                [-noheader]          Output the table without table header.\n\n" .
    "                [-orgs=genome_ids]   Specify one or more organisms (comma-delimited).\n" .
    "                                     Eg. -orgs=216600.1,170187.1,171101.1 \n".
    "                [-cat=gene_catetory] Eg. -cat=essential_by_Thanassi \n" .
    "                [-tag=gene_tag]      Eg. -tag='SP0435, conserved essential'\n" .
    "                [-url=reference_url] \n\n" .
    "       The latter 4 options can be specified in gene_list_file as comments well. Eg. ,\n" .
    "                #orgs=158879.1\n" .
    "                #url=http://www.ncbi.nlm.nih.gov/...\n" .
    "                SA0473\n" .
    "                IleS\n" .
    "                ...\n\n" .
    "       The general format for the content of text file is \n" .
    "                search_key[, tag]\t[category]\t[url]\t[peg_id (to override search_key)]\n\n"; 

my ($ignore_comment, $include_all, $single_hit, $table_only, $webpage, $no_link, $no_header, $def_org, $def_cat, $def_tag, $def_url, $trouble, @infiles, $org);


for (@ARGV) {
    if (/^-/) {
	(/-i$/)         && ($ignore_comment = 1, next);
	(/-a$/)         && ($include_all = 1, next);
	(/-s$/)         && ($single_hit = 1, next);
	(/-htmltable/i) && ($table_only = 1);
	(/-html/i)      && ($webpage = 1, next);
	(/-nolink/i)    && ($no_link = 1, next);
	(/-noheader/i)  && ($no_header = 1, next);
	(/-orgs=(\S+)/i)&& ($def_org = $1, next);
	(/-cat=(.*)/i)  && ($def_cat = $1, next);
	(/-tag=(.*)/i)  && ($def_tag = $1, next);
	(/-url=(\S+)/i) && ($def_url = $1, next);
	$trouble = 1;
	print STDERR "Invalid flag $_.\n";
    } else {
	push(@infiles, $_);
    }
}
die "Usage: $usage" if ($trouble || @infiles == 0);

use FIG;
use CGI;
use FIG_Config;
my $fig = new FIG;
my $cgi = new CGI;
$| = 1;

if ($webpage || !$no_header) {
    my @header = ('Category', 'Gene Name', 'Gene Id', 'PEG ID', 'PEG SeqLen', 'GenBank ID', 'UniProt ID', 'Functional Role', 'Conservation of Seqs', 'PDB (bound)', 'e-Value (bound)', 'PDB (free)', 'e-Value', 'PDB Title', 'PDB SeqLen', 'ProtDist', 'PASS ASPs', 'PASS Weight of Largest Pocket', 'PDB Ligand CLiBE', 'Total Energy', 'Van der Waals Interaction', 'Hydrogen Bond', 'Electrostatic Interaction', 'Solvation Energy');
    if ($webpage) {
	if ($table_only) {
	    print gen_html("start_table");
	} else {
	    print gen_html("start_html", ("Drug Targets - ") . join(" ", @infiles));
	}
	print gen_html("header", @header); 
    } else { 
	print join("\t", @header). "\n"; 
    } 
} 

for my $fin (@infiles) {
    open(FIN, $fin) || (print(STDERR "'$fin': Couldn't open file. Skipped...\n\n"), next);

    while (<FIN>) {
	chomp;
	if (/^\#/) {
	    next if ($ignore_comment);
	    (/orgs=(.*\S)/i) && ($def_org = $1, $def_cat = $def_tag = $def_url = undef);
	    (/cat=(.*\S)/i)  && ($def_cat = $1);
	    (/tag=(.*\S)/i)  && ($def_tag = $1);
	    (/url=(\S+)/i)   && ($def_url = $1);
	    next;
	}
	$def_url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=$def_url" unless ($def_url =~ /http/);

	$org = $def_org;
	my $cat = $def_cat;
	my $tag = $def_tag;
	my $url = $def_url;

	my @cols = split(/\t/);

	if (my $gene = $cols[0]) {
	    if ($gene =~ /(\S+)?\s*\,\s*(.*\S)/) {
		$gene = $1;
		$tag = $2;
	    } else {
		$gene =~ /(\S+)/;
		$gene = $1;
	    }
	    $gene =~ s/fig\|//;
	    next unless ($gene || $cols[3]);

	    #print "gene: '$gene'\t tag: '$tag'\n";
	    
	    # peg, url, cat
	    my @pegs;
	    if ($cols[3]) {
		$cols[3] =~ /(\d+\.\d+\.peg\.\d+)/;
		@pegs = ("fig|$1");
	    } else {
		if ($org) {
		    @pegs = get_pegs($org, $gene);
		} else {
		    print(STDERR "'$gene': No genome specified. Skipped...\n\n");
		    next unless ($include_all);
		}
	    }

	    for my $peg (@pegs) {
		($fig->function_of($peg)) || ($peg = undef); 
		unless ($include_all) {next unless $peg};

		my $peg_u = "<a href=http://theseed.uchicago.edu/FIG/protein.cgi?prot=$peg&user= target=_blank>$peg</a>" if ($peg);
		$gene = $tag ? "$gene, $tag" : $gene;
		$tag = 0;
		$cat = $cols[1] ? $cols[1] : $cat;
		$url = $cols[2] ? $cols[2] : $url;
		my $gene_u = "<a href=$url>$gene</a>" if ($gene);
		
		# funtion role
		my $role = $fig->function_of($peg);
		
		# geneId
		my @aliases = $fig->feature_aliases($peg);
		my @genes = grep {$_ !~ /.*(\||\_|\:).*/ } @aliases;
		my $geneId = join (",", @genes);

		# Genebank ID
		my @gids = grep {/.*gi.*/} @aliases;
		my $gid = join(", ", @gids);
		my @gids_u = map {/gi\|(\S+)/ } @gids;	    
		map {$_ = "<a href=http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=$_ target=_blank>gi|$_</a>"} @gids_u;
		my $gid_u = join(", ", @gids_u); 
		
		# uniprot ID
		my @uniIds = grep {/.*uni.*/} @aliases; 
		my $uniId = join(", ", @uniIds);
		my @uniIds_u = map {/uni\|(\S+)/} @uniIds;
		map {$_ = "<a href=http://www.pir.uniprot.org/cgi-bin/upEntry?id=$_ target=_blank>uni|$_</a>"} @uniIds_u;
		my $uniId_u = join(", ", @uniIds_u);

		# conservation of sequences
		my $cons = get_cons($peg);
		
		# best pdb bound
		my ($pdb_bound, $escore_bound, $pdb_bound_url) = get_pdb($peg, "bound") if ($peg);
		my $pdb_bound_u = "<a href=$pdb_bound_url>$pdb_bound</a>" if ($pdb_bound);

		# best pdb free
		my ($pdb, $escore, $pdb_url, $pdb_title) = get_pdb($peg, "free") if ($peg);
		my $pdb_u = "<a href=$pdb_url>$pdb</a>" if ($pdb);

		# prodist 
		my ($peg_len, $pdb_len, $dist) = get_dist($peg, $pdb) if ($pdb);
		my $dist_u = "<a href=peg_pdb_align.cgi?peg=$peg&pdb=$pdb>$dist</a>" if ($pdb);
		my $peg_len = $fig->translation_length($peg);

		# pass info for pdb free
		my ($pass, $pass_maxwgt, $pass_gif) = get_pass($pdb) if ($pdb);
		my $pass_u = "<a href=$pass_gif>$pass</a>" if ($pass && $pass_gif);
		($pass = $pass_u = " 0 ") unless ($pass && $pass_gif);
		$pass_maxwgt = " 0 " unless $pass_maxwgt;

		# CLiBE
		my ($clibe, $clibe_url, $tote, $vdwi, $hbond, $ei, $sole) = get_clibe($pdb) if ($pdb);
		my $clibe_u = "<a href=$clibe_url>$clibe</a>" if ($clibe);
		
		my @row;
		if ($no_link) {
		    @row = ($cat, $gene, $geneId, $peg, $peg_len, $gid, $uniId, $role, $cons, $pdb_bound, $escore_bound, $pdb, $escore, $pdb_title, $pdb_len, $dist, $pass, $pass_maxwgt, $clibe, $tote, $vdwi, $hbond, $ei, $sole);
		} else {
		    @row = ($cat, $gene_u, $geneId, $peg_u, $peg_len, $gid_u, $uniId_u, $role, $cons, $pdb_bound_u, $escore_bound, $pdb_u, $escore, $pdb_title, $pdb_len, $dist_u, $pass_u, $pass_maxwgt, $clibe_u, $tote, $vdwi, $hbond, $ei, $sole);
		}
		if ($webpage) {
		    print gen_html("row", @row);
		} else {
		    print join("\t", @row) . "\n"; 
		}
	    }
	}
    }
    
    close(FIN);
}

if ($webpage) {
    if ($table_only) {print gen_html("end_table") }
    else {print gen_html("end_html")}
}

#-------------------------------------------
# Sub Routines
#-------------------------------------------

sub get_pegs {
    my ($org, $gene) = @_;
    #print "Searching: '$gene' ...\n";
    my ($pegs_index_data) = $fig->search_index("'$gene'");

    my @orgs = split(/[^0-9.]/, $org);

    my @pegs = ();
    for my $g (@orgs) {
	my @hits = grep {/$g/} map {$_->[0]} @$pegs_index_data;
	#if (@pegs > 1) {print STDERR "get_pegs: more than one hits for '$gene' in '$g'.\n";}
	if ($single_hit && $hits[0]) {@pegs = ($hits[0]); last};
	@pegs = (@pegs, @hits);
    }
    return @pegs;
}

sub get_pdb {
    my ($peg, $domain) = @_;
    my ($pdb, $escore, $pdb_url, $pdb_title) = ();
    my $best_pdb = "$FIG_Config::bin/best_pdb";

    $best_pdb = "perl best_pdb.pl" unless (-e $best_pdb);
    ($domain =~ /bound/) && ($domain = "-bound");
    
    ($pdb, $escore, $pdb_url, $pdb_title) = split(/\t/, `$best_pdb $domain '$peg'`);
    chomp($pdb_title);

    return ($pdb, $escore, $pdb_url, $pdb_title);
}

sub get_pass {
    my ($pdb) = @_;
    my ($pass, $pass_maxwgt, $pass_gif) = ();
    my $pass_pdb = "$FIG_Config::bin/pass_pdb";
    my $pdb2passinfo = "$FIG_Config::bin/pdb2passinfo";
    
    $pdb2passinfo = "perl pdb2passinfo.pl" unless (-e $pdb2passinfo);
    $pass_pdb = "perl pass_pdb.pl" unless (-e $pass_pdb);
    system "$pass_pdb $pdb";

    my $rv = `$pdb2passinfo $pdb`;
    chomp($rv);
    ($pass, $pass_maxwgt, $pass_gif) = split(/\t/, $rv);

    return ($pass, $pass_maxwgt, $pass_gif);
}

sub get_dist {
    my ($peg, $pdb) = @_;
    my ($peg_len, $pdb_len, $dist) = ();
    my $peg_pdb_dist = "$FIG_Config::bin/peg_pdb_dist";
    
    $peg_pdb_dist = "perl peg_pdb_dist.pl" unless (-e $peg_pdb_dist);

    ($peg_len, $pdb_len, $dist) = split(/\t/, `$peg_pdb_dist -len '$peg' $pdb`);
    chomp($dist);

    return ($peg_len, $pdb_len, $dist);
}

sub get_cons {
    my ($peg) = @_;
    my $get_homo = "$FIG_Config::bin/get_homologs";
    my $aln_cons = "$FIG_Config::bin/aln_conservation";

    $get_homo = "perl get_homologs.pl" unless (-e $get_homo);
    $aln_cons = "perl aln_conservation.pl" unless (-e $aln_cons);
    
    my $cons = `$get_homo -cons '$peg' | $aln_cons`;
    chomp($cons);

    return $cons;
}

sub get_clibe {
    my ($pdb) = @_;
    my $clibe_db = "$FIG_Config::fig/var/DrugTargets/pdb_ligand_clibe_attribute_detailed.txt"; 
    $clibe_db = "pdb_ligand_clibe_attribute_detailed.txt" unless (-e $clibe_db); 
    my $pdb_key =  ($pdb =~/(\d\w{3})/, $1); 
 
    my $attr = `grep $pdb_key $clibe_db|head -n1`; 
    chomp($attr); 
 
    my $url; 
    (undef, undef, $attr, $url) = split(/\t/, $attr); 

    if ($attr =~ /(.*),TotE(.*),VDWI(.*),HBond(.*),EI(.*),SolE(.*)/) { 
        return ($1, $url, $2, $3, $4, $5, $6); 
    } else { 
        return (); 
    } 
}

sub gen_html {
    my ($part, @content) = @_;
    my @html;
    if ($part =~ /start_html/i) {
	push @html, "<html>";
	push @html, $cgi->head($cgi->title($content[0]));
	push @html, $cgi->start_body();
	push @html, "<table ID=DrugTargets BORDER=1 width=80%>";
    } elsif ($part =~ /end_html/i) {
	push @html, "</table>";
	push @html, $cgi->end_body();
	push @html, "</html>";
    } elsif ($part =~ /start_table/i) {
	push @html, "<table ID=DrugTargets BORDER=1 width=80%>";
    } elsif ($part =~ /end_table/i) {
	push @html, "</table>";
    } else {
	my @cols;
	my $s = ($part =~ /header/i) ? "th" : "td";
	push @html, "<tr>";
	for (my $i=0; $i<24; $i++) {$cols[$i] = ($content[$i]) ? $content[$i] : "N/A"};	
	map {$_ = "<$s>$_<\/$s>"} @cols;
	push @html, join(" ", @cols);
	push @html, "</tr>";
    }
    return join("\n", @html) . "\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3