[Bio] / FigWebServices / get_dlits.cgi Repository:
ViewVC logotype

View of /FigWebServices/get_dlits.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (download) (annotate)
Thu Jun 25 22:45:59 2009 UTC (10 years, 5 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.12: +0 -24 lines
get rid of one-time code

# -*- perl -*-
#
# Copyright (c) 2003-2008 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

#
#  DBMS tables used:
#
#     dlits
#     flds => "status char(1), md5_hash varchar(32), pubmed varchar(16), curator varchar(30), go_code varchar(15)"
#
#     titles
#     flds => "pubmed varchar(16), title varchar(1000)"
#
#     hash_role
#     flds => "md5_hash char(32), role varchar(1000)"
#
#     curr_role
#     flds => "curator varchar(30), role varchar(1000)"
#
#     genome_hash
#     flds => "genome varchar(32), md5_hash char(32)"
#

use FIG;
my $fig = new FIG;

use HTML;
use strict;

use CGI;
my $cgi = new CGI;

use Data::Dumper;

if (0)
{
    my $VAR1;
    eval(join("",`cat /tmp/get_dlit_parms`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
        print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0)
    {
        if (open(TMP,">/tmp/get_dlit_parms"))
        {
            print TMP &Dumper($cgi);
            close(TMP);
        }
    }
    exit;
}
my($genome);

my $html = [];
push @$html, "<TITLE>Get Dlits</TITLE>\n";

my $user      = $cgi->param('user');           #  Current user
my $curator   = $cgi->param('curator') || '';  #  Filter roles by subsystem curator
my $genomeD   = $cgi->param('genomeD');        #  Find lit by genome
my $ref_id    = $cgi->param('ref_id');         #  Show data by reference
my $role      = $cgi->param('role');           #  Find lit by role
my $show_just = $cgi->param('show_just');      #  Filter lit by status
   $show_just = ($show_just eq "all") ? '' : $show_just;

#  Requested actions:

my ( $submit1, $submit2, $submit3, $submit4, $submit5 );
if    ( $cgi->param( 'Process Changes' )          ) { $submit5 = 1 }
if    ( $ref_id                                   ) {}
elsif ( $cgi->param( 'Change subsystem curator' ) ) {}
elsif ( $cgi->param( 'Show Genomes' )             ) { $submit1 = 1 }
elsif ( $cgi->param( 'Show Roles' )               ) { $submit2 = 1 }
elsif ( $cgi->param( 'Show Genome' )              ) { $submit3 = 1 }
elsif ( $cgi->param( 'Show Role' )                ) { $submit4 = 1 }

my $done = 0;

my $rdbH  = $fig->db_handle;

if (! -d "$FIG_Config::data/Dlits")
{
    push( @$html, $cgi->h1("dlit data are not installed") );
    $done = 1;
}
elsif ( ! $user )
{
    push( @$html, $cgi->h2('To curate literature, please provide a user name') );
    push( @$html, $cgi->start_form(-action => "get_dlits.cgi", -method => 'post'),
                  'Username: ',
                  $cgi->textfield( -name=>"user", -size => 20 ),
                  $cgi->br,
                  $cgi->submit( 'Set user' ),
                  $cgi->end_form
        );
    $done = 1;
}


#  This is a one-time special case to add an index to the dlits table


#  Process a page of curated literature ----------------------------------------

if ( $submit5 && ! $done )
{
    &process_changes( $fig, $cgi, $html );
    my $prev_req = $cgi->param( 'prev_req' );
    $submit3 = 1 if $prev_req eq 'Show Genome';
    $submit4 = 1 if $prev_req eq 'Show Role';
}

#  If there is a problem, skip the rest of the action tests --------------------
#  This is organized this way so that a useful page can be
#  provided after processing a curated page.

if ( $done ) { }

#  All sequences in a reference ------------------------------------------------

elsif ( $ref_id )
{
    my $and_status = $show_just ? " AND ( dlits.status = '$show_just' )" : '';
    my $tuples = $rdbH->SQL( "SELECT DISTINCT status,md5_hash,curator "
                           . "FROM dlits "
                           . "WHERE ( pubmed = '$ref_id' )"
                           . $and_status
                           );
    my @to_display = ();
    foreach my $x ( @$tuples )
    {
        my( $status, $hash, $prev_curator ) = @$x;
        my @pegs = grep { $fig->is_real_feature($_) }
                   $fig->pegs_with_md5( $hash );
        if ( @pegs > 0 )
        {
            push( @to_display, [ $status, $pegs[0], $ref_id, $prev_curator ] );
        }
    }

    &display_set( $fig, $cgi, $html, \@to_display, "Genes linked to Reference: $ref_id", 'Show Reference' ) ;
}

#  Literature selected by role -------------------------------------------------

elsif ( $submit4 && $role )
{
    my $roleQ = quotemeta $role;
    my $and_status = $show_just ? " AND ( dlits.status = '$show_just' )" : '';
    my $tuples = $rdbH->SQL( "SELECT DISTINCT dlits.status,dlits.md5_hash,dlits.pubmed,dlits.curator "
                           . "FROM hash_role,dlits "
                           . "WHERE ( hash_role.role = '$roleQ') AND ( hash_role.md5_hash = dlits.md5_hash )"
                              . $and_status
                           );
    my @to_display = ();
    foreach my $x ( @$tuples )
    {
        my( $status, $hash, $pubmed, $prev_curator ) = @$x;
        my @pegs = grep { $fig->is_real_feature($_) }
                   $fig->pegs_with_md5( $hash );
        if ( @pegs > 0 )
        {
            push( @to_display, [ $status, $pegs[0], $pubmed, $prev_curator ] );
        }
    }

    &display_set( $fig, $cgi, $html, \@to_display, "Genes for Role: $role", 'Show Role' );
}

#  Show the role list ----------------------------------------------------------

elsif ( $submit2 || $submit4 )
{
    #  Did user request role literature, and not select a role?
    push @$html, $cgi->h3( '<FONT Color=red>Please select a role.</FONT>' ) if $submit4;

    my $where1 = $show_just ? " AND (dlits.status = '$show_just')" : "";
    my($where2, $from_list);

    $from_list = "hash_role,dlits";
    if ($curator)
    {
	$where2 =  " AND (curr_role.curator = '$curator' AND curr_role.role = hash_role.role)";
	$from_list .= ",curr_role";
    }
    
    my @roles = sort { lc $a cmp lc $b }  #  Make sort case insensitive
                map { $_->[0] }
                @{ $rdbH->SQL( "SELECT DISTINCT hash_role.role "
                             . "FROM $from_list "
                             . "WHERE hash_role.md5_hash = dlits.md5_hash $where1 $where2"
                             ) };
    push( @$html, $cgi->start_form(-action => "get_dlits.cgi", -method => 'post'),
                  $cgi->hidden( -name => 'user',     -value => $user ),
                  $cgi->hidden( -name => 'curator',  -value => $curator )
        );

    my $whom   = $curator ? "'$curator'" : "any one";
    my $status = $show_just ? " with literature of status code '$show_just':" : ":";
    push( @$html, $cgi->h3( "Subsystem roles curated by $whom" . $status ) );

    push( @$html, $cgi->scrolling_list( -name   => 'role',
                                        -values => [@roles],
                                        -size   => 30
                                      ),
                  $cgi->br, &show_just_selector( $cgi ),
                  $cgi->br, $cgi->submit( 'Show Role' ),
                  $cgi->br, $cgi->submit( 'Show Genomes' ),
                  $cgi->br, $cgi->submit( 'Change subsystem curator' ), " currently '$curator'.",
                  $cgi->end_form
         );
}

#  Literature selected by genome -----------------------------------------------

elsif ( $submit3 && $genomeD && ( $genomeD =~ /\((\d+\.\d+)\)$/ ) )
{
    my $genome = $1;
    my $where  = $show_just ? " AND (dlits.status = '$show_just')" : '';
    my $tuples = $rdbH->SQL( "SELECT DISTINCT dlits.status,dlits.md5_hash,dlits.pubmed,dlits.curator "
                           . "FROM genome_hash,dlits "
                           . "WHERE genome_hash.genome = '$genome' AND genome_hash.md5_hash = dlits.md5_hash $where"
                           );
    my @to_display = ();
    foreach my $x ( @$tuples )
    {
        my ( $status, $hash, $pubmed, $prev_curator ) = @$x;
        push @to_display, map  { [ $status, $_, $pubmed, $prev_curator ] }
                          grep { &FIG::genome_of($_) eq $genome && $fig->is_real_feature($_) }
                          $fig->pegs_with_md5($hash);
    }

    &display_set( $fig, $cgi, $html, \@to_display, "Genes for $genomeD", 'Show Genome' );
}

#  Show genome list ------------------------------------------------------------

elsif ( $submit1 || $submit3 )
{
    #  Did user request genome literature, and not select a genome?
    push @$html, $cgi->h3( '<FONT Color=red>Please select a genome.</FONT>' ) if $submit3;

    my $where   = $show_just ? " AND (dlits.status = '$show_just')" : "";

    my $genomes = $rdbH->SQL( "SELECT DISTINCT genome_hash.genome "
                            . "FROM genome_hash,dlits "
                            . "WHERE genome_hash.md5_hash = dlits.md5_hash $where"
                            );
    my @genomes = sort { lc $a cmp lc $b }  #  Make sort case insensitive
                  map { &compute_genome_label( $fig, $_->[0] ) }
                  @$genomes;

    push( @$html, $cgi->start_form(-action => "get_dlits.cgi", -method => 'post'),
                  $cgi->hidden( -name => 'user',     -value => $user ),
                  $cgi->hidden( -name => 'curator',  -value => $curator ),
                  $cgi->scrolling_list( -name   => 'genomeD',
                                        -values => [@genomes],
                                        -size   => 30
                                      ),
                  $cgi->br, &show_just_selector( $cgi ),
                  $cgi->br, $cgi->submit( 'Show Genome' ), ' selected above.',
                  $cgi->br, $cgi->submit( 'Show Roles' ),
                  ( $curator ? " for subsystem curator '$curator'." : '' ),
                  $cgi->br, $cgi->submit( 'Change subsystem curator' ), " currently '$curator'.",
                  $cgi->end_form
         );
}

#  Default = display subsystem curators ----------------------------------------

else
{
    my @cur  = map  { $_->[0] }
               sort { $a->[1] cmp $b->[1] || length $a->[0] <=> length $b->[0] }
               map  { my $cur = $_->[0];
                      my $nam = lc $cur;
                      $nam =~ s/^master[:_]?//;
                      [ $cur, $nam ]
                    }
               @{ $rdbH->SQL( "SELECT DISTINCT curator FROM dlits" ) };
    my $curN = @cur;

    push( @$html, $cgi->start_form(-action => "get_dlits.cgi", -method => 'post'),
                  $cgi->hidden( -name => 'user',     -value => $user ),
                  $cgi->scrolling_list( -name   => 'curator',
                                        -values => [ @cur ],
                                        -size   => $curN
                                      ),
                  $cgi->br,
                  &show_just_selector( $cgi ),
                  $cgi->submit( 'Show Roles' ),
                  $cgi->br,
                  $cgi->submit( 'Show Genomes' ),
                  $cgi->end_form
         );
}
&HTML::show_page($cgi,$html);
exit;


sub show_just_selector
{
    my ( $cgi, $html ) = @_;
    my $default = $cgi->param( 'show_just' );
    $default =~ /^[ A-Z]$/ or $default = 'all';
    return 'Show literature links with all status codes, or pick a specific one: '
          . $cgi->scrolling_list( -name     => 'show_just',
                                  -values   => ['all',' ','D','R','N','G'],
                                  -default  => $default,
                                  -override => 1,
                                  -size     => 1
                                )
          . $cgi->br;
}


sub compute_genome_label
{
    my($fig, $org) = @_;

    my $gs = $fig->genus_species($org);
    return "$gs ($org)";
}


sub title_of {
    my( $fig, $pubmed ) = @_;

    my $rdbH  = $fig->db_handle;
    my $retval = $rdbH->SQL( "SELECT title "
                           . "FROM pubmed_titles "
                           . "WHERE (pubmed = $pubmed)"
                           );
    return ( @$retval > 0 ) ? $retval->[0]->[0] : "";
}


sub pubmed_link
{
    return "<a target=_blank href=http://www.ncbi.nlm.nih.gov/sites/entrez?db=pubmed&cmd=search&term=$_[0]>$_[0]</a>";
}


sub display_set {
    my( $fig, $cgi, $html, $to_display, $tab_title, $submit ) = @_;

    my %status_code = ( 'D' => 1, ' ' => 2, 'N' => 3, 'R' => 4, 'G' => 5 );

    #  @$to_distplay items are [ $status, $peg, $pubmed ]

    foreach $_ ( @$to_display ) { $_->[0] =~ s/^\s*$/ /; }  # fix empty status strings
    my @tuples = sort { ( $status_code{$a->[0]} <=> $status_code{$b->[0]} )
                     or &FIG::by_fig_id( $a->[1], $b->[1] )
                     or ( $a->[2] <=> $b->[2] )  # Numeric sort of PMID
                      }
                 @$to_display;
    my $total_tuples = @tuples;

    my $from;                    #  Offset to first item displayed
    my $n_per_page = 100;        #  Really the lines per page

    if ( $cgi->param( 'Go to item numbers' ) )
    {
        $from = $cgi->param( 'offset_select' );
    }
    else
    {
        $from  = $cgi->param( 'from_line' ) || 0;
        $from -= $n_per_page if $cgi->param( 'Previous page' );
        $from += $n_per_page if $cgi->param( 'Next page' );
    }

    #  Cut down the array to that to be displayed

    if ( $total_tuples > 0 )
    {
        if ( $from < 0 ) { $from = 0 }
        if ( $from > $total_tuples )
        {
            $from = $n_per_page * int( ( $total_tuples - 1 ) / $n_per_page );
        }
        splice( @tuples, 0, $from );
        splice( @tuples, $n_per_page );
    }
    else
    {
        undef $from;
    }

    # Add status code key at top of page

    &desc( $html );

    # Build the html form

    push( @$html, $cgi->start_form(-action => "get_dlits.cgi", -method => 'post') );

    #  Current user
    my $user = $cgi->param('user');
    push( @$html, $cgi->hidden( -name => 'user', -value => $user, -override => 1 ) );

    #  Filter roles by subsystem curator
    my $curator = $cgi->param('curator');
    push( @$html, $cgi->hidden( -name => 'curator', -value => $curator, -override => 1 ) ) if $curator;

    #  Find lit by genome
    my $genomeD = $cgi->param('genomeD');
    push( @$html, $cgi->hidden( -name => 'genomeD', -value => $genomeD, -override => 1 ) ) if $genomeD;

    #  Show sequences associated with a reference
    my $ref_id = $cgi->param('ref_id');
    push( @$html, $cgi->hidden( -name => 'ref_id', -value => $ref_id, -override => 1 ) ) if $ref_id;

    #  Find lit by role
    my $role = $cgi->param('role');
    push( @$html, $cgi->hidden( -name => 'role', -value => $role, -override => 1 ) ) if $role;

    #  Requested action
    push( @$html, $cgi->hidden( -name => 'prev_req', -value => $submit, -override => 1 ) ) if $submit;

    #  Used to inherit action
    push( @$html, $cgi->hidden( -name => $submit, -value => 1, -override => 1 ) ) if $submit;

    push( @$html, $cgi->br );

    #  If this is not a table built around one reference, we want to append
    #  links to all proteins associated with the reference.  We will
    #  compute this on a unique set of the references:

    my %genes_in_pub;
    if ( ! $ref_id )
    {
        foreach ( @tuples )
        {
            my $pub = $_->[2];
            next if defined $genes_in_pub{ $pub };
            #  This count intentionally omits filtering by status
            $genes_in_pub{ $pub } = scalar ( &all_seqs_in_pub( $fig, $pub ) );
        }
    }

    #  Build the status selection table

    my $col_hdrs = [' ','G','N','R','D','Curator','PEG','Function','Genus/Species','PubMed','Title'];
    my $tab = [];
    my $i;
    for ( $i = 0; ($i < @tuples); $i++ )
    {
        my $tuple = $tuples[$i];
        my( $status, $peg, $pubmed, $prev_curator ) = @$tuple;
        next if ( ! $fig->is_real_feature($peg) );   # This should never fail

        $prev_curator =~ s/^master://i;  #  Remove master from displayed curator
        my $gs = $fig->genus_species( &FIG::genome_of( $peg ) );
        $gs =~ s/^(\S+\s+\S+).*$/$1/;
        my $title = &title_of( $fig, $pubmed );
        my $func  = $fig->function_of($peg);
        my @codes = $cgi->radio_group( -name     => "tuple:$peg:$pubmed:$status",
                                       -values   => [' ','G','N','R','D'],
                                       -default  => "$status",
                                       -nolabels => 1
                                     );

        my $pub_link = &pubmed_link( $pubmed );

        #  Does this paper cover more than one protein?

        if ( $genes_in_pub{ $pubmed } > 1 )
        {
            #  This link intentionally omits $show_just
            my $href = "get_dlits.cgi?user=$user&ref_id=$pubmed";
            $pub_link .= "<BR />(<A HRef='$href' Target=_blank>$genes_in_pub{$pubmed} seqs</A>)";
        }
        if ( $i && ($i % 15) == 0 ) { push(@$tab,$col_hdrs) }
        push( @$tab, [ @codes, $prev_curator, &HTML::fid_link( $cgi, $peg ), $func, $gs, $pub_link, $title ] );
    }

    push( @$html,&HTML::make_table( $col_hdrs, $tab, $tab_title ) );

    push( @$html, $cgi->br, &show_just_selector( $cgi ) );

    #  Navigation controls through long lists

    if ( defined( $from ) && ( $total_tuples > $n_per_page ) )
    {
        my $i1 = $from + 1;
        my $i2 = $from + $n_per_page;
        $i2 = $total_tuples if $i2 > $total_tuples;
        push( @$html, $cgi->hidden( -name => 'from_line', -value => $from, -override => 1 ) );
        push( @$html, $cgi->br, "Currently displaying items $i1 - $i2 of $total_tuples.", $cgi->br );
        push( @$html, $cgi->submit( 'Previous page' ) ) if $from > 0;
        push( @$html, $cgi->submit( 'Next page' ) ) if $i2 < $total_tuples;

        my @offsets;
        my %labels;
        for ( my $i = 0; $i < $total_tuples; $i += $n_per_page )
        {
            push @offsets, $i;
            my $imax = $i + $n_per_page;
            $imax = $total_tuples if $imax > $total_tuples;
            $labels{ $i } = ( $i+1 ) . " - $imax";
        }
        push( @$html, $cgi->submit( 'Go to item numbers' ),
                      $cgi->scrolling_list( -name     => 'offset_select',
                                            -values   => \@offsets,
                                            -labels   => \%labels,
                                            -default  => $from,
                                            -size     => 1,
                                            -override => 1
                                          ),
                      $cgi->br );
    }

    #  Some action buttons:

    push( @$html, $cgi->br, $cgi->submit( 'Process Changes' ), "entered on this page." );

    #  Display some general navigation buttons.  However, these are not
    #  consistent with the intended use of the publication-based view.

    if ( ! $ref_id )
    {
        push( @$html, $cgi->br, $cgi->submit( 'Show Genomes' ), "discarding any changes made on this page." );
        if ( $curator )
        {
            push( @$html, $cgi->br, $cgi->submit( 'Show Roles' ),
                                    "for subsystem curator '$curator', discarding any changes made on this page." );
        }
        push( @$html, $cgi->br, $cgi->submit( 'Change subsystem curator' ), " currently '$curator', discarding any changes made on this page." );
    }

    push( @$html, $cgi->end_form );
}


#     dlits
#     flds => "status char(1), md5_hash varchar(32), pubmed varchar(16), curator varchar(30), go_code varchar(15)"

sub all_seqs_in_pub {
    my ( $fig, $pubmed, $status ) = @_;
    my $rdbH = $fig->db_handle;
    my $and_status = $status ? " AND ( status = '$status' )" : "";
    my $response = $rdbH->SQL( "SELECT DISTINCT md5_hash "
                             . "FROM dlits "
                             . "WHERE ( pubmed = '$pubmed' )"
                             . $and_status
                             );
    return ( ref $response ) ? map { $_->[0] } @$response : ();
}


sub desc {
    my( $html ) = @_;

    my $col_hdrs = [ "Code", "Meaning" ];
    my $tab      = [ [ [" ","TD Align=center"], "No one has curated this link yet" ],
                     [ ["G","TD Align=center"], "Genome paper - marks all uncurated protein links of this paper to 'G' (implicitly irrelevant)" ],
                     [ ["N","TD Align=center"], "Not relevant to this protein" ],
                     [ ["R","TD Align=center"], "Relevant, but not strong enough to determine function" ],
                     [ ["D","TD Align=center"], "Direct reference that can be used to support function assertion" ]
                   ];
    push( @$html, &HTML::make_table( $col_hdrs, $tab, "Code Meanings" ), "<hr>" );
}


sub process_changes {
    my( $fig, $cgi, $html ) = @_;
    my $user = $cgi->param('user');

    my @tuples = grep { $_->[2] ne $_->[3] } 
                 map { ($_ =~ /^tuple:(fig\|\d+\.\d+\.peg\.\d+)\s*:\s*(\d+)\s*:([ RDGN])/) ? [$1,$2,$3,$cgi->param($_)] : () }
                 $cgi->param();
    my $n_change = 0;
    foreach my $tuple ( @tuples )
    {
        my( $peg, $pubmed, $from, $to ) = @$tuple;
        $fig->add_dlit( -status   => $to,
                        -peg      => $peg,
                        -pubmed   => $pubmed,
                        -curator  => $user,
                        -override => 1
                      );
        $n_change++;
    }

    push( @$html, $cgi->h2( "<FONT Color=green>Made $n_change requested changes.</FONT>" ) );
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3