[Bio] / Sprout / rebuild_dlit_titles.pl Repository:
ViewVC logotype

View of /Sprout/rebuild_dlit_titles.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Mon Jan 16 20:07:07 2012 UTC (7 years, 1 month ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2014_0729, mgrast_version_3_2, rast_rel_2014_0912, HEAD
More KBase reorg.

#!/usr/bin/perl -w

=head1 Refresh Title Database

This script gets a list of all the direct literature references in the
SEED and refreshes the title file with the title of each reference not
currently in the title file. It will also optionally move any literature 
references on features to their associated proteins.

The currently-supported command-line options are as follows.

=over 4

=item fixFeatures

If specified, then literature references on features will be moved
to the associated proteins.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=back

=cut

use strict;
use Tracer;
use FIG;
use SeedUtils;
use CustomAttributes;
use Tracer;

# Get the command-line options and parameters.
# StandardSetup is not defined in this file; it is presumably exported by
# Tracer -- confirm. Neither $options nor @parameters is read again in the
# visible part of the script.
my ($options, @parameters) = StandardSetup([], { }, "<count>", @ARGV);
# Create the statistics object. It accumulates the named counters that are
# reported at the end of the run.
# NOTE(review): no visible "use Stats;" line -- presumably the class is loaded
# by one of the modules above; confirm.
my $stats = Stats->new();
# Get the attribute database.
## HACK #######
# The three assignments below overwrite the FIG_Config attribute-database
# settings with a developer's local values (localhost, port 3333, and a
# Windows path to the DBD file). They must run before the constructor call,
# which presumably reads them -- confirm.
# TODO: remove these overrides before running anywhere else.
$FIG_Config::attrHost = "localhost";
$FIG_Config::attrPort = 3333;
$FIG_Config::attrDBD ="c:/Users/Bruce/FIG/FIG/Sprout/AttributesDBD.xml";
my $attrDB = CustomAttributes->new(user => 'seed');
# Get a hash of the titles currently in the SEED. This hash maps
# each PUBMED ID to its title.
# HACK: the FIG lookup is currently disabled, so %titles starts out empty
# and every DLIT found below is counted as needing a title. The disabled
# code was:
#     my $fig = FIG->new();
#     my %titles = map { $_->[0] => $_->[1] } @{$fig->all_titles()};
my %titles;
# This hash will contain DLITs that need to be moved to proteins.
# The PUBMED ID of each DLIT that needs to be moved will map to a sub-hash
# of all the features to which it is currently attached.
my %movers;
# Get a query that will return all the DLITs. The filter has a single "?"
# marker, so exactly one bind value is supplied.
# NOTE(review): the tail of this statement was missing from the file, which
# left the call unterminated. The bind value 'dlit(%' is reconstructed from
# the /^dlit\((\d+)/ pattern applied to each value in the loop below --
# confirm it against the intended query.
my $dlitQ = $attrDB->Get("IsEvidencedBy", "IsEvidencedBy(value) LIKE ?",
                         ['dlit(%']);
# This will contain all the PUBMED IDs for titles we don't have.
my %needed;
Trace("Reading DLITs.") if T(2);
# Loop through the DLITs. For each one we count it, note whether it is
# attached to a feature (so it can be moved to a protein later), and record
# its PUBMED ID in %needed if we do not yet have a title for it.
while (my $dlit = $dlitQ->Fetch()) {
    $stats->Add(dlitFound => 1);
    # Get the object ID and value.
    my $objectID = $dlit->PrimaryValue('to-link');
    my $value = $dlit->PrimaryValue('value');
    # Parse out the pubmed ID. Values that do not start with "dlit(" followed
    # by digits are counted above but otherwise ignored.
    if ($value =~ /^dlit\((\d+)/) {
        my $pubmedID = $1;
        # Determine what object the DLIT is attached to. If the ID contains a
        # colon, keep only the text after the first colon.
        if ($objectID =~ /:(.+)/) {
            $objectID = $1;
        }
        if (substr($objectID, 0, 3) eq 'fig') {
            # Here we are attached to a feature. Save this fact in the
            # movement hash.
            $movers{$pubmedID}{$objectID} = 1;
            $stats->Add(featureDlitFound => 1);
        }
        # Does this DLIT have a title?
        if ($titles{$pubmedID}) {
            # Yes, we keep going.
            $stats->Add(titleFound => 1);
        } else {
            $stats->Add(titleNeeded => 1);
            # Ensure we know we need this title. The check keeps a PUBMED ID
            # referenced by several DLITs from being requested more than once.
            if (! $needed{$pubmedID}) {
                $stats->Add(newTitleRequest => 1);
                $needed{$pubmedID} = 1;
            }
        }
    }
}
# Report the counters accumulated during the run.
Trace("Statistics for run:\n" . $stats->Show()) if T(2);

# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=6678417,9507199,28558982,28558984,28558988,28558990

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3