80 |
Either the name of the file containing the list of trusted subsystems or a reference |
Either the name of the file containing the list of trusted subsystems or a reference |
81 |
to a list of subsystem names. If nothing is specified, all NMPDR subsystems will be |
to a list of subsystem names. If nothing is specified, all NMPDR subsystems will be |
82 |
considered trusted. (A subsystem is considered NMPDR if it has a file named C<NMPDR> |
considered trusted. (A subsystem is considered NMPDR if it has a file named C<NMPDR> |
83 |
in its data directory.) Only subsystem data related to the trusted subsystems is loaded. |
in its data directory.) Only subsystem data related to the NMPDR subsystems is loaded. |
84 |
|
|
85 |
=item options |
=item options |
86 |
|
|
138 |
if (! defined $subsysFile || $subsysFile eq '') { |
if (! defined $subsysFile || $subsysFile eq '') { |
139 |
# Here we want all the usable subsystems. First we get the whole list. |
# Here we want all the usable subsystems. First we get the whole list. |
140 |
my @subs = $fig->all_subsystems(); |
my @subs = $fig->all_subsystems(); |
141 |
# Loop through, checking for usability. |
# Loop through, checking for the NMPDR file. |
142 |
for my $sub (@subs) { |
for my $sub (@subs) { |
143 |
if ($fig->usable_subsystem($sub)) { |
if ($fig->nmpdr_subsystem($sub)) { |
144 |
$subsystems{$sub} = 1; |
$subsystems{$sub} = 1; |
145 |
} |
} |
146 |
} |
} |
168 |
my $name = $subsystem; |
my $name = $subsystem; |
169 |
$name =~ s/_/ /g; |
$name =~ s/_/ /g; |
170 |
my $classes = $fig->subsystem_classification($subsystem); |
my $classes = $fig->subsystem_classification($subsystem); |
171 |
my @classList = map { " $_" } @{$classes}; |
$name .= " " . join(" ", @{$classes}); |
|
$name .= join("", @classList); |
|
172 |
$subsystems{$subsystem} = $name; |
$subsystems{$subsystem} = $name; |
173 |
} |
} |
174 |
} |
} |
469 |
IsLocatedIn |
IsLocatedIn |
470 |
HasFeature |
HasFeature |
471 |
HasRoleInSubsystem |
HasRoleInSubsystem |
472 |
|
FeatureEssential |
473 |
|
FeatureVirulent |
474 |
|
FeatureIEDB |
475 |
|
|
476 |
=over 4 |
=over 4 |
477 |
|
|
500 |
my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream'); |
my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream'); |
501 |
my $loadHasFeature = $self->_TableLoader('HasFeature', $self->PrimaryOnly); |
my $loadHasFeature = $self->_TableLoader('HasFeature', $self->PrimaryOnly); |
502 |
my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem', $self->PrimaryOnly); |
my $loadHasRoleInSubsystem = $self->_TableLoader('HasRoleInSubsystem', $self->PrimaryOnly); |
503 |
|
my $loadFeatureEssential = $self->_TableLoader('FeatureEssential'); |
504 |
|
my $loadFeatureVirulent = $self->_TableLoader('FeatureVirulent'); |
505 |
|
my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB'); |
506 |
# Get the subsystem hash. |
# Get the subsystem hash. |
507 |
my $subHash = $self->{subsystems}; |
my $subHash = $self->{subsystems}; |
508 |
# Get the maximum sequence size. We need this later for splitting up the |
# Get the maximum sequence size. We need this later for splitting up the |
535 |
$oldFeatureID = $featureID; |
$oldFeatureID = $featureID; |
536 |
# Count this feature. |
# Count this feature. |
537 |
$loadFeature->Add("featureIn"); |
$loadFeature->Add("featureIn"); |
538 |
# Begin building the keywords. |
# Begin building the keywords. We start with the genome ID, the |
539 |
my @keywords = ($genomeID); |
# feature ID, and the organism name. |
540 |
|
my @keywords = ($genomeID, $featureID, $fig->genus_species($genomeID)); |
541 |
# Get the functional assignment and aliases. This |
# Get the functional assignment and aliases. This |
542 |
# depends on the feature type. |
# depends on the feature type. |
543 |
my $assignment; |
my $assignment; |
599 |
} |
} |
600 |
} |
} |
601 |
} |
} |
602 |
# The final task is to add virulence and essentiality attributes. |
# There are three special attributes computed from property |
603 |
if ($fig->virulent($featureID)) { |
# data that we build next. If the special attribute is non-empty, |
604 |
push @keywords, "virulent"; |
# its name will be added to the keyword list. First, we get all |
605 |
} |
# the attributes for this feature. They will come back as |
606 |
if ($fig->essential($featureID)) { |
# 4-tuples: [peg, name, value, URL]. We use a 3-tuple instead: |
607 |
push @keywords, "essential"; |
# [name, value, value with URL]. (We don't need the PEG, since |
608 |
|
# we already know it.) |
609 |
|
my @attributes = map { [$_->[1], $_->[2], Tracer::CombineURL($_->[2], $_->[3])] } |
610 |
|
$fig->get_attributes($featureID); |
611 |
|
# Now we process each of the special attributes. |
612 |
|
if (SpecialAttribute($featureID, \@attributes, |
613 |
|
1, 2, '^(essential|potential_essential)$', |
614 |
|
$loadFeatureEssential)) { |
615 |
|
push @keywords, 'essential'; |
616 |
|
$loadFeature->Add('essential'); |
617 |
|
} |
618 |
|
if (SpecialAttribute($featureID, \@attributes, |
619 |
|
0, 2, '^virulen', |
620 |
|
$loadFeatureVirulent)) { |
621 |
|
push @keywords, 'virulent'; |
622 |
|
$loadFeature->Add('virulent'); |
623 |
|
} |
624 |
|
if (SpecialAttribute($featureID, \@attributes, |
625 |
|
0, 2, '^iedb_', |
626 |
|
$loadFeatureIEDB)) { |
627 |
|
push @keywords, 'iedb'; |
628 |
|
$loadFeature->Add('iedb'); |
629 |
} |
} |
630 |
# Now we need to bust up hyphenated words in the keyword |
# Now we need to bust up hyphenated words in the keyword |
631 |
# list. |
# list. |
1605 |
return $retVal; |
return $retVal; |
1606 |
} |
} |
1607 |
|
|
1608 |
|
=head3 LoadDrugData

C<< my $stats = $spl->LoadDrugData(); >>

Load the drug target data into Sprout.

The following relations are loaded by this method.

    DrugProject
    ContainsTopic
    DrugTopic
    ContainsAnalysisOf
    PDB
    IncludesBound
    IsBoundIn
    BindsWith
    Ligand
    DescribesProteinForFeature
    FeatureConservation

The source information for these relations is taken from flat files in the
C<$FIG_Config::drug_directory>. The file C<master_tables.list> contains
a list of drug project names paired with file names, one tab-delimited pair
per line. The named file (in the same directory) contains all the data for
the project. Lines of C<master_tables.list> that do not contain both a project
name and a file name are skipped with a trace message. Projects whose data
file does not exist are also skipped with a trace message.

If the C<loadOnly> option is set, no data is generated and the load is
finished from the existing load files.

=over 4

=item RETURNS

Returns a statistics object for the loads.

=back

=cut
#: Return Type $%;
sub LoadDrugData {
    # Get this object instance.
    my ($self) = @_;
    # Create load objects for the tables we're loading.
    my $loadDrugProject = $self->_TableLoader('DrugProject');
    my $loadContainsTopic = $self->_TableLoader('ContainsTopic');
    my $loadDrugTopic = $self->_TableLoader('DrugTopic');
    my $loadContainsAnalysisOf = $self->_TableLoader('ContainsAnalysisOf');
    my $loadPDB = $self->_TableLoader('PDB');
    my $loadIncludesBound = $self->_TableLoader('IncludesBound');
    my $loadIsBoundIn = $self->_TableLoader('IsBoundIn');
    my $loadBindsWith = $self->_TableLoader('BindsWith');
    my $loadLigand = $self->_TableLoader('Ligand');
    my $loadDescribesProteinForFeature = $self->_TableLoader('DescribesProteinForFeature');
    my $loadFeatureConservation = $self->_TableLoader('FeatureConservation');
    if ($self->{options}->{loadOnly}) {
        Trace("Loading from existing files.") if T(2);
    } else {
        Trace("Generating drug target data.") if T(2);
        # Load the project list. The file comes in as a list of chomped lines,
        # and we split them on the TAB character to make the project name the
        # key and the file name the value of the resulting hash. Each line is
        # parsed on its own so that a line with too few or too many fields
        # cannot shift the key/value pairing of the lines that follow it.
        my %projects = ();
        for my $listLine (Tracer::GetFile("$FIG_Config::drug_directory/master_tables.list")) {
            my ($projectName, $fileName) = split /\t/, $listLine;
            if (defined $projectName && defined $fileName &&
                $projectName ne '' && $fileName ne '') {
                $projects{$projectName} = $fileName;
            } elsif ($listLine =~ /\S/) {
                # Blank lines are ignored silently; anything else is reported.
                Trace("Malformed line in master_tables.list skipped: $listLine") if T(0);
            }
        }
        # Create hashes for the derived objects: PDBs, Features, and Ligands. These objects
        # may occur multiple times in a single project file or even in multiple project
        # files.
        my %ligands = ();
        my %pdbs = ();
        my %features = ();
        my %bindings = ();
        # Set up a counter for drug topics. This will be used as the key.
        my $topicCounter = 0;
        # Loop through the projects. We sort the keys not because we need them sorted, but
        # because it makes it easier to infer our progress from trace messages.
        for my $project (sort keys %projects) {
            Trace("Processing project $project.") if T(3);
            # Only proceed if the download file exists.
            my $projectFile = "$FIG_Config::drug_directory/$projects{$project}";
            if (! -f $projectFile) {
                Trace("Project file $projectFile not found.") if T(0);
            } else {
                # Create the project record.
                $loadDrugProject->Put($project);
                # Create a hash for the topics. Each project has one or more topics. The
                # topic is identified by a URL, a category, and an identifier.
                my %topics = ();
                # Now we can open the project file.
                Trace("Reading project file $projectFile.") if T(3);
                Open(\*PROJECT, "<$projectFile");
                # Get the first record, which is a list of column headers. We don't use this
                # for anything, but it may be useful for debugging.
                my $headerLine = <PROJECT>;
                # Loop through the rest of the records.
                while (! eof PROJECT) {
                    # Get the current line of data. Note that not all lines will have all
                    # the fields. In particular, the CLIBE data is fairly rare.
                    my ($authorOrganism, $category, $tag, $refURL, $peg, $conservation,
                        $pdbBound, $pdbBoundEval, $pdbFree, $pdbFreeEval, $pdbFreeTitle,
                        $protDistInfo, $passAspInfo, $passAspFile, $passWeightInfo,
                        $passWeightFile, $clibeInfo, $clibeURL, $clibeTotalEnergy,
                        $clibeVanderwaals, $clibeHBonds, $clibeEI, $clibeSolvationE)
                        = Tracer::GetLine(\*PROJECT);
                    # The tag contains an identifier for the current line of data followed
                    # by a text statement that generally matches a property name in the
                    # main database. Only the text statement is used here (it is part of
                    # the topic); the leading identifier is discarded.
                    my (undef, $topicTag) = split /\s*,\s*/, $tag;
                    $loadDrugProject->Add("data line");
                    # Check for a new topic.
                    my $topicData = "$category\t$topicTag\t$refURL";
                    if (! exists $topics{$topicData}) {
                        # Here we have a new topic. Compute its ID.
                        $topicCounter++;
                        $topics{$topicData} = $topicCounter;
                        # Create its database record.
                        $loadDrugTopic->Put($topicCounter, $refURL, $category, $authorOrganism,
                                            $topicTag);
                        # Connect it to the project.
                        $loadContainsTopic->Put($project, $topicCounter);
                        $loadDrugTopic->Add("topic");
                    }
                    # Now we know the topic ID exists in the hash and the topic will
                    # appear in the database, so we get this topic's ID.
                    my $topicID = $topics{$topicData};
                    # If the feature in this line is new, we need to save its conservation
                    # number.
                    if (! exists $features{$peg}) {
                        $loadFeatureConservation->Put($peg, $conservation);
                        $features{$peg} = 1;
                    }
                    # Now we have two PDBs to deal with-- a bound PDB and a free PDB.
                    # The free PDB will have data about docking points; the bound PDB
                    # will have data about docking. We store both types as PDBs, and
                    # the special data comes from relationships. First we process the
                    # bound PDB.
                    if ($pdbBound) {
                        $loadPDB->Add("bound line");
                        # Insure this PDB is in the database. The input has no separate
                        # title for the bound PDB, so it is derived from the free title.
                        $self->CreatePDB($pdbBound, lc "$pdbFreeTitle (bound)", "bound", \%pdbs, $loadPDB);
                        # Connect it to this topic.
                        $loadIncludesBound->Put($topicID, $pdbBound);
                        # Check for CLIBE data.
                        if ($clibeInfo) {
                            $loadLigand->Add("clibes");
                            # We have CLIBE data, so we create a ligand and relate it to the PDB.
                            if (! exists $ligands{$clibeInfo}) {
                                # This is a new ligand, so create its record.
                                $loadLigand->Put($clibeInfo);
                                $loadLigand->Add("ligand");
                                # Make sure we know this ligand already exists.
                                $ligands{$clibeInfo} = 1;
                            }
                            # Now connect the PDB to the ligand using the CLIBE data.
                            $loadBindsWith->Put($pdbBound, $clibeInfo, $clibeURL, $clibeHBonds, $clibeEI,
                                                $clibeSolvationE, $clibeVanderwaals);
                        }
                        # Connect this PDB to the feature.
                        $loadDescribesProteinForFeature->Put($pdbBound, $peg, $protDistInfo, $pdbBoundEval);
                    }
                    # Next is the free PDB.
                    if ($pdbFree) {
                        $loadPDB->Add("free line");
                        # Insure this PDB is in the database.
                        $self->CreatePDB($pdbFree, lc $pdbFreeTitle, "free", \%pdbs, $loadPDB);
                        # Connect it to this topic.
                        $loadContainsAnalysisOf->Put($topicID, $pdbFree, $passAspInfo,
                                                     $passWeightFile, $passWeightInfo, $passAspFile);
                        # Connect this PDB to the feature.
                        $loadDescribesProteinForFeature->Put($pdbFree, $peg, $protDistInfo, $pdbFreeEval);
                    }
                    # If we have both PDBs, we may need to link them.
                    if ($pdbFree && $pdbBound) {
                        $loadIsBoundIn->Add("connection");
                        # Insure we only link them once.
                        my $bindingKey = "$pdbFree\t$pdbBound";
                        if (! exists $bindings{$bindingKey}) {
                            $loadIsBoundIn->Add("newConnection");
                            $loadIsBoundIn->Put($pdbFree, $pdbBound);
                            $bindings{$bindingKey} = 1;
                        }
                    }
                }
                # Close off this project.
                close PROJECT;
            }
        }
    }
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1798 |
|
|
1799 |
|
|
1800 |
=head2 Internal Utility Methods |
=head2 Internal Utility Methods |
1801 |
|
|
1802 |
|
=head3 SpecialAttribute

C<< my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, $idxValue, $pattern, $loader); >>

Look for special attributes of a given type. A special attribute is found by comparing one of
the columns of the incoming attribute list to a search pattern. If a match is found, then
another column is put into an output table connected to the specified ID.

For example, when processing features, the attribute list we look at has three columns: attribute
name, attribute value, and attribute value HTML. The IEDB attribute exists if the attribute name
begins with C<iedb_>. The call signature is therefore

    my $found = SpecialAttribute($fid, \@attributeList, 0, 2, '^iedb_', $loadFeatureIEDB);

The pattern is matched against column 0, and if we have a match, then column 2's value is put
to the output along with the specified feature ID.

=over 4

=item id

ID of the object whose special attributes are being loaded. This forms the first column of the
output.

=item attributes

Reference to a list of tuples.

=item idxMatch

Index in each tuple of the column to be matched against the pattern. If the match is
successful, an output record will be generated.

=item idxValue

Index in each tuple of the column to be put as the second column of the output.

=item pattern

Pattern to be matched against the specified column. The match will be case-insensitive.

=item loader

An object to which each output record will be put. Usually this is an B<ERDBLoad> object,
but technically it could be anything with a C<Put> method. (A C<RelName> method is also
called when a trace message is written.)

=item RETURN

Returns a count of the matches found.

=back

=cut

sub SpecialAttribute {
    # Get the parameters.
    my ($id, $attributes, $idxMatch, $idxValue, $pattern, $loader) = @_;
    # Declare the return variable.
    my $retVal = 0;
    # Loop through the attribute rows.
    for my $row (@{$attributes}) {
        # Check for a match. The pattern is treated as a regular expression
        # and applied without regard to case.
        if ($row->[$idxMatch] =~ m/$pattern/i) {
            # We have a match, so output a row.
            $loader->Put($id, $row->[$idxValue]);
            $retVal++;
        }
    }
    # Only report when something was actually found.
    Trace("$retVal special attributes found for $id and loader " . $loader->RelName() . ".") if T(4) && $retVal;
    # Return the number of matches.
    return $retVal;
}
1876 |
|
|
1877 |
|
=head3 CreatePDB

C<< $loader->CreatePDB($pdbID, $title, $type, \%pdbHash, $pdbLoader); >>

Insure that a PDB record exists for the identified PDB. If one does not exist, it will be
created.

=over 4

=item pdbID

ID string (usually an unqualified file name) for the desired PDB.

=item title

Title to use if the PDB must be created.

=item type

Type of PDB: C<free> or C<bound>

=item pdbHash

Hash containing the IDs of PDBs that have already been created. The ID of a newly-created
PDB is added to this hash.

=item pdbLoader

Load object for the PDB table. In addition to receiving the new PDB record, it is used to
count the number of checks made and the number of PDBs of each type created.

=back

=cut

sub CreatePDB {
    # Get the parameters.
    my ($self, $pdbID, $title, $type, $pdbHash, $pdbLoader) = @_;
    # Count every request, whether or not it creates a record.
    $pdbLoader->Add("PDB check");
    # Check to see if this is a new PDB.
    if (! exists $pdbHash->{$pdbID}) {
        # It is, so we create it.
        $pdbLoader->Put($pdbID, $title, $type);
        # Remember it so that later requests for the same ID are ignored.
        $pdbHash->{$pdbID} = 1;
        # Count it.
        $pdbLoader->Add("PDB-$type");
    }
}
1923 |
|
|
1924 |
=head3 TableLoader |
=head3 TableLoader |
1925 |
|
|
1926 |
Create an ERDBLoad object for the specified table. The object is also added to |
Create an ERDBLoad object for the specified table. The object is also added to |