[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Tue Aug 16 20:35:03 2005 UTC revision 1.5, Fri Sep 9 14:55:01 2005 UTC
# Line 40  Line 40 
40  a variable called C<$fig>. This makes it fairly straightforward to determine which  a variable called C<$fig>. This makes it fairly straightforward to determine which
41  FIG methods are required to load the Sprout database.  FIG methods are required to load the Sprout database.
42    
43    This object creates the load files; however, the tables are not created until it
44    is time to actually do the load from the files into the target database.
45    
46  =cut  =cut
47    
48  #: Constructor SproutLoad->new();  #: Constructor SproutLoad->new();
# Line 935  Line 938 
938      return $retVal;      return $retVal;
939  }  }
940    
941    =head3 LoadSourceData
942    
943    C<< my $stats = $spl->LoadSourceData(); >>
944    
945    Load the source data from FIG into Sprout.
946    
947    Source data links genomes to information about the organizations that
948    mapped them.
949    
950    The following relations are loaded by this method.
951    
952        ComesFrom
953        Source
954        SourceURL
955    
956    There is no direct support for source attribution in FIG, so we access the SEED
957    files directly.
958    
959    =over 4
960    
961    =item RETURNS
962    
963    Returns a statistics object for the loads.
964    
965    =back
966    
967    =cut
968    #: Return Type $%;
sub LoadSourceData {
    # Load the source-attribution relations (ComesFrom, Source, SourceURL).
    #
    # For every genome ID that is a key of $self->{genomes}, the first line
    # of "$FIG_Config::organisms/<genomeID>/PROJECT" is read and split on
    # tabs into a source ID, a description and a URL. Genomes whose PROJECT
    # file is missing, unreadable or empty are skipped silently.
    #
    # Returns the value produced by $self->_FinishAll() (documented in the
    # POD as a statistics object for the loads).

    # Get this object instance.
    my ($self) = @_;
    # Get the genome hash and count its keys (the genome IDs).
    my $genomeHash = $self->{genomes};
    my $genomeCount = scalar keys %{$genomeHash};
    # Create load objects for each of the tables we're loading. The second
    # argument is presumably a row-count estimate -- TODO confirm against
    # _TableLoader.
    my $loadComesFrom = $self->_TableLoader('ComesFrom', $genomeCount * 4);
    my $loadSource = $self->_TableLoader('Source', $genomeCount * 4);
    my $loadSourceURL = $self->_TableLoader('SourceURL', $genomeCount * 8);
    Trace("Beginning source data load.") if T(2);
    # Track which source IDs have already had a URL / description written,
    # so each source gets at most one SourceURL row and one Source row.
    my %sourceURL = ();
    my %sourceDesc = ();
    # Loop through the genome IDs. We must iterate over the KEYS only: a
    # bare hash in list context would also yield the values.
    for my $genomeID (keys %{$genomeHash}) {
        Trace("Processing $genomeID.") if T(3);
        # Open the project file. A missing file simply means there is no
        # source data for this genome.
        my $projectFile = "$FIG_Config::organisms/$genomeID/PROJECT";
        next unless open(my $projectHandle, '<', $projectFile);
        # Only the first line is relevant. Close the handle right away so
        # it is released exactly once, and only when the open succeeded.
        my $line = <$projectHandle>;
        close $projectHandle;
        next unless defined $line;
        chomp $line;
        # The line has the form: sourceID <tab> description <tab> URL.
        my ($sourceID, $desc, $url) = split(/\t/, $line);
        # A blank line yields no source ID; there is nothing to attribute.
        next unless defined $sourceID && $sourceID ne '';
        $loadComesFrom->Put($genomeID, $sourceID);
        # The "already seen" hashes are keyed by source ID, so the lookup
        # must use the source ID as well.
        if ($url && ! exists $sourceURL{$sourceID}) {
            $loadSourceURL->Put($sourceID, $url);
            $sourceURL{$sourceID} = 1;
        }
        if ($desc && ! exists $sourceDesc{$sourceID}) {
            $loadSource->Put($sourceID, $desc);
            $sourceDesc{$sourceID} = 1;
        }
    }
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1010    
1011    
1012    =head3 LoadGroupData
1013    
1014    C<< my $stats = $spl->LoadGroupData(); >>
1015    
1016    Load the genome Groups into Sprout.
1017    
1018    The following relations are loaded by this method.
1019    
1020        GenomeGroups
1021    
1022    There is no direct support for genome groups in FIG, so we access the SEED
1023    files directly.
1024    
1025    =over 4
1026    
1027    =item RETURNS
1028    
1029    Returns a statistics object for the loads.
1030    
1031    =back
1032    
1033    =cut
1034    #: Return Type $%;
sub LoadGroupData {
    # Load the GenomeGroups relation.
    #
    # For every genome ID that is a key of $self->{genomes}, the first line
    # of "$FIG_Config::organisms/<genomeID>/NMPDR" is read and stored as
    # that genome's group. Genomes whose NMPDR file is missing, unreadable
    # or empty are skipped silently.
    #
    # Returns the value produced by $self->_FinishAll() (documented in the
    # POD as a statistics object for the loads).

    # Get this object instance.
    my ($self) = @_;
    # Get the genome hash and count its keys (the genome IDs).
    my $genomeHash = $self->{genomes};
    my $genomeCount = scalar keys %{$genomeHash};
    # Create a load object for the table we're loading. The second argument
    # is presumably a row-count estimate -- TODO confirm against _TableLoader.
    my $loadGenomeGroups = $self->_TableLoader('GenomeGroups', $genomeCount * 4);
    Trace("Beginning group data load.") if T(2);
    # Loop through the genome IDs. We must iterate over the KEYS only: a
    # bare hash in list context would also yield the values.
    for my $genomeID (keys %{$genomeHash}) {
        Trace("Processing $genomeID.") if T(3);
        # Open the NMPDR group file for this genome. A missing file simply
        # means the genome belongs to no group.
        my $groupFile = "$FIG_Config::organisms/$genomeID/NMPDR";
        next unless open(my $groupHandle, '<', $groupFile);
        # Only the first line is relevant. Close the handle right away so
        # it is released exactly once, and only when the open succeeded.
        my $line = <$groupHandle>;
        close $groupHandle;
        next unless defined $line;
        # Clean the line ending. The variable must be named explicitly: a
        # bare chomp acts on $_ and would leave the newline in the group.
        chomp $line;
        # Add the group to the table. Note that there can only be one group
        # per genome.
        $loadGenomeGroups->Put($genomeID, $line);
    }
    # Finish the load.
    my $retVal = $self->_FinishAll();
    return $retVal;
}
1065    
1066  =head2 Internal Utility Methods  =head2 Internal Utility Methods
1067    
1068  =head3 TableLoader  =head3 TableLoader

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3