[Bio] / FigKernelPackages / FIG.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIG.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.297, Mon Jun 20 21:58:56 2005 UTC revision 1.298, Wed Jun 22 21:00:24 2005 UTC
# Line 134  Line 134 
134    
135  C<< my $fig = FIG->new(); >>  C<< my $fig = FIG->new(); >>
136    
137  This is the constructor for a FIG object. It uses no parameters.  This is the constructor for a FIG object. It uses no parameters. If tracing
138    has not yet been turned on, it will be turned on here. The tracing type and
139    level are specified by the configuration variables C<$FIG_Config::trace_levels>
140    and C<$FIG_Config::trace_type>. In a future release, the code will be modified
141    to allow overriding the configuration variables with environment variables.
142    
143  =cut  =cut
144    
# Line 188  Line 192 
192      return $rdbH->table_exists($table);      return $rdbH->table_exists($table);
193  }  }
194    
 =head3 Open  
   
 C<< my $ok = FIG::Open($handle, $fileSpec, $message); >>  
   
 Open a file, translating invalid characters in the file name.  
   
 This is a general-purpose file open method that provides an easy way to deal with  
 invalid file name characters. It functions identically to C<Tracer::Open> with the  
 exception that the characters in the C<$FIG_Config::bad_chars> string will be  
 translated into underscores before the open takes place.  
   
 To find the file name, the method looks for a filemode character sequence and takes  
 everything after it, trimming spaces off either end. So, for example, in the  
 following strings the file name is C</usr/fig/myfile.txt>.  
   
     >>/usr/fig/myfile.txt  
     </usr/fig/myfile.txt  
     | sort -u > /usr/fig/myfile.txt  
   
 Note that the file handle parameter has to be specified using glob syntax. To open  
 a file with the handle C<TMP>, you use  
   
     Open(\*TMP, "| sort -u > /usr/fig/myfile.txt");  
   
 =over 4  
   
 =item handle  
   
 File handle. If this parameter is C<undef>, a file handle will be generated  
 and returned as the value of this method.  
   
 =item fileSpec  
   
 File name and mode, as per the PERL C<open> function.  
   
 =item message (optional)  
   
 Error message to use if the open fails. If omitted, a standard error message  
 will be generated. In either case, the error information from the file system  
 is appended to the message. To specify a conditional open that does not throw  
 an error if it fails, use C<0>.  
   
 =item RETURN  
   
 Returns the file handle assigned to the file, or C<undef> if the open failed.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Open {  
     # Get the parameters.  
     my ($handle, $fileSpec, $message) = @_;  
     # Check to see if we need to clean.  
     if ($FIG_Config::bad_chars) {  
         my ($fileName, $pos, $len) = Tracer::FindNamePart($fileSpec);  
         substr $fileSpec, $pos, $len, NameClean($fileName);  
     }  
     # Open the file.  
     my $retVal = Tracer::Open($handle, $fileSpec, $message);  
     # Return the result.  
     return $retVal;  
 }  
   
 =head3 OpenDir  
   
 C<< my @files = FIG::OpenDir($dirName, $filtered); >>  
   
 Open a directory and return all the file names. This function essentially performs  
 the functions of an C<opendir> and C<readdir>. If the I<$filtered> parameter is  
 set to TRUE, all filenames beginning with a period (C<.>) will be filtered out of  
 the return list. If the directory does not open, an exception is thrown. So,  
 for example,  
   
         my @files = FIG::OpenDir("/Volumes/fig/contigs", 1);  
   
 is effectively the same as  
   
         opendir(TMP, "/Volumes/fig/contigs") || Confess("Could not open /Volumes/fig/contigs.");  
         my @files = grep { $_ !~ /^\./ } readdir(TMP);  
   
 Similarly, the following code  
   
         my @files = grep { $_ =~ /^\d/ } FIG::OpenDir("/Volumes/fig/orgs");  
   
 Returns the names of all files in C</Volumes/fig/orgs> that begin with digits and  
 automatically throws an error if the directory fails to open.  
   
 Unlike C<Tracer::OpenDir>, this version cleans bad characters out of the directory  
 name. See L</Open> for details about file name cleaning. Unlike L</Open>, however,  
 this method cleans the whole directory name. There is no need to scan for mode characters  
 or pipes.  
   
 =over 4  
   
 =item dirName  
   
 Name of the directory to open.  
   
 =item filtered  
   
 TRUE if files whose names begin with a period (C<.>) should be automatically removed  
 from the list, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type @;  
 sub OpenDir {  
     # Get the parameters.  
     my ($dirName, $filtered) = @_;  
     # Clean the directory name, if necessary.  
     if ($FIG_Config::bad_chars) {  
         $dirName = NameClean($dirName);  
     }  
     # Open and read the directory.  
     my @retVal = Tracer::OpenDir($dirName, $filtered);  
     return @retVal;  
 }  
   
 =head3 NameClean  
   
 C<< my $cleanName = FIG::NameClean($fileName); >>  
   
 Clean invalid characters from a file name. The characters cleaned are those found int  
 the C<$FIG_Config::bad_chars> configuration variable, and they will all be replaced by  
 underscores.  
   
 =over 4  
   
 =item fileName  
   
 File name to be cleaned.  
   
 =item RETURN  
   
 Returns the incoming file name with all of the designated characters converted to  
 underscores.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub NameClean {  
     # Get the parameters.  
     my ($fileName) = @_;  
     # Clean the file name.  
     my $retVal = $fileName;  
     my $badChars = "[$FIG_Config::bad_chars]";  
     $retVal =~ s/$badChars/_/g;  
     # Return the result.  
     return $retVal;  
 }  
   
195  =head3 cached  =head3 cached
196    
197  C<< my $x = $fig->cached($name); >>  C<< my $x = $fig->cached($name); >>
# Line 623  Line 473 
473    
474  Fully-qualified name of the file containing the data to load. Each line of the  Fully-qualified name of the file containing the data to load. Each line of the
475  file must correspond to a record, and the fields must be arranged in order and  file must correspond to a record, and the fields must be arranged in order and
476  tab-delimited.  tab-delimited. If the file name is omitted, the table is dropped and re-created
477    but not loaded.
478    
479  =item keyList  =item keyList
480    
# Line 647  Line 498 
498      }      }
499      # Get the database handler.      # Get the database handler.
500      my $dbf = $self->{_dbf};      my $dbf = $self->{_dbf};
501          # If we're in ALL mode, we drop and re-create the table. Otherwise,      # Call the DBKernel method.
502          # we delete the obsolete genomes.      $dbf->reload_table($mode, $table, $flds, $xflds, $fileName, $keyList, $keyName);
         if ( $mode eq 'all') {  
                 Trace("Recreating $table.") if T(2);  
                 $dbf->drop_table( tbl  => $table );  
                 $dbf->create_table( tbl  => $table, flds => $flds );  
         } else {  
                 Trace("Clearing obsolete data from $table.") if T(2);  
                 foreach my $key ( @{$keyList} ) {  
                         $dbf->SQL("DELETE FROM $table WHERE ( $keyName = \'$key\' )");  
                 }  
         }  
         # The table is now reading for loading.  
         Trace("Loading $table from $fileName.") if T(2);  
         $dbf->load_table( tbl  => $table, file => $fileName );  
         # If we're in ALL mode, we need to build the indexes.  
         if ( $mode eq 'all' ) {  
                 Trace("Creating indexes for $table.") if T(2);  
                 # Loop through the indexes in the index hash.  
                 for my $idxName (keys %{$xflds}) {  
                         Trace("Creating index $idxName.") if T(3);  
                         $dbf->create_index( idx  => $idxName,  
                                 tbl  => $table,  
                                 type => "btree",  
                                 flds => $xflds->{$idxName}  
                         );  
                 }  
                 $dbf->vacuum_it( "$table" );  
         }  
503  }  }
504    
505  =head3 enqueue_similarities  =head3 enqueue_similarities
# Line 4214  Line 4038 
4038      return undef;      return undef;
4039  }  }
4040    
4041  #TODO: BRUCE IS HERE  =head3 contig_of
4042    
4043    C<< my $contigID = $fig->contig_of($location); >>
4044    
4045    Return the ID of the contig containing a location.
4046    
4047    This method only works with SEED-style locations (I<contigID>C<_>I<beg>C<_>I<end>).
4048    For more comprehensive location parsing, use the B<Location> object.
4049    
4050    =over 4
4051    
4052    =item location
4053    
4054    A SEED-style location (I<contigID>C<_>I<beg>C<_>I<end>), or a comma-delimited list
4055    of SEED-style locations. In the latter case, only the first location in the list will
4056    be processed.
4057    
4058    =item RETURN
4059    
4060    Returns the contig ID from the first location in the incoming string.
4061    
4062    =back
4063    
4064    =cut
4065    
4066  sub contig_of  sub contig_of
4067  {  {
# Line 4225  Line 4072 
4072      return $1;      return $1;
4073  }  }
4074    
4075    =head3 beg_of
4076    
4077    C<< my $beg = $fig->beg_of($location); >>
4078    
4079    Return the beginning point of a location.
4080    
4081    This method only works with SEED-style locations (I<contigID>C<_>I<beg>C<_>I<end>).
4082    For more comprehensive location parsing, use the B<Location> object.
4083    
4084    =over 4
4085    
4086    =item location
4087    
4088    A SEED-style location (I<contigID>C<_>I<beg>C<_>I<end>), or a comma-delimited list
4089    of SEED-style locations. In the latter case, only the first location in the list will
4090    be processed.
4091    
4092    =item RETURN
4093    
4094    Returns the beginning point from the first location in the incoming string.
4095    
4096    =back
4097    
4098    =cut
4099    
4100  sub beg_of  sub beg_of
4101  {  {
4102      my ($self, $locus) = @_;      my ($self, $locus) = @_;
# Line 4234  Line 4106 
4106      return $1;      return $1;
4107  }  }
4108    
4109    =head3 end_of
4110    
4111    C<< my $end = $fig->end_of($location); >>
4112    
4113    Return the ending point of a location.
4114    
4115    This method only works with SEED-style locations (I<contigID>C<_>I<beg>C<_>I<end>).
4116    For more comprehensive location parsing, use the B<Location> object.
4117    
4118    =over 4
4119    
4120    =item location
4121    
4122    A SEED-style location (I<contigID>C<_>I<beg>C<_>I<end>), or a comma-delimited list
4123    of SEED-style locations. In the latter case, only the first location in the list will
4124    be processed.
4125    
4126    =item RETURN
4127    
4128    Returns the contig ID from the first location in the incoming string.
4129    
4130    =back
4131    
4132    =cut
4133    
4134  sub end_of  sub end_of
4135  {  {
4136      my ($self, $locus) = @_;      my ($self, $locus) = @_;
# Line 4243  Line 4140 
4140      return $1;      return $1;
4141  }  }
4142    
4143    =head3 strand_of
4144    
4145    C<< my $strand = $fig->contig_of($location); >>
4146    
4147    Return the strand (C<+> or C<->) of a location.
4148    
4149    This method only works with SEED-style locations (I<contigID>C<_>I<beg>C<_>I<end>).
4150    For more comprehensive location parsing, use the B<Location> object.
4151    
4152    =over 4
4153    
4154    =item location
4155    
4156    A comma-delimited list of SEED-style location (I<contigID>C<_>I<beg>C<_>I<end>).
4157    
4158    =item RETURN
4159    
4160    Returns C<+> if the list describes a forward-oriented location, and C<-> if the list
4161    described a backward-oriented location.
4162    
4163    =back
4164    
4165    =cut
4166    
4167  sub strand_of  sub strand_of
4168  {  {
4169      my ($self, $fid) = @_;      my ($self, $fid) = @_;
# Line 4257  Line 4178 
4178    
4179  =head3 find_contig_with_checksum  =head3 find_contig_with_checksum
4180    
4181    C<< my $contigID = $fig->find_contig_with_checksum($genome, $checksum); >>
4182    
4183  Find a contig in the given genome with the given checksum.  Find a contig in the given genome with the given checksum.
4184    
4185    This method is useful for determining if a particular contig has already been
4186    recorded for the given genome. The checksum is computed from the contig contents,
4187    so a matching checksum indicates that the contigs may have the same content.
4188    
4189    =over 4
4190    
4191    =item genome
4192    
4193    ID of the genome whose contigs are to be examined.
4194    
4195    =item checksum
4196    
4197    Checksum value for the desired contig.
4198    
4199    =item RETURN
4200    
4201    Returns the ID of a contig in the given genome that has the caller-specified checksum,
4202    or C<undef> if no such contig exists.
4203    
4204    =back
4205    
4206  =cut  =cut
4207    
4208  sub find_contig_with_checksum  sub find_contig_with_checksum
# Line 4318  Line 4262 
4262    
4263                              if (!($pid = open2($rd, $wr, "cksum")))                              if (!($pid = open2($rd, $wr, "cksum")))
4264                              {                              {
4265                                  die "Cannot run open2 cksum: $!";                                  Confess("Cannot run open2 cksum: $!");
4266                              }                              }
4267    
4268                              $wr->write($contig_txt, length($contig_txt));                              $wr->write($contig_txt, length($contig_txt));
# Line 4345  Line 4289 
4289      }      }
4290  }  }
4291    
4292    =head3 contig_checksum
4293    
4294    C<< my $checksum = $fig->contig_checksum($genome, $contig); >>
4295    
4296    or
4297    
4298    C<< my @checksum = $fig->contig_checksum($genome, $contig); >>
4299    
4300    Return the checksum of the specified contig. The checksum is computed from the
4301    contig's content in a parallel process. The process returns a space-delimited list
4302    of numbers. The numbers can be split into a real list if the method is invoked in
4303    a list context. For b
4304    
4305    =cut
4306    
4307  sub contig_checksum  sub contig_checksum
4308  {  {
4309      my($self, $genome, $contig) = @_;      my($self, $genome, $contig) = @_;
# Line 4358  Line 4317 
4317    
4318      if (!($pid = open2($rd, $wr, "cksum")))      if (!($pid = open2($rd, $wr, "cksum")))
4319      {      {
4320          die "Cannot run open2 cksum: $!";          Confess("Cannot run open2 cksum: $!");
4321      }      }
4322    
4323      $wr->write($contig_txt, length($contig_txt));      $wr->write($contig_txt, length($contig_txt));
# Line 4593  Line 4552 
4552    
4553  sub by_alias {  sub by_alias {
4554      my($self,$alias,$genome) = @_;      my($self,$alias,$genome) = @_;
4555      my($rdbH,$relational_db_response,$peg);      my($rdbH,$relational_db_response);
4556      my ($peg, $flag) = FIGRules::NormalizeAlias($alias);      my ($peg, $flag) = FIGRules::NormalizeAlias($alias);
4557          if ($flag) {          if ($flag) {
4558                  return $peg;                  return $peg;
# Line 4741  Line 4700 
4700    
4701  ################ Routines to process functional coupling for PEGs  ##########################  ################ Routines to process functional coupling for PEGs  ##########################
4702    
4703  =pod  =head3 coupled_to
4704    
4705  =head1 coupled_to  C<< my @coupled_to = $fig->coupled_to($peg); >>
4706    
4707  usage: @coupled_to = $fig->coupled_to($peg)  Return a list of functionally coupled PEGs.
4708    
4709  The new form of coupling and evidence computation is based on precomputed data.  The new form of coupling and evidence computation is based on precomputed data.
4710  The old form took minutes to dynamically compute things when needed.  The old form  The old form took minutes to dynamically compute things when needed.  The old form
4711  still works, ikf the directory Data/CouplingData is not present.  If it is present,  still works, if the directory B<Data/CouplingData> is not present.  If it is present,
4712  it is assumed to contain comprehensive coupling data in the form of precomputed scores  it theis assumed to contain comprehensive coupling data in the form of precomputed scores
4713  and PCHs.  and PCHs.
4714    
4715  If Data/CouplingData is present, this routine returns a list of 2-tuples [Peg,Sc].  It  If B<Data/CouplingData> is present, this routine returns a list of 2-tuples [Peg,Sc].  It
4716  returns the empty list if the peg is not coupled.  It returns undef, if Data/CouplingData  returns the empty list if the peg is not coupled. It returns C<undef> if B<Data/CouplingData>
4717  is not there.  is not there.
4718    
4719    =over 4
4720    
4721    =item peg
4722    
4723    ID of the protein encoding group whose functionally-coupled proteins are desired.
4724    
4725    =item RETURN
4726    
4727    Returns a list of 2-tuples, each consisting of the ID of a coupled PEG and a score. If
4728    there are no PEGs functionally coupled to the incoming PEG, it will return an empty
4729    list. If the PEG data is not present, it will return C<undef>.
4730    
4731    =back
4732    
4733  =cut  =cut
4734    
4735  sub coupled_to {  sub coupled_to {
# Line 4770  Line 4743 
4743      return @$relational_db_response;      return @$relational_db_response;
4744  }  }
4745    
4746  =pod  =head3 coupling_evidence
   
 =head1 coupling_evidence  
4747    
4748  usage: @evidence = $fig->coupling_evidence($peg1,$peg2)  usage: @evidence = $fig->coupling_evidence($peg1,$peg2)
4749    
# Line 4806  Line 4777 
4777      return @$relational_db_response;      return @$relational_db_response;
4778  }  }
4779    
4780  =pod  =head3 coupling_and_evidence
   
 =head1 coupling_and_evidence  
4781    
4782  usage: @coupling_data = $fig->coupling_and_evidence($fid,$bound,$sim_cutoff,$coupling_cutoff,$keep_record)  usage: @coupling_data = $fig->coupling_and_evidence($fid,$bound,$sim_cutoff,$coupling_cutoff,$keep_record)
4783    
# Line 5462  Line 5431 
5431  Note that no annotation is written.  This should normally be done in a separate  Note that no annotation is written.  This should normally be done in a separate
5432  call of the form  call of the form
5433    
5434    ????
5435    
5436  =cut  =cut
5437    
# Line 7306  Line 7275 
7275                load_attributes                load_attributes
7276                load_bbhs                load_bbhs
7277                load_literature                load_literature
7278                  load_couplings
7279             );             );
7280    
7281      push(@packages, "pegs_in_conflict | peg_to_subsystems > $FIG_Config::global/conflicted.pegs");      push(@packages, "pegs_in_conflict | peg_to_subsystems > $FIG_Config::global/conflicted.pegs");
# Line 8034  Line 8004 
8004    
8005  =head3 roles_of_function  =head3 roles_of_function
8006    
8007  usage: @roles = $fig->roles_of_function($func)  C<< my @roles = $fig->roles_of_function($func); >>
8008    
8009    Returns a list of the functional roles implemented by the specified function. This method
8010    parses the role data out of the function name, and does not require access to the database.
8011    
8012    =over 4
8013    
8014    =item func
8015    
8016  Returns a list of the functional roles implemented by $func.  Name of the function whose roles are to be parsed out.
8017    
8018    =item RETURN
8019    
8020    Returns a list of the roles performed by the specified function.
8021    
8022    =back
8023    
8024  =cut  =cut
8025    

Legend:
Removed from v.1.297  
changed lines
  Added in v.1.298

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3