[Bio] / FigKernelPackages / FIG.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIG.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.394, Wed Nov 23 17:49:12 2005 UTC revision 1.395, Wed Nov 23 20:34:59 2005 UTC
# Line 7883  Line 7883 
7883   get_values   get_values
7884   guess_value_format   guess_value_format
7885    
7886  By default all keys are case sensitive, and all keys have leading and trailing white space removed. Keys can not contain any of the following characters: space, tab, newline, @$!#%^&*()`~{}[]|\:;"'<>?,./ A method (clean_keys) is called for removing these characters from keys.  By default all keys are case sensitive, and all keys have leading and trailing white space removed. Keys can not contain anything but [a-zA-Z0-9_] (or things matched by \w)
7887    
7888  Attributes are not on a 1:1 correlation, so a single key can have several values.  Attributes are not on a 1:1 correlation, so a single key can have several values.
7889    
# Line 7934  Line 7934 
7934  You can request any PIRSF key like this  You can request any PIRSF key like this
7935  $fig->get_attributes(undef, 'PIRSF');  $fig->get_attributes(undef, 'PIRSF');
7936    
7937  You can request any google url like this  You can request any url like this
7938  $fig->get_attributes(undef, undef, undef, 'http://www.google.com');  $fig->get_attributes(undef, undef, undef, 'http://pir.georgetown.edu/sfcs-cgi/new/pirclassif.pl?id=SF001547');
7939    
7940  NOTE: If there are no attributes an empty array will be returned. You need to check for this and not assume that it will be undef.  NOTE: If there are no attributes an empty array will be returned. You need to check for this and not assume that it will be undef.
7941    
7942  =cut  =cut
7943    
 sub feature_attributes {  
     my $self=shift;  
     # RAE. Since I was in a changing mood, I renamed this from feature_attributes to get_attributes (we have genomes, now too).  
     # however I left this in here so as not to break things.  
     return $self->get_attributes(@_);  
 }  
7944    
7945  sub get_attributes {  sub get_attributes {
     # Esoteric question: get_attribute or get_attributes. Everything below (add/change/delete) is singular, but this does make  
     # more sense in the plural. Oh well, I could do both.  
   
7946      my($self,@request) = @_;      my($self,@request) = @_;
7947      my($rdbH,$relational_db_response);      my($rdbH,$relational_db_response);
7948    
# Line 7961  Line 7952 
7952      $rdbH = $self->db_handle;      $rdbH = $self->db_handle;
7953      return () unless ($rdbH);      return () unless ($rdbH);
7954    
7955      # build the SQL statement depending on what we have      # An error check to make sure that we are operating on the new version of attributes
7956      my @col=('fid','tag','val','url');      # If we are not, we will print an error and then return. Otherwise continue
7957      my $select = "SELECT DISTINCT fid,tag,val,url FROM attribute ";      eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
7958      my $first=1;      if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
7959      for (my $i=0; $i<=3; $i++)  
7960      {      # columns are now genome, ftype, id, key, val, url
7961       next if (!$request[$i]);      # here we generate the select statement based on what is in the request. Only add those fields we need.
7962       if ($first) {      # we add the where conditional to the @where array and the value for that conditional to the @values array
7963        $select .= "WHERE ( ";      # and then join the @where into the select statement. The @values is provided to the SQL statement to merge
7964        undef($first);      my @where; my @values;
7965       }      if ($request[0]) {push @where, qw[genome ftype id]; push @values, $self->split_attribute_oid($request[0])}
7966       else {      if ($request[1]) {push @where, "key"; push @values, $request[1]}
7967        $select .= " AND ";      if ($request[2]) {push @where, "val"; push @values, $request[2]}
7968       }      if ($request[3]) {push @where, "url"; push @values, $request[3]}
7969       $select .= $col[$i] . " = \'" . $request[$i] . "\' ";  
7970        my $select = "SELECT genome,ftype,id,key,val,url FROM attribute where (".join(" = ? and ", @where)." = ?)";
7971        my $res=$rdbH->SQL($select, undef, @values);
7972    
7973        # the following line takes the first 3 elements from each array and puts them back
7974        # to be a feature or genome using join_attribute_oid and then puts them back in the array.
7975        map {unshift @$_, $self->join_attribute_oid(splice(@$_, 0, 3))} @$res;
7976        return @{$res};
7977      }      }
     if (!$first) {$select .= " )"}  
     return @{$rdbH->SQL($select)};  
 }  
   
   
 # RAE I moved this here as it made no sense where it was. Keeping all the stuff together.  
 # but then I went and renamed it :)  
7978    
7979  sub replace_peg_key_value {  sub replace_peg_key_value {
7980      my $self=shift;      my $self=shift;
# Line 7991  Line 7982 
7982      # but the name is more consistent with the other routines that I am adding (add, change, delete)      # but the name is more consistent with the other routines that I am adding (add, change, delete)
7983      # we should have some verbose switch like $self->{'verbose'} to allow warning of things like this that users shouldn't see      # we should have some verbose switch like $self->{'verbose'} to allow warning of things like this that users shouldn't see
7984    
7985      #if ($self->{'verbose'}) {print STDERR "replace_peg_key_value has been deprecated and changed to change_attributes\n"}      print STDERR "replace_peg_key_value has been deprecated and changed to change_attributes\n";
7986      return $self->change_attribute(@_);      return $self->change_attribute(@_);
7987  }  }
7988    
7989    
 # RAE Style: There are two (or more?) ways that I could have coded this. I though about making another subroutine with the SQL and file calls,  
 # and could do so easily, but I opted for the copy paste method which makes the code longer but probably (?) clearer.  
   
7990  =head3 add_attribute  =head3 add_attribute
7991    
7992  Add a new key/value pair to something. Something can be a genome id, a peg, an rna, prophage, whatever.  Add a new key/value pair to something. Something can be a genome id, a peg, an rna, prophage, whatever.
# Line 8011  Line 7999 
7999          optional URL to add          optional URL to add
8000          boolean to prevent writing to the transaction log. See above          boolean to prevent writing to the transaction log. See above
8001    
   
8002  =cut  =cut
8003    
   
   
 sub search_index_by_attribute {  
     # supports search_index method by finding attributes via the attribute table in  
     # the database rather than via glimpse indexes.  This will go away with Bobs  
     # migration to Lucene, but for now we've been asked to give immediate search  
     # capability on attributes without rerunning index building.  
     #  
     # return array of (peg, org, aliasList, function) where we'll set aliasList to  
     # the value of the alias and leave function blank.  
     #  
     # now case _in_sensitive  
     #  
   
     my($self,$searchTerm)=@_;  
     return unless( $searchTerm);  
     my $rdbH = $self->db_handle;  
   
     my $theTerm = uc( $searchTerm );  
     my $relational_db_response=$rdbH->SQL("SELECT fid,tag, val from attribute WHERE UPPER(tag) LIKE '%$theTerm%' OR UPPER(val) LIKE '%$theTerm%'");  
   
     my @results;  
     foreach my $res (@$relational_db_response) {  
         my ($fid, $tag, $value)=@$res;  
         $tag =~ s/^\s+//; $tag =~ s/\s+$//;  
         my $org = $self->genus_species( $self->genome_of($fid) );  
         push (@results, [$fid, $org, "[attribute $tag] $value", ""] );  
     }  
     return @results;  
 }  
   
   
 sub find_by_attribute {  
     # search by substrings in attribute values or attribute tags.  
     # This might replace the present search-for-attributes that works by  
     # glimpse.  The problem with the present approach is that you can't  
     # search until you rebuild indices with make_attribute_index  
     #  
   
     my($self,$searchTerm)=@_;  
     return unless( $searchTerm);  
     my $rdbH = $self->db_handle;  
   
     my $relational_db_response=$rdbH->SQL("SELECT fid,tag, val from attribute WHERE tag LIKE '%$searchTerm%' OR val LIKE '%$searchTerm%'");  
     my @results;  
   
     foreach my $res (@$relational_db_response) {  
         my ($fid, $tag, $value)=@$res;  
         $tag =~ s/^\s+//; $tag =~ s/\s+$//;  
         push (@results, [$fid, $tag, $value]);  
     }  
     return @results;  
 }  
   
   
   
8004  sub add_attribute {  sub add_attribute {
8005      my($self,$peg,$k,$v, $url, $notl) = @_;      my($self,$peg,$k,$v, $url, $notl) = @_;
8006      return unless ($peg && $k); # we must have at least a peg and a tag to add (though other things can be undef)      return unless ($peg && $k); # we must have at least a peg and a tag to add (though other things can be undef)
8007      $k =  $self->clean_attribute_key($k);      $k =  $self->clean_attribute_key($k);
8008      my $rdbH = $self->db_handle;      my $rdbH = $self->db_handle;
8009      $rdbH->SQL("INSERT INTO attribute ( fid,tag,val,url ) VALUES ( ?,?,?,?)", undef, $peg,$k,$v,$url);  
8010        # An error check to make sure that we are operating on the new version of attributes
8011        # If we are not, we will print an error and then return. Otherwise continue
8012        eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8013        if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8014    
8015        # split the peg/feature/genome into pieces and parts
8016        $rdbH->SQL("INSERT INTO attribute ( genome,ftype,id,key,val,url ) VALUES ( ?,?,?,?,?,?)", undef, $self->split_attribute_oid($peg),$k,$v,$url);
8017      my $location=$self->attribute_location($peg);      my $location=$self->attribute_location($peg);
8018      &verify_dir("$location");      &verify_dir("$location");
8019      if (!$notl && open(TMPATTR,">>$location/transaction_log"))      if (!$notl && open(TMPATTR,">>$location/transaction_log"))
# Line 8143  Line 8081 
8081      return (0) unless ($peg && $k); # we must have at least a peg and a key.      return (0) unless ($peg && $k); # we must have at least a peg and a key.
8082      $k =  $self->clean_attribute_key($k);      $k =  $self->clean_attribute_key($k);
8083      my $rdbH = $self->db_handle;      my $rdbH = $self->db_handle;
8084    
8085        # An error check to make sure that we are operating on the new version of attributes
8086        # If we are not, we will print an error and then return. Otherwise continue
8087        eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8088        if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8089    
8090      # Build the delete statement "@boundValues" will be the values replacing the      # Build the delete statement "@boundValues" will be the values replacing the
8091      # parameter marks.      # parameter marks.
8092      my @boundValues = ($peg, $k);      my @boundValues = ($self->split_attribute_oid($peg), $k);
8093      my $exc="DELETE FROM attribute WHERE fid = ? and tag = ?";      my $exc="DELETE FROM attribute WHERE genome = ? and ftype = ? and id = ? and key = ? ";
8094      if ($oldval) {      if ($oldval) {
8095          $exc .= " and val = ?";          $exc .= " and val = ?";
8096          push @boundValues, $oldval;          push @boundValues, $oldval;
# Line 8157  Line 8101 
8101      }      }
8102      $rdbH->SQL($exc, undef, @boundValues);      $rdbH->SQL($exc, undef, @boundValues);
8103      if (defined $newval) {      if (defined $newval) {
8104          $exc = "INSERT INTO attribute ( fid,tag,val,url ) VALUES ( ?,?,?,? )";          $exc = "INSERT INTO attribute (  genome,ftype,id,key,val,url ) VALUES ( ?,?,?,?,?,? )";
8105          $rdbH->SQL($exc, undef, $peg, $k, $newval, $newurl);          $rdbH->SQL($exc, undef, $self->split_attribute_oid($peg), $k, $newval, $newurl);
8106          # write to the transaction log if we add a new value (writing deletes is handled above)          # write to the transaction log if we add a new value (writing deletes is handled above)
8107          my $location = $self->attribute_location($peg);          my $location = $self->attribute_location($peg);
8108          &verify_dir("$location");          &verify_dir("$location");
# Line 8188  Line 8132 
8132  sub clean_attribute_key {  sub clean_attribute_key {
8133   my ($self, $key)=@_;   my ($self, $key)=@_;
8134   #$key =~ s/[\s\n\t\$\@\/\\\Q!#%^&*()`~{}[]|:;"'<>?,.\E]//g; # the \Q .. \E just allows not escaping all the intermediate metacharacters   #$key =~ s/[\s\n\t\$\@\/\\\Q!#%^&*()`~{}[]|:;"'<>?,.\E]//g; # the \Q .. \E just allows not escaping all the intermediate metacharacters
8135     my $old = $key;
8136     $key =~ s/\s+/\_/g;
8137     $key =~ s/\-/\_/g;
8138   $key =~ s/\W//g;   $key =~ s/\W//g;
8139     $key =~ s/\_+/\_/g;
8140     unless ($old eq $key)
8141     {
8142      print STDERR "The key $old had illegal characters removed and is now $key. The only characters allowed in keys are [a-zA-Z0-9_]\n";
8143     }
8144   return $key;   return $key;
8145  }  }
8146    
8147    
8148    =head2 Splitting and Joining Attributes "oids"
8149    
8150    There was a big problem with attributes being very slow to recover, and having to recover all attributes just to get those for a peg or a genome. The current implementation splits the original ID (oid) into three columns, genome, ftype, and id. The ftype is peg, rna, pp, etc. The id is the feature number. The genome is the genome number.
8151    
8152    Hence:
8153    fig|83333.1.peg.1345 becomes 83333.1, peg, and 1345
8154    83333.1 becomes 83333.1, '', and ''
8155    
8156    To split an oid into an array with three parts:
8157            $self->split_attribute_oid($peg);
8158    
8159    To join the three parts of a series of results:
8160    map {unshift @$_, $self->join_attribute_oid(splice(@$_, 0, 3))} @$res;
8161    
8162    This code splices the first three elements of the the array, joins them, and then unshifts the result of that join back into the start of the array. Cool, eh?
8163    
8164  =head3 split_attribute_oid()  =head3 split_attribute_oid()
8165    
8166  use my ($genome, $type, $id)=split_attribute_feature($id);  use my ($genome, $type, $id)=split_attribute_feature($id);
# Line 8219  Line 8188 
8188   }   }
8189  }  }
8190    
8191    =head3  join_attribute_oid()
8192    
8193    use my $id=join_attribute_oid($genome, $feature, $id);
8194    
8195    Joins an attribute back together after it has been pulled from the mysql database
8196    
8197    =cut
8198    
8199    sub join_attribute_oid {
8200     my ($self, @parts)=@_;
8201     if ($parts[0] =~ /^\d+\.\d+$/ && $parts[1] =~ /^\w+$/ && $parts[2] =~ /^\d+$/)
8202     {
8203      # it is a feature ID
8204      return "fig|$parts[0].$parts[1].$parts[2]";
8205     }
8206     elsif ($parts[0] =~ /^\d+\.\d+$/ && !($parts[1] && $parts[2]))
8207     {
8208      # it is a genome
8209      return $parts[0];
8210     }
8211     else
8212     {
8213      print STDERR "Not sure what ", join("", @parts), " should map to, it doesn't appear to be a valid feature or genome\n";
8214      return join("", @parts);
8215     }
8216    }
8217    
8218  =head3 read_attribute_transaction_log  =head3 read_attribute_transaction_log
8219    
# Line 8277  Line 8272 
8272    
8273   my %path_to_files; # this hash has the path as the key and the genome id as the value   my %path_to_files; # this hash has the path as the key and the genome id as the value
8274    
8275   # get all the tags we know about   # first, find all the features with our attribute
8276   my $tags=$self->get_keys();   foreach my $attributes ($self->get_attributes(undef, $attr))
  foreach my $type (keys %$tags)  
8277   {   {
8278    foreach my $label (keys %{$tags->{$type}})     unless ($attributes->[1] eq $attr)
8279    {    {
8280     next unless ($label eq $attr);      print STDERR "Warning : expected to erase $attr but we retrieved ", $attributes->[1], "\n";
8281     foreach my $peg (@{$tags->{$type}->{$label}})      next;
    {  
     # delete the attribute from the database  
     my $rdbH = $self->db_handle;  
     $rdbH->SQL("DELETE FROM attribute WHERE fid = \'$peg\' and tag = \'$label\'");  
     # make a hash of all files that we should delete, and then when we are done we can go move them  
     $path_to_files{$self->attribute_location($peg)}=$self->genome_of($peg);  
    }  
8282    }    }
8283       $self->delete_attribute(@$attributes, 1);
8284       $path_to_files{$self->attribute_location($attributes->[0])}=$self->genome_of($attributes->[0]);
8285   }   }
8286    
8287   # now we need to check that we have the files to delete   # now we need to check that we have the files to delete
# Line 8306  Line 8295 
8295    
8296   return 1 unless (scalar @files); # don't continue if there are no files to move   return 1 unless (scalar @files); # don't continue if there are no files to move
8297    
8298   `mkdir -p $FIG_Config::temp/Attributes/deleted_attributes, 0755` unless (-e "$FIG_Config::temp/Attributes/deleted_attributes");   $self->verify_dir("$FIG_Config::temp/Attributes/deleted_attributes");
  unless (-e "$FIG_Config::temp/Attributes/deleted_attributes") {  
    die "directory structure not constructed. wanted:\n$FIG_Config::temp/Attributes/deleted_attributes\nCan't proceed";  
  }  
8299    
8300   foreach my $path (@files)   foreach my $path (@files)
8301   {   {
# Line 8332  Line 8318 
8318   return scalar @files;   return scalar @files;
8319  }  }
8320    
8321    =head2 Methods to get keys and values
8322    
8323  =head3 get_keys  I have added several methods to just get keys and values. Each of the methods has "distinct" in the name when appopriate. This is both to distinguish these methods from the older methods with similar names and to act as a reminder that the methods just return distinct values.
8324    
8325  Get all the keys that we know about.  =head3 get_distinct_keys
   
 Without any arguments:  
   
 Returns a reference to a hash, where the key is the type of feature (peg, genome, rna, prophage, etc), and the value is a reference to a hash where the key is the key name and the value is a reference to an array of all features with that id.  
   
 e.g.  
8326    
8327  print "There are  " , scalar @{{$fig->get_keys}->{'peg'}->{'PIRSF'}}, " PIRSF keys in the database\n";  Get all the keys and only the keys. This takes two optional arguments, ftype and id.
   
 my $keys=$fig->get_keys;  
 foreach my $type (keys %$keys)  
 {  
  foreach my $label (keys %{$keys->{$type}})  
  {  
   foreach my $peg (@{$keys->{$type}->{$label}})  
   {  
     .. do something to each peg and genome here  
   }  
  }  
 }  
   
 With an argument (that should be a recognized type like peg, rna, genome, etc):  
   
 Returns a reference to a hash where the key is the key name and the value is the reference to the array. This should use less memory than above.  
 The argument should be (currently) peg, rna, pp, genome, or any other recognized feature type (generally defined as the .peg. in the fid). The default is to return all keys, and this can also be specified with all  
8328    
8329  =cut  =cut
8330    
8331  sub get_tags {  sub get_distinct_ftype_keys {
8332   my $self=shift @_;   my($self, $ftype, $id)=@_;
  # deprecated method replaced with get_keys  
  return $self->get_keys(@_);  
 }  
   
 sub get_keys {  
  my($self, $want)=@_;  
  unless ($want) {$want = "all"}  
8333   my $rdbH = $self->db_handle;   my $rdbH = $self->db_handle;
  my $relational_db_response=$rdbH->SQL("SELECT fid,tag from attribute");  
  my $tags;  
  foreach my $res (@$relational_db_response) {  
   my ($fid, $tag)=@$res;  
   $tag =~ s/^\s+//; $tag =~ s/\s+$//;  
   my $type=$self->ftype($fid);  
   if ($type && ($want eq $type || $want eq "all")) {  
    push (@{$tags->{$type}->{$tag}}, $fid);  
   } elsif (($fid =~ /^\d+\.\d+$/) && (lc($want) eq "genome" || $want eq "all")) {  
    push (@{$tags->{'genome'}->{$tag}}, $fid);  
   }  
  }  
  if ($want eq "all") {return $tags} else {return $tags->{$want}}  
 }  
8334    
8335  =head2 get_all_keys()   # An error check to make sure that we are operating on the new version of attributes
8336     # If we are not, we will print an error and then return. Otherwise continue
8337     eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8338     if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8339    
8340  Just get all the keys and return them. No processing involved.   return @{$rdbH->SQL("SELECT DISTINCT key from attribute where (genome is not null and ftype = ? and id = ?)", undef, $ftype, $id)};
8341    }
8342    
8343  =cut  =head3 key_info
8344    
8345  sub get_all_keys {  Access a hash of key information. The data that are returned are currently:
  my($self)=@_;  
  my $rdbH = $self->db_handle;  
  return @{$rdbH->SQL("SELECT fid,tag from attribute")};  
 }  
8346    
8347  =head3 get_values  hash key name           what is it                      data type
8348    single                                                  [boolean]
8349    description             Explanation of key              [free text]
8350    readonly                whether to allow read/write     [boolean]
8351    is_cv                   attribute is a cv term          [boolean]
8352    
8353  Get all the values that we know about  Single is a boolean, if it is true only the last value returned should be used. Note that the other methods willl still return all the values, it is upto the implementer to ensure that only the last value is used.
8354    
8355  Without any arguments:  Explanation is a user-derived explanation that can be free text
8356    
8357  Returns a reference to a hash, where the key is the type of feature (peg, genome, rna, prophage, etc), and the value is a reference to a hash where the key is the value and the value is the number of occurences  If a reference to a hash is provided, along with the key, those values will be set to the attribute_keys file
8358    
8359    Returns an empty hash if the key is not provieded or doesn't exist
8360    
8361  e.g. print "There are  " , {$fig->get_values}->{'peg'}->{'100'}, " keys with the value 100 in  the database\n";  e.g.
8362    $fig->key_info($key, \%data); # set the data
8363    $data=$fig->key_info($key); # get the data
8364    
8365  With a single argument:  This data is stored in a file called $FIG_Config::global/Attributes/attribute_metadata and in a database called attribute_metadata. The data is strictly on a last in last out basis, so that if a datapoint is changed, the last datapoint in the database or file is returned. At the moment I am not coding the ability to edit data.
8366    
8367  The argument is assumed to be the type (rna, peg, genome, etc).  The method takes the following arguments
8368    
8369  With two arguments:  =over 4
8370    
8371  The first argument is the type (rna, peg, genome, etc), and the second argument is the key.  =item key
8372    
8373  In each case it will return a reference to a hash.  The key to look for or add data to.
8374    
8375  E.g.  =item $data
8376    
8377          $fig->get_values(); # will get all values  A reference to a hash containing the new data to add to the database. If provided this will cause the database to be updated
8378    
8379          $fig->get_values('peg'); # will get all values for pegs  =item $nowrite
8380    
8381          $fig->get_values('peg', 'pirsf'); # will get all values for pegs with attribute pirsf  Do not write the new data to the attributes_metadata file. This is mainly used by load_attributes to prevent a circular read/write condition.
8382    
8383          $fig->get_values(undef, 'pirsf'); # will get all values for anything with that attribute  =back
8384    
8385  =cut  =cut
8386    
8387  sub get_values {  sub key_info {
8388   my ($self, $want, $tag)=@_;   my ($self, $key, $data, $nowrite)=@_;
8389   unless ($want) {$want="all"}   return unless ($key);
8390     $key =  $self->clean_attribute_key($key);
8391   my $rdbH = $self->db_handle;   my $rdbH = $self->db_handle;
  $tag =~ s/^\s+//; $tag =~ s/\s+$//; $tag=uc($tag);  
   
  my $sql="SELECT fid,val FROM attribute";  
  if ($tag) {$sql .= " WHERE tag = \'$tag\'"}  
8392    
8393   my $relational_db_response=$rdbH->SQL($sql);   # An error check to make sure that we are operating on the new version of attributes
8394     # If we are not, we will print an error and then return. Otherwise continue
8395     eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8396     if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8397    
8398   my $tags;   unless ($data)
8399   foreach my $res (@$relational_db_response) {   {
8400    my ($fid, $val)=@$res;    # we can just return the info right away
8401    my $type=$self->ftype($fid);    return $self->{'key_info'}->{$key} if ($self->{'key_info'}->{$key});
8402    if ($type && ($want eq $type || $want eq "all")) {    my $res=$rdbH->SQL("SELECT  metakey, metaval from attribute_metadata where attrkey = ?", undef, $key);
8403     $tags->{$type}->{$val}++;    foreach my $result (@$res)
8404    } elsif (($fid =~ /^\d+\.\d+$/) && (lc($want) eq "genome" || $want eq "all")) {    {
8405     $tags->{'genome'}->{$val}++;     $self->{'key_info'}->{$key}->{$result->[0]}=$result->[1];
   }  
8406   }   }
8407   if ($want eq "all") {return $tags} else {return $tags->{$want}}    return $self->{'key_info'}->{$key};
8408  }  }
8409    
8410  =head3 key_info   # there is new data to add
8411     # first, check if we have an old style attributes file and update it. eventually we should be able to delete this line.
8412     if (-e "$FIG_Config::global/Attributes/attribute_keys") {$self->update_attributes_metadata("$FIG_Config::global/Attributes/attribute_keys")}
8413    
8414  Access a hash of key information. The data that are returned are:   # now append the new data to the attributes_metadata file
8415     unless ($nowrite)
8416     {
8417      open (OUT, ">>$FIG_Config::global/Attributes/attribute_metadata") || die "Can't append to $FIG_Config::global/Attributes/attribute_metadata";
8418     }
8419     foreach my $datum (keys %$data)
8420     {
8421      unless (defined $data->{$datum}) {$data->{$datum}='true'} # just make it true so that it exists
8422      unless ($nowrite) {print OUT "$key\t$datum\t", $data->{$datum}, "\n"}
8423    
8424  hash key name           what is it                      data type    $rdbH->SQL("INSERT INTO attribute_metadata (attrkey, metakey, metaval) VALUES (?,?,?) ", undef, $key, $datum, $data->{$datum});
8425  single                                                  [boolean]   }
8426  description             Explanation of key              [free text]   unless ($nowrite) {close OUT}
8427  readonly                whether to allow read/write     [boolean]   my $res=$rdbH->SQL("SELECT  metakey, metaval from attribute_metadata where attrkey = ?", undef, $key);
8428  is_cv                   attribute is a cv term          [boolean]   foreach my $result ($res)
8429     {
8430      $self->{'key_info'}->{$key}->{$result->[0]}=$result->[1];
8431     }
8432     return $self->{'key_info'}->{$key};
8433    }
8434    
8435  Single is a boolean, if it is true only the last value returned should be used. Note that the other methods willl still return all the values, it is upto the implementer to ensure that only the last value is used.  =head3 update_attributes_metadata()
8436    
8437  Explanation is a user-derived explanation that can be free text  This method exists solely to update the attributes metadata file and make sure that it is in the right format.
8438    This method can probably be deleted in a while, but it needs to be run on all machines with attributes data before then!
8439    
8440  If a reference to a hash is provided, along with the key, those values will be set to the attribute_keys file  It is only called if an old attributes metadata file is found.
8441    
8442  Returns an empty hash if the key is not provieded or doesn't exist  The method returns the filename where the data is now stored.
   
 e.g.  
 $fig->key_info($key, \%data); # set the data  
 $data=$fig->key_info($key); # get the data  
8443    
8444  =cut  =cut
8445    
 sub key_info {  
  my ($self, $key, $data)=@_;  
  return unless ($key);  
  $key =  $self->clean_attribute_key($key);  
  return $self->{'key_info'}->{$key} if (!$data && $self->{'key_info'}->{$key});  
8446    
8447    sub update_attributes_metadata {
8448     my ($self, $file)=@_;
8449   my $version=1;   my $version=1;
8450   my $attr = {};   my $attr;
8451   if (-e "$FIG_Config::global/Attributes/attribute_keys") {   open(IN, $file) || die "Can't open $file for reading";
   open(IN, "$FIG_Config::global/Attributes/attribute_keys") || die "Can't open $FIG_Config::global/Attributes/attribute_keys although it exists";  
8452    while (<IN>) {    while (<IN>) {
8453    if (/^\#\s*Version\s*(\d+)/) {$version=$1}    if (/^\#\s*Version\s*(\d+)/) {$version=$1}
8454     next if (/^\s*\#/);     next if (/^\s*\#/);
# Line 8499  Line 8457 
8457     my @a=split /\t/;     my @a=split /\t/;
8458     # fix old versions of attribute_keys     # fix old versions of attribute_keys
8459     if ($version==1) {$attr->{$a[0]}->{'single'}=$a[1]; $attr->{$a[0]}->{'description'}=$a[2]; next}     if ($version==1) {$attr->{$a[0]}->{'single'}=$a[1]; $attr->{$a[0]}->{'description'}=$a[2]; next}
   
8460     $attr->{$a[0]}->{$a[1]}=$a[2];     $attr->{$a[0]}->{$a[1]}=$a[2];
8461    }    }
8462    close IN;    close IN;
8463   }   unlink($file);
8464    
8465   if ($data)   my $rdbH = $self->db_handle;
8466   {  
8467    $attr->{$key}=$data;   # An error check to make sure that we are operating on the new version of attributes
8468    open(OUT, ">$FIG_Config::global/Attributes/attribute_keys") || die "Can't open $FIG_Config::global/Attributes/attribute_keys for writing";   # If we are not, we will print an error and then return. Otherwise continue
8469     eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8470     if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8471    
8472     unless (-e "$FIG_Config::global/Attributes/attribute_metadata")
8473     {
8474      open (OUT, ">$FIG_Config::global/Attributes/attribute_metadata") || die "Can't open $FIG_Config::global/Attributes/attribute_metadata";
8475     print OUT "# Version 2\n# This file contains information about the attribute keys in this database. The columns are:\n";     print OUT "# Version 2\n# This file contains information about the attribute keys in this database. The columns are:\n";
8476     print OUT "# attribute key\n# tag associated for that key\n# value of that tag\n";     print OUT "# attribute key\n# tag associated for that key\n# value of that tag\n";
8477     print OUT "# Each attribute key can have as many of these as you want. The last one in the file will be used. This is used to store data applicable to\n";     print OUT "# Each attribute key can have as many of these as you want. The last one in the file will be used. This is used to store data applicable to\n";
8478     print OUT "# every key in the attributes\n";     print OUT "# every key in the attributes\n";
8479      close OUT;
8480     }
8481    
8482    #whatever this does, it wasn't quite right, so we wrote in enlgish,  below.   open (OUT, ">>$FIG_Config::global/Attributes/attribute_metadata") || die "Can't open $FIG_Config::global/Attributes/attribute_metadata";
   #map {my $k=$_; map {print OUT "$k\t$_\t", $attr->{$k}->{$_}, "\n"} keys %{$attr->{$k}}} keys %$attr;  
   
8483    foreach my $keyName (keys %$attr) {    foreach my $keyName (keys %$attr) {
8484        foreach my $attrName (keys %{$attr->{$keyName}} ) {        foreach my $attrName (keys %{$attr->{$keyName}} ) {
8485            print OUT "$keyName\t$attrName\t$attr->{$keyName}->{$attrName}\n";     unless (defined $attr->{$keyName}->{$attrName}) {$attr->{$keyName}->{$attrName}=1}
8486       print OUT "$keyName\t$attrName\t", $attr->{$keyName}->{$attrName}, "\n";
8487       my $res=$rdbH->SQL("INSERT INTO attribute_metadata (attrkey, metakey, metaval) VALUES (?,?,?)", undef, $keyName, $attrName, $attr->{$keyName}->{$attrName});
8488        }        }
8489    }    }
   
8490    close OUT;    close OUT;
8491     return "$FIG_Config::global/Attributes/attribute_metadata";
8492   }   }
8493    
  $self->{'key_info'}=$attr;  
  if (exists $attr->{$key}) {return $attr->{$key}} else {return}  
   
 }  
   
 =head3 get_key_value  
   
 Given a key and a value will return anything that has both  
   
 E.g.  
   
         my @nonmotile_genomes = $fig->get_key_value('motile', 'non-motile');  
         my @bluepegs          = $fig->get_key_value('color', 'blue');  
   
 If either the key or the value is ommitted will return all the matching sets.  
   
 =cut  
   
 sub get_tag_value {  
  # deprecated to get_key_value  
  my $self=shift @_;  
  return $self->get_key_value(@_);  
 }  
   
 sub get_key_value {  
  my ($self, $key, $val) = @_;  
   
  $key =  $self->clean_attribute_key($key);  
   
  my $sql="SELECT fid,tag,val FROM attribute";  
  # AT THE MOMENT THIS IS NOT WORKING. Not sure why  
  # so I am going to do it in two stages.  
  #if ($key && $val) {$sql .= " WHERE ( tag = \'$key\' and val = \'$val\' )"}  
   
  if ($key && $val) {$sql .= " WHERE (tag = \'$key\' and val = \'$val\')"}  
  elsif ($key) {$sql .= " WHERE tag = \'".$key."\'"}  
  elsif ($val) {$sql .= " WHERE val = \'$val\'"}  
  else  
  {  
   # neither key nor value requested  
   warn("neither key nor value sent to get_key_value");  
   return();  
  }  
   
   
  my $rdbH = $self->db_handle;  
  my $relational_db_response=$rdbH->SQL($sql);  
   
  my @results;  
  map {push @results, $_->[0]} @$relational_db_response;  
  return @results;  
 }  
8494    
8495  =head3 guess_value_format  =head3 guess_value_format
8496    
# Line 8588  Line 8500 
8500   2. numbers   2. numbers
8501   3. percentiles ( a type of number, I know)   3. percentiles ( a type of number, I know)
8502    
   
8503  In these cases, I may want to know something about them and do something interesting with them. This will try and guess what the values are for a given key so that you can try and limit what people add. At the moment this is pure guess work, although I suppose we could put some restrictions on t/v pairs I don't feel like.  In these cases, I may want to know something about them and do something interesting with them. This will try and guess what the values are for a given key so that you can try and limit what people add. At the moment this is pure guess work, although I suppose we could put some restrictions on t/v pairs I don't feel like.
8504    
8505  This method will return a reference to an array. If the element is a string there will only be one element in that array, the word "string". If the value is a number, there will be three elements, the word "float" in position 0, and then the minimum and maximum values. You can figure out if it is a percent :-)  This method will return a reference to an array. If the element is a string there will only be one element in that array, the word "string". If the value is a number, there will be three elements, the word "float" in position 0, and then the minimum and maximum values. You can figure out if it is a percent :-)
# Line 8752  Line 8663 
8663      return;      return;
8664  }  }
8665    
8666    sub search_index_by_attribute {
8667        # please don't put a method between its description and the method. Honor the docs that we have.
8668        # Please add pod for these methods, too.
8669    
8670        # supports search_index method by finding attributes via the attribute table in
8671        # the database rather than via glimpse indexes.  This will go away with Bobs
8672        # migration to Lucene, but for now we've been asked to give immediate search
8673        # capability on attributes without rerunning index building.
8674        #
8675        # return array of (peg, org, aliasList, function) where we'll set aliasList to
8676        # the value of the alias and leave function blank.
8677        #
8678        # now case _in_sensitive
8679        #
8680        my($self,$searchTerm)=@_;
8681        return unless( $searchTerm);
8682        my $rdbH = $self->db_handle;
8683    
8684        # An error check to make sure that we are operating on the new version of attributes
8685        # If we are not, we will print an error and then return. Otherwise continue
8686        eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8687        if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8688    
8689        my $theTerm = uc( $searchTerm );
8690        my $relational_db_response=$rdbH->SQL("SELECT genome,ftype,id,tag,val from attribute WHERE UPPER(tag) LIKE '%$theTerm%' OR UPPER(val) LIKE '%$theTerm%'");
8691    
8692        my @results;
8693        foreach my $res (@$relational_db_response) {
8694            my ($genome,$ftype,$id, $tag, $value)=@$res;
8695            my $fid=$self->join_attribute_oid($genome,$ftype,$id);
8696            my $org = $self->genus_species( $self->genome_of($fid) );
8697            push (@results, [$fid, $org, "[attribute $tag] $value", ""] );
8698        }
8699        return @results;
8700    }
8701    
8702    
8703    sub find_by_attribute {
8704        # search by substrings in attribute values or attribute tags.
8705        # This might replace the present search-for-attributes that works by
8706        # glimpse.  The problem with the present approach is that you can't
8707        # search until you rebuild indices with make_attribute_index
8708        #
8709    
8710    
8711        my($self,$searchTerm)=@_;
8712        return unless( $searchTerm);
8713        my $rdbH = $self->db_handle;
8714    
8715        # An error check to make sure that we are operating on the new version of attributes
8716        # If we are not, we will print an error and then return. Otherwise continue
8717        eval {$rdbH->SQL("SELECT genome,ftype,id,key,val,url FROM attribute LIMIT 1")};
8718        if ($@) {print STDERR "Please rerun load_attributes to install the newest set of attributes\n"; return []}
8719    
8720        my $relational_db_response=$rdbH->SQL("SELECT genome,ftype,id,tag,val from attribute WHERE tag LIKE '%$searchTerm%' OR val LIKE '%$searchTerm%'");
8721        my @results;
8722    
8723        foreach my $res (@$relational_db_response) {
8724            my ($genome,$ftype,$id, $tag, $value)=@$res;
8725            my $fid=$self->join_attribute_oid($genome,$ftype,$id);
8726            push (@results, [$fid, $tag, $value]);
8727        }
8728        return @results;
8729    }
8730    
8731    
8732    
8733  =head3 search_cv_file  =head3 search_cv_file
8734    

Legend:
Removed from v.1.394  
changed lines
  Added in v.1.395

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3