[Bio] / FigKernelPackages / FigGFF.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FigGFF.pm

Parent Directory | Revision Log | View Patch

revision 1.6, Sat Apr 9 17:21:15 2005 UTC | revision 1.11, Sun Apr 24 16:33:26 2005 UTC
# Line 85  Line 85 
85  {  {
86      my($dbxref) = @_;      my($dbxref) = @_;
87    
88        # if it is not a valid xref just return it
89        return $dbxref unless (m/:/);
90    
91      my($type, $ref) = split(/:/, $dbxref, 2);      my($type, $ref) = split(/:/, $dbxref, 2);
92    
93      if ($type eq "NCBI_NP")      if ($type eq "NCBI_NP")
# Line 109  Line 112 
112          return "sp|$ref";          return "sp|$ref";
113      }      }
114    
115      return undef;      return $dbxref; # just return itself if we don't know what it is.
116  }  }
117    
118  package GFFWriter;  package GFFWriter;
# Line 614  Line 617 
617          }          }
618          elsif (/^\#\#FASTA/)          elsif (/^\#\#FASTA/)
619          {          {
620              print "Got fasta directive\n";              # print "Got fasta directive\n";
621              $_ = <$fh>;              $_ = <$fh>;
622              chomp;              chomp;
623              $self->parse_fasta($fh, $_);              $self->parse_fasta($fh, $_);
# Line 670  Line 673 
673      my($self, $directive, $rest) = @_;      my($self, $directive, $rest) = @_;
674    
675      $directive = lc($directive);      $directive = lc($directive);
676      my @rest=split /\t/, $rest;      # this should catch both #seed and ##seed :-)
677        if ($directive eq "seed")
     if ($directive eq "genome")  
     {  
         $self->current_file->genome_id($rest[0]);  
         $self->current_file->genome_name($rest[1]);  
     }  
     elsif ($directive eq "genome_md5")  
678      {      {
679          $self->current_file->set_genome_checksum(@rest[0,1]);        return $self->parse_seed_directive($rest);
680      }      }
681      elsif ($directive eq "origin")  
682        my @rest=split /\t/, $rest;
683    
684        # removed genome, genome_md5, origin, taxnomy as they are not real gff directives. These are in seed_directives below
685        if ($directive eq "project")
686      {      {
687          print STDERR "We have a directive called origin but this should be changed as it will conflict with NCBI's ORIGIN indicating beginning of the sequence\n";          # I am not sure if PROJECT is a seed directive or a GFF directive
         print STDERR "At the moment ORIGIN is returned by \$feat->project\n";  
688          $self->current_file->project($rest[0]);          $self->current_file->project($rest[0]);
689      }      }
     elsif ($directive eq "taxonomy")  
     {  
         $self->current_file->taxonomy($rest);  
     }  
690      elsif ($directive eq "sequence-region")      elsif ($directive eq "sequence-region")
691      {      {
692            $self->current_file->contigs($rest[0]);
693          $self->{contig_length_cache}->{$rest[0]}=$rest[2]-$rest[1];          $self->{contig_length_cache}->{$rest[0]}=$rest[2]-$rest[1];
694          $self->{contig_start_cache}->{$rest[0]}=$rest[1];          $self->{contig_start_cache}->{$rest[0]}=$rest[1];
695          $self->{contig_end_cache}->{$rest[0]}=$rest[2];          $self->{contig_end_cache}->{$rest[0]}=$rest[2];
696      }      }
697      else      else
698      {      {
699          print "Have gff3 directive '$directive' rest='$rest'\n";          print STDERR "Have gff3 directive '$directive' rest='$rest'\n";
700      }      }
701    
702  }  }
# Line 711  Line 708 
708      my($verb, @rest) = split(/\t/, $rest);      my($verb, @rest) = split(/\t/, $rest);
709    
710      # are we case sensitive? I don't think so      # are we case sensitive? I don't think so
711      $verb-lc($verb);      $verb=lc($verb);
712    
713      if ($verb eq "anno_start")      if ($verb eq "genome")
714        {
715            $self->current_file->genome_id($rest[0]);
716            $self->current_file->genome_name($rest[1]);
717        }
718        elsif ($verb eq "genome_md5")
719        {
720            $self->current_file->set_genome_checksum(@rest[0,1]);
721        }
722        elsif ($verb eq "project")
723        {
724            # I am not sure if PROJECT is a seed directive or a GFF directive
725            $self->current_file->project($rest[0]);
726        }
727        elsif ($verb eq "taxonomy")
728        {
729            $self->current_file->taxonomy($rest);
730        }
731        elsif ($verb eq "anno_start")
732      {      {
733          $self->current_file->anno_start($rest[0]);          $self->current_file->anno_start($rest[0]);
734      }      }
# Line 731  Line 746 
746  {  {
747      my($self, $directive, $rest) = @_;      my($self, $directive, $rest) = @_;
748    
749      print "Have local directive '$directive' rest='$rest'\n";      print STDERR "Have local directive '$directive' rest='$rest'\n";
750  }  }
751    
752  sub parse_feature  sub parse_feature
# Line 765  Line 780 
780    
781          my @values = map { uri_unescape($_) } split(/,/, $value);          my @values = map { uri_unescape($_) } split(/,/, $value);
782    
783            # handle the aliases
784            if ($name eq "Alias") {
785             foreach my $val (@values)
786             {
787               $val = FigGFF::map_dbxref_to_seed_alias($val);
788             }
789            }
790    
791          #          #
792          # This might be a little goofy for the users, but we will use it          # This might be a little goofy for the users, but we will use it
793          # for now:          # for now:
# Line 864  Line 887 
887      my($self, $id, $data) = @_;      my($self, $id, $data) = @_;
888    
889      my $len = length($data);      my $len = length($data);
890      $self->current_file->set_fasta_data($id, $data);      $self->current_file->fasta_data($id, $data);
891  }  }
892    
893  package GFFFeature;  package GFFFeature;
# Line 1023  Line 1046 
1046      $self->{contig_checksum}->{$genome}->{$contig} = $md5sum;      $self->{contig_checksum}->{$genome}->{$contig} = $md5sum;
1047  }  }
1048    
1049    =head2 fasta_data()
1050    
1051    Get or set the fasta data. Given an id and some data will set the data for that id. Given an id will return the data for that id. Called without arguments will return a reference to a hash of sequences.
1052    
1053    This means that if you give it an id and sequence it will return that sequence. Hmmm.
1054    
1055  sub set_fasta_data  =cut
1056    
1057    sub fasta_data
1058  {  {
1059      my($self, $id, $data) = @_;      my($self, $id, $data) = @_;
1060        $id && $data && ($self->{fasta_data}->{$id} = $data);
1061      $self->{fasta_data}->{$id} = $data;      $id && return $self->{fasta_data}->{$id};
1062        return $self->{fasta_data};
1063  }  }
1064    
1065    
# Line 1041  Line 1072 
1072  sub contigs  sub contigs
1073  {  {
1074      my($self, $contig) = @_;      my($self, $contig) = @_;
1075        if ($contig && $contig =~ /\w\w\_\d+\.\d+/) {
1076          print STDERR "WARNING: $contig appears to have a version number. We should standardize on timming that somewhere\n";
1077        }
1078      $contig && (push @{$self->{contigs}}, $contig);      $contig && (push @{$self->{contigs}}, $contig);
     if (!$self->{contigs} && $self->{contig_length_cache}) {$self->{contigs} = keys %{$self->{contig_length_cache}}}  
1079      return $self->{contigs};      return $self->{contigs};
1080  }  }
1081    

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3