[Bio] / FigKernelPackages / FigGFF.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FigGFF.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.8, Sat Apr 9 21:09:30 2005 UTC revision 1.19, Wed Nov 2 21:56:33 2005 UTC
# Line 12  Line 12 
12    
13  use base qw(Exporter);  use base qw(Exporter);
14  use vars qw(@EXPORT);  use vars qw(@EXPORT);
15  @EXPORT = qw(map_seed_alias_to_dbxref);  @EXPORT = qw(map_seed_alias_to_dbxref map_dbxref_to_seed_alias);
16    
17  #  #
18  # General GFF-related routines.  # General GFF-related routines.
# Line 57  Line 57 
57      {      {
58          return "NCBI_NP:$1";          return "NCBI_NP:$1";
59      }      }
60        elsif (/^NM_(\d+.*)/)
61        {
62            return "NCBI_NM:$1";
63        }
64      elsif (/^gi\|(\d+)/)      elsif (/^gi\|(\d+)/)
65      {      {
66          return "NCBI_gi:$1";          return "NCBI_gi:$1";
# Line 86  Line 90 
90      my($dbxref) = @_;      my($dbxref) = @_;
91    
92      # if it is not a valid xref just return it      # if it is not a valid xref just return it
93      return $dbxref unless (m/:/);      return $dbxref unless $dbxref =~ m/:/;
94    
95      my($type, $ref) = split(/:/, $dbxref, 2);      my($type, $ref) = split(/:/, $dbxref, 2);
96    
97      if ($type eq "NCBI_NP")      if (lc($type) eq "ncbi_np")
98        {
99            return "$ref";
100        }
101        elsif (lc($type) eq "ncbi_nm")
102        {
103            return "$ref";
104        }
105        elsif (lc($type) eq "ncbi_pid")
106      {      {
107          return "NP_$ref";          return "$ref";
108      }      }
109      elsif ($type eq "NCBI_gi")      elsif (lc($type) eq "ncbi_gi")
110      {      {
111          return "gi|$ref";          return "gi|$ref";
112      }      }
113      elsif ($type eq "KEGG")      elsif (lc($type) eq "kegg")
114      {      {
115          $ref =~ s/ /:/;          $ref =~ s/ /:/;
116          return "kegg|$ref";          return "kegg|$ref";
117      }      }
118      elsif ($type eq "UniProt")      elsif (lc($type) eq "uniprot")
119      {      {
120          return "uni|$ref";          return "uni|$ref";
121      }      }
122      elsif ($type eq "Swiss-Prot")      elsif (lc($type) eq "swiss-prot")
123      {      {
124          return "sp|$ref";          return "sp|$ref";
125      }      }
# Line 381  Line 393 
393                  chomp($addseq);                  chomp($addseq);
394                  $fastasequences .= ">$protein_id\n$addseq\n";                  $fastasequences .= ">$protein_id\n$addseq\n";
395    
396                  my $addseq = uc($fig->dna_seq($genome, @location));                  $addseq = uc($fig->dna_seq($genome, @location));
397                  $addseq =~ s/(.{$linelength})/$1\n/g; chomp($addseq);                  $addseq =~ s/(.{$linelength})/$1\n/g; chomp($addseq);
398    
399                  $fastasequences .= ">$cds_id\n$addseq\n";                  $fastasequences .= ">$cds_id\n$addseq\n";
# Line 605  Line 617 
617      while (<$fh>)      while (<$fh>)
618      {      {
619          chomp;          chomp;
620            next unless ($_); # ignore empty lines
621          #          #
622          # Check first for the fasta directive so we can run off and parse that          # Check first for the fasta directive so we can run off and parse that
623          # separately.          # separately.
# Line 617  Line 630 
630          }          }
631          elsif (/^\#\#FASTA/)          elsif (/^\#\#FASTA/)
632          {          {
633              print "Got fasta directive\n";              # print "Got fasta directive\n";
634              $_ = <$fh>;              $_ = <$fh>;
635              chomp;              chomp;
636              $self->parse_fasta($fh, $_);              $self->parse_fasta($fh, $_);
# Line 673  Line 686 
686      my($self, $directive, $rest) = @_;      my($self, $directive, $rest) = @_;
687    
688      $directive = lc($directive);      $directive = lc($directive);
689      my @rest=split /\t/, $rest;      # this should catch both #seed and ##seed :-)
690        if ($directive eq "seed")
     if ($directive eq "genome")  
     {  
         $self->current_file->genome_id($rest[0]);  
         $self->current_file->genome_name($rest[1]);  
     }  
     elsif ($directive eq "genome_md5")  
691      {      {
692          $self->current_file->set_genome_checksum(@rest[0,1]);        return $self->parse_seed_directive($rest);
693      }      }
694      elsif ($directive eq "origin")  
695        my @rest=split /\t/, $rest;
696    
697        # removed genome, genome_md5, origin, taxnomy as they are not real gff directives. These are in seed_directives below
698        if ($directive eq "project")
699      {      {
700          print STDERR "We have a directive called origin but this should be changed as it will conflict with NCBI's ORIGIN indicating beginning of the sequence\n";          # I am not sure if PROJECT is a seed directive or a GFF directive
         print STDERR "At the moment ORIGIN is returned by \$feat->project\n";  
701          $self->current_file->project($rest[0]);          $self->current_file->project($rest[0]);
702      }      }
     elsif ($directive eq "taxonomy")  
     {  
         $self->current_file->taxonomy($rest);  
     }  
703      elsif ($directive eq "sequence-region")      elsif ($directive eq "sequence-region")
704      {      {
705          $self->current_file->contigs($rest[0]);          $self->current_file->contigs($rest[0]);
# Line 703  Line 709 
709      }      }
710      else      else
711      {      {
712          print "Have gff3 directive '$directive' rest='$rest'\n";          print STDERR "Have gff3 directive '$directive' rest='$rest'\n";
713      }      }
714    
715  }  }
# Line 715  Line 721 
721      my($verb, @rest) = split(/\t/, $rest);      my($verb, @rest) = split(/\t/, $rest);
722    
723      # are we case sensitive? I don't think so      # are we case sensitive? I don't think so
724      $verb-lc($verb);      $verb=lc($verb);
725    
726      if ($verb eq "anno_start")      if ($verb eq "genome_id")
727        {
728            $self->current_file->genome_id($rest[0]);
729        }
730        elsif ($verb eq "name")
731        {
732            $self->current_file->genome_name($rest[0]);
733        }
734        elsif ($verb eq "genome_md5")
735        {
736            $self->current_file->set_genome_checksum($rest[0]);
737        }
738        elsif ($verb eq "project")
739        {
740            # I am not sure if PROJECT is a seed directive or a GFF directive
741            $self->current_file->project($rest[0]);
742        }
743        elsif ($verb eq "taxonomy")
744        {
745            $self->current_file->taxonomy($rest[0]);
746        }
747        elsif ($verb eq "taxon_id")
748        {
749            $self->current_file->taxon_id($rest[0]);
750        }
751        elsif ($verb eq "anno_start")
752      {      {
753          $self->current_file->anno_start($rest[0]);          $self->current_file->anno_start($rest[0]);
754      }      }
# Line 735  Line 766 
766  {  {
767      my($self, $directive, $rest) = @_;      my($self, $directive, $rest) = @_;
768    
769      print "Have local directive '$directive' rest='$rest'\n";      print STDERR "Have local directive '$directive' rest='$rest'\n";
770  }  }
771    
772  sub parse_feature  sub parse_feature
# Line 797  Line 828 
828    
829              $value = \@values;              $value = \@values;
830          }          }
831            else
832            {
833                $value = $values[0];
834            }
835    
836    
837          $athash->{$name} = $value;          $athash->{$name} = $value;
838    
# Line 842  Line 878 
878      my($self, $fh, $first_line) = @_;      my($self, $fh, $first_line) = @_;
879      my($cur, $cur_id);      my($cur, $cur_id);
880    
881      for ($_ = $first_line; $_;  $_ = <$fh>, chomp)      for ($_ = $first_line; defined($_);  $_ = <$fh>, chomp)
882      {      {
883          if (/^>\s*(\S+)/)          if (/^>\s*(\S+)/)
884          {          {
# Line 889  Line 925 
925    
926  map { $GFF_Tags{$_} = 1 } @GFF_Tags;  map { $GFF_Tags{$_} = 1 } @GFF_Tags;
927    
928  __PACKAGE__->mk_accessors(qw(fig seqid source type start end score strand phase attributes genome fig_id),  __PACKAGE__->mk_accessors(qw(fig seqid source type start end score strand phase attributes
929                                 genome fig_id),
930                            @GFF_Tags);                            @GFF_Tags);
931    
932    
# Line 968  Line 1005 
1005  use strict;  use strict;
1006  use base qw(Class::Accessor);  use base qw(Class::Accessor);
1007    
1008  __PACKAGE__->mk_accessors(qw(fig filename features feature_index anno_start anno_end));  __PACKAGE__->mk_accessors(qw(fig filename features feature_index anno_start anno_end taxon_id genome_id));
1009    
1010  #  #
1011  # Package to hold the contents of a GFF file, and to hold the code  # Package to hold the contents of a GFF file, and to hold the code
# Line 986  Line 1023 
1023          features => [],          features => [],
1024          contigs  => [],          contigs  => [],
1025          feature_index => {},          feature_index => {},
1026          genome_checksum => {},          genome_checksum => '',
1027          contig_checksum => {},          contig_checksum => {},
1028          features_by_genome => {},          features_by_genome => {},
1029      };      };
# Line 1011  Line 1048 
1048    
1049  sub genome_checksum  sub genome_checksum
1050  {  {
     my($self, $genome) = @_;  
   
     return $self->{genome_checksum}->{$genome};  
 }  
   
 sub genome_checksums  
 {  
1051      my($self) = @_;      my($self) = @_;
1052    
1053      return [map { [$_, $self->{genome_checksum}->{$_}] } keys(%{$self->{genome_checksum}})];      return $self->{genome_checksum};
1054  }  }
1055    
1056  sub set_genome_checksum  sub set_genome_checksum
1057  {  {
1058      my($self, $genome, $md5sum) = @_;      my($self, $md5sum) = @_;
1059      $self->{genome_checksum}->{$genome} = $md5sum;      $self->{genome_checksum} = $md5sum;
1060  }  }
1061    
1062  sub set_contig_checksum  sub set_contig_checksum
# Line 1061  Line 1091 
1091  sub contigs  sub contigs
1092  {  {
1093      my($self, $contig) = @_;      my($self, $contig) = @_;
1094        if ($contig && $contig =~ /\w\w\_\d+\.\d+/) {
1095          print STDERR "WARNING: $contig appears to have a version number. We should standardize on timming that somewhere\n";
1096        }
1097      $contig && (push @{$self->{contigs}}, $contig);      $contig && (push @{$self->{contigs}}, $contig);
1098      return $self->{contigs};      return $self->{contigs};
1099  }  }

Legend:
Removed from v.1.8  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3