[Bio] / FigKernelPackages / FigGFF.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FigGFF.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.21, Sun Feb 5 00:45:19 2006 UTC revision 1.22, Fri May 18 13:59:59 2007 UTC
# Line 570  Line 570 
570    
571  use base qw(Class::Accessor);  use base qw(Class::Accessor);
572    
573  __PACKAGE__->mk_accessors(qw(fig current_file));  __PACKAGE__->mk_accessors(qw(fig current_file features_by_genome feature_index features filename fasta_data contig_checksum genome_checksum contigs));
574    
575  my $count;  my $count;
576    
577    =pod
578    
579    =head1 GFFParser
580    
581    A parser for GFF3 files.
582    
583    =head2 new()
584    
585    Instantiate
586    my $fgff = GFFParser->new($fig);
587    
588    =cut
589    
590    
591  #  #
592  # GFF file parser. Creates GFFFiles.  # GFF file parser. Creates GFFFiles.
# Line 590  Line 603 
603      return bless($self, $class);      return bless($self, $class);
604  }  }
605    
606    =head2 parse()
607    
608    Takes a filename as an argument, and returns a file object.
609    
610    The file object is a reference to a hash with the following keys:
611            features_by_genome
612                    An array of all the features in this genome
613            feature_index
614                    A hash with a key of the features by ID and the value being the GFFFeature
615            features
616                    All the features in the genome, as an array with each element being a GFFFeature element
617            filename
618                    The filename of the file that was parsed
619            fasta_data
620                    A hash with the key being the ID and the value being the sequence
621    
622    Not sure about:
623            contig_checksum
624            genome_checksum
625            contigs
626            fig
627    
628    This method now stores the data internally, so you can then access the data as:
629            $fgff->features_by_genome->{}
630            $fgff->feature_index->{}
631            $fgff->features->{}
632            $fgff->filename->{}
633            $fgff->fasta_data->{}
634            $fgff->contig_checksum->{}
635            $fgff->genome_checksum->{}
636            $fgff->contigs->{}
637            $fgff->fig->{}
638    =cut
639    
640  sub parse  sub parse
641  {  {
642      my($self, $file) = @_;      my($self, $file) = @_;
# Line 608  Line 655 
655      }      }
656      else      else
657      {      {
658          open($fh, "<$file") or confess "Cannot open $file: $!";          if ($file =~ /\.gz$/) {open($fh, "gunzip -c $file |") or confess "can't open a pipe to gunzip $file"}
659            else {open($fh, "<$file") or confess "Cannot open $file: $!";}
660          $fobj->filename($file);          $fobj->filename($file);
661          $close_handle = 1;          $close_handle = 1;
662      }      }
# Line 703  Line 751 
751          }          }
752      }      }
753    
754        foreach my $k (qw[features_by_genome feature_index features filename fasta_data contig_checksum genome_checksum contigs])
755        {
756                $self->{$k}=$fobj->{$k};
757        }
758    
759      return $fobj;      return $fobj;
760  }  }
761    
762    =head2 feature_tree
763    
764    Generate and return a feature tree for the features in the GFF3 file. Most features have Parent/Child relationships, eg. an exon is a child of a gene, and a CDS is a child of an mRNA. This method will return the tree so that you can recurse up and down it.
765    
766    =cut
767    
768    sub feature_tree {
769            my $self=shift;
770            return $self->{'tree'} if (defined $self->{'tree'});
771            my $tree;
772            my $fc;
773            foreach my $k (keys %{$self->features_by_genome})
774            {
775    # first create a hash with only parents, and an array that houses their children
776                    my $children;
777                    foreach my $feat (@{$self->features_by_genome->{$k}})
778                    {
779                            my $parent;
780                            if (defined $feat->{'Parent'}) {$parent=$feat->{'Parent'}}
781                            elsif (defined $feat->{'attributes'}->{'Parent'}) {$parent=$feat->{'attributes'}->{'Parent'}}
782    
783                            if (defined $parent) {push @{$children->{$parent}}, $feat->{'ID'}}
784                            else {$tree->{$feat->{'ID'}}=undef}
785                    }
786    
787    # now add them to a tree
788                    $self->_add2tree($tree, [keys %$tree], $children);
789            }
790            $self->{'tree'}=$tree;
791            return $tree;
792    }
793    
794    sub _add2tree {
795            my ($self, $tree, $parents, $children)=@_;
796            foreach my $parent (@$parents)
797            {
798                    if ($children->{$parent})
799                    {
800                            map {$tree->{$parent}->{$_}=undef} @{$children->{$parent}};
801                            $self->_add2tree($tree->{$parent}, $children->{$parent}, $children);
802                    }
803            }
804    }
805    
806    
807    
808    
809    
810    =head2 parse_gff3_directive()
811    
812    Parses the directives within the files (e.g. headers, flags for FASTA, and so on).
813    
814    =cut
815    
816    
817    
818  sub parse_gff3_directive  sub parse_gff3_directive
819  {  {
820      my($self, $directive, $rest) = @_;      my($self, $directive, $rest) = @_;
# Line 739  Line 848 
848    
849  }  }
850    
851    =head2 parse_seed_directive()
852    
853    Parse out seed information that we hide in the headers, eg, project, name, taxid, and so on. These are our internal representations, but are generally treated as comments by other gff3 parsers
854    
855    =cut
856    
857  sub parse_seed_directive  sub parse_seed_directive
858  {  {
859      my($self, $rest) = @_;      my($self, $rest) = @_;
# Line 787  Line 902 
902      }      }
903  }  }
904    
905    =head2 parse_local_directive()
906    
907    I haven't seen one of these :)
908    
909    =cut
910    
911  sub parse_local_directive  sub parse_local_directive
912  {  {
913      my($self, $directive, $rest) = @_;      my($self, $directive, $rest) = @_;
# Line 794  Line 915 
915      print STDERR "Have local directive '$directive' rest='$rest'\n";      print STDERR "Have local directive '$directive' rest='$rest'\n";
916  }  }
917    
918    =head2 parse_feature
919    
920    Reads a feature line and stuffs it into the right places, as appropriate.
921    
922    =cut
923    
924  sub parse_feature  sub parse_feature
925  {  {
926      my($self, $seqid, $source, $type, $start, $end, $score, $strand, $phase, $attributes) = @_;      my($self, $seqid, $source, $type, $start, $end, $score, $strand, $phase, $attributes) = @_;
# Line 898  Line 1025 
1025  # in order to support the backward-compatibility syntax that  # in order to support the backward-compatibility syntax that
1026  # lets a file skip the ##FASTA directive if it wishes.  # lets a file skip the ##FASTA directive if it wishes.
1027  #  #
1028    
1029    =head2 parse_fasta()
1030    
1031    Read the fasta sequence into memory
1032    
1033    =cut
1034    
1035  sub parse_fasta  sub parse_fasta
1036  {  {
1037      my($self, $fh, $first_line) = @_;      my($self, $fh, $first_line) = @_;
# Line 940  Line 1074 
1074      $self->current_file->fasta_data($id, $data);      $self->current_file->fasta_data($id, $data);
1075  }  }
1076    
1077    =pod
1078    
1079    =head1 GFFFeature
1080    
1081    A GFFFeature that accesses the data
1082    
1083    =head2 methods
1084    
1085    fig seqid source type start end score strand phase attributes genome fig_id
1086    
1087    =cut
1088    
1089  package GFFFeature;  package GFFFeature;
1090    
1091  use strict;  use strict;

Legend:
Removed from v.1.21  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3