[Bio] / FigKernelPackages / SeedV.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SeedV.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Thu Dec 17 21:35:51 2009 UTC revision 1.3, Tue Aug 10 19:38:47 2010 UTC
# Line 23  Line 23 
23    
24  package SeedV;  package SeedV;
25    
26    use FileHandle;
27  use Carp;  use Carp;
28  use strict;  use strict;
29  use Data::Dumper;  use Data::Dumper;
30  use DB_File;  use DB_File;
31  use File::Basename;  use File::Basename;
32    use SeedUtils;
33    use CorrTableEntry;
34    use SAPserver;
35    
36    my $pseed_url = 'http://servers.nmpdr.org/pseed/sapling/server.cgi';
37    
38  sub new {  sub new {
39      my ($class, $org_dir ) = @_;      my ($class, $org_dir ) = @_;
# Line 37  Line 43 
43      $self->{_orgdir} = $org_dir;      $self->{_orgdir} = $org_dir;
44      ($org_dir =~ /(\d+\.\d+)$/) || confess("$org_dir must be a path ending in the genome ID");      ($org_dir =~ /(\d+\.\d+)$/) || confess("$org_dir must be a path ending in the genome ID");
45      $self->{_genome} = $1;      $self->{_genome} = $1;
46    
47        $self->{_sap} = SAPserver->new($pseed_url);
48    
49      return bless $self, $class;      return bless $self, $class;
50  }  }
51    
# Line 402  Line 411 
411    
412          my $contig_lengths = $self->{_contig_len_index};          my $contig_lengths = $self->{_contig_len_index};
413    
414          $self->load_contigs_index();      # $self->load_contigs_index();
415          if (!tied(%$contig_lengths))          if (!tied(%$contig_lengths))
416          {          {
417              $self->load_contig_len_cache();              $self->load_contig_len_cache();
# Line 946  Line 955 
955    
956  search_features_by_annotation  search_features_by_annotation
957    
958  Takes a string to find and an optional case boolean, and returns a hash with keys are the function and values are a reference to an array of the IDs that have that function.  Takes a string to find and an optional case boolean, and returns a
959    hash with keys are the function and values are a reference to an array
960    of the IDs that have that function.
961    
962  If the case boolean is set the search is case insensitive. Otherwise case sensitive.  If the case boolean is set the search is case insensitive. Otherwise case sensitive.
963    
964  Note that this was originally based on the find above, but this uses a regexp for matching. Will likely be a lot slower which is why I only search for a single term. There may be an SQL way of doing this, if so let Rob know how to do it and I'll replace this method.  Note that this was originally based on the find above, but this uses a
965    regexp for matching. Will likely be a lot slower which is why I only
966    search for a single term. There may be an SQL way of doing this, if so
967    let Rob know how to do it and I'll replace this method.
968    
969    
970  =cut  =cut
# Line 1453  Line 1467 
1467      return 0;      return 0;
1468  }  }
1469    
1470    sub load_correspondences
1471    {
1472        my($self) = @_;
1473    
1474        return if exists($self->{correspondence_index});
1475    
1476        my $dir = $self->{_orgdir};
1477        my $index = {};
1478    
1479        for my $cfile (<$dir/CorrToReferenceGenomes/*>)
1480        {
1481            if ($cfile =~ m,/\d+\.\d+$,)
1482            {
1483                if (open(my $fh, "<", $cfile))
1484                {
1485                    while (<$fh>)
1486                    {
1487                        my $ent = CorrTableEntry->new($_);
1488                        push(@{$index->{$ent->id1}}, $ent);
1489                    }
1490                    close($fh);
1491                }
1492                else
1493                {
1494                    warn "Could not open $cfile: $!\n";
1495                }
1496            }
1497        }
1498        $self->{correspondence_index} = $index;
1499    }
1500    
1501    sub get_correspondences
1502    {
1503        my($self, $id) = @_;
1504    
1505        $self->load_correspondences();
1506    
1507        return $self->{correspondence_index}->{$id};
1508    }
1509    
1510    sub get_best_correspondences
1511    {
1512        my($self, $id, $cutoff) = @_;
1513        my $corrs = $self->get_correspondences($id);
1514    
1515        my $lg;
1516        my $out = [];
1517    
1518        for my $hit (sort { SeedUtils::genome_of($a->id2) <=> SeedUtils::genome_of($b->id2) or
1519                            $a->psc <=> $b->psc }
1520                     grep { (!defined($cutoff) or $_->psc < $cutoff) and
1521                            ($_->hitinfo eq '<=>' or $_->func1 eq $_->func2) }
1522                     @$corrs)
1523        {
1524            if (defined($lg) && $lg ne SeedUtils::genome_of($hit->id2))
1525            {
1526                push(@$out, $hit);
1527            }
1528            $lg = SeedUtils::genome_of($hit->id2);
1529        }
1530        return $out;
1531    
1532    }
1533    
1534    sub get_pin
1535    {
1536        my($self, $peg, $n, $cutoff) = @_;
1537    
1538        my $hits = $self->get_best_correspondences($peg, $cutoff);
1539    #    print join("\t", $_->id2, $_->func2) . "\n" for @$hits;
1540        my @pegs = map { $_->id2 }
1541                     sort { $b->bsc <=> $a->bsc or
1542                            abs($a->len1 - $a->len2) <=> abs($b->len1 - $b->len2) }
1543                     @$hits;
1544        $#pegs = ($n - 1) if @pegs > $n;
1545        return \@pegs;
1546    }
1547    
1548    sub get_context
1549    {
1550        my($self, $peg, $pin, $width) = @_;
1551    
1552        #
1553        # Determine local context.
1554        #
1555    
1556        my $peg_loc = $self->feature_location($peg);
1557        my ($contig, $min, $max, $dir) = &SeedUtils::boundaries_of($peg_loc);
1558        my $center = ($min + $max) / 2;
1559    
1560        my ($my_genes, $minV, $maxV) = $self->genes_in_region($contig,
1561                                                              int($center - $width / 2),
1562                                                              int($center + $width / 2));
1563    
1564        my $left_extent = $center - $minV;
1565        my $right_extent = $maxV - $center;
1566    
1567        #
1568        # Determine other context. Get locations for the pin, then find the
1569        # regions.
1570        #
1571        my $sap = $self->{_sap};
1572    
1573        my $locs = $sap->fid_locations(-ids => $pin, -boundaries => 1);
1574    
1575        my @extended_locs;
1576        for my $lpeg (keys %$locs)
1577        {
1578            my $loc = $locs->{$lpeg};
1579            my($lcontig, $lmin, $lmax, $ldir) = &SeedUtils::boundaries_of($loc);
1580            my $center = ($lmin + $lmax) / 2;
1581            my $left_end = int($center - $left_extent);
1582            $left_end = 0 if $left_end < 0;
1583            my $right_end = int($center + $right_extent);
1584            push(@extended_locs, &SeedUtils::location_string($lcontig, $left_end, $right_end));
1585        }
1586    
1587        my $regions = $sap->genes_in_region(-locations => \@extended_locs, -includeLocation => 1);
1588    
1589        #
1590        # Determine functions.
1591        #
1592        my @ext_pegs = map { keys %$_ } values %$regions;
1593        my $funcs = $sap->ids_to_functions(-ids => \@ext_pegs);
1594    
1595        #
1596        # Overlay with local functions.
1597    
1598        $funcs->{$_} = $self->function_of($_) for @$my_genes;
1599    
1600        #
1601        # We have the whole shebang now. We can assemble and return.
1602        #
1603        # Each genome is a list of genes.
1604        # Each gene is a list [fid, contig, beg, end, dir, func].
1605        #
1606    
1607        my @context;
1608        my $cref = {};
1609        my $row = 0;
1610    
1611        #
1612        # Start with local context.
1613        #
1614    
1615        my @loc = map { my($contig, $min, $max, $dir) = &SeedUtils::boundaries_of($self->feature_location($_));
1616                         [$_, $contig, $min, $max, $dir, $funcs->{$_}, $row] } @$my_genes;
1617        push @context, [ sort { $a->[2] <=> $b->[2] } @loc ];
1618        $cref->{$_->[0]} = $_ for @loc;
1619    
1620        $row++;
1621        #
1622        # And include the pinned region.
1623        #
1624        for my $loc (@extended_locs)
1625        {
1626            my $region = $regions->{$loc};
1627            my @row;
1628            for my $peg (keys %$region)
1629            {
1630                my($contig, $min, $max, $dir) = &SeedUtils::boundaries_of($region->{$peg});
1631                my $ent = [$peg, $contig, $min, $max, $dir, $funcs->{$peg}, $row];
1632                $cref->{$peg} = $ent;
1633                push(@row, $ent);
1634            }
1635            $row++;
1636    
1637            push @context, [ sort { $a->[2] <=> $b->[2] } @row ];
1638        }
1639    
1640        return \@context, $cref;
1641    }
1642    
1643    
1644  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3