[Bio] / FigKernelPackages / FIG.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIG.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.375, Tue Oct 11 20:38:38 2005 UTC revision 1.376, Wed Oct 12 02:28:16 2005 UTC
# Line 1244  Line 1244 
1244      return map { $_->[0] } @$res;      return map { $_->[0] } @$res;
1245  }  }
1246    
1247    =head3 compute_clusters
1248    
1249    C<< my @clusterList = $fig->compute_clusters(\@pegList, $subsystem, $distance); >>
1250    
1251    Partition a list of PEGs into sections that are clustered close together on
1252    the genome. The basic algorithm used builds a graph connecting PEGs to
1253    other PEGs close by them on the genome. Each connected subsection of the graph
1254    is then separated into a cluster. Singleton clusters are thrown away, and
1255    the remaining ones are sorted by length. All PEGs in the incoming list
1256    should belong to the same genome, but this is not a requirement. PEGs on
1257    different genomes will simply find themselves in different clusters.
1258    
1259    =over 4
1260    
1261    =item pegList
1262    
1263    Reference to a list of PEG IDs.
1264    
1265    =item subsystem
1266    
1267    Subsystem object for the relevant subsystem. This parameter is not used, but is
1268    required for compatability with Sprout.
1269    
1270    =item distance (optional)
1271    
1272    The maximum distance between PEGs that makes them considered close. If omitted,
1273    the distance is 5000 bases.
1274    
1275    =item RETURN
1276    
1277    Returns a list of lists. Each sub-list is a cluster of PEGs.
1278    
1279    =back
1280    
1281    =cut
1282    
1283    sub compute_clusters {
1284        # Get the parameters.
1285        my ($self, $pegList, $subsystem, $distance) = @_;
1286        if (! defined $distance) {
1287            $distance = 5000;
1288        }
1289        # Create a hash of the PEG IDs we're considering for cluster membership.
1290        my %myPeg = map { $_ => 1 } @{$pegList};
1291        # This next hash serves as our connection graph. We map each PEG to a list of
1292        # the PEGs that are near it. The GREP filter insures that a PEG is not
1293        # connected to itself and that only PEGs from the caller's list are
1294        # included.
1295        my %conn = ();
1296        for my $peg (keys %myPeg) {
1297            $conn{$peg} =
1298                [grep { $myPeg{$_} && ($_ ne $peg) } $self->close_genes($peg, $distance)];
1299        }
1300        # Our third and final hash tracks the PEGs we've already processed. This prevents
1301        # a PEG from being put in more than one cluster or in the same cluster twice.
1302        my %seen = ();
1303        # Now we create the list of clusters.
1304        my @clusters = ();
1305        # Loop through the pegs.
1306        for my $peg (keys %myPeg) {
1307            # Only proceed if this PEG has not been put into a cluster.
1308            if (! $seen{$peg}) {
1309                # Create a new cluster for this PEG.
1310                my $subList = [$peg];
1311                # Denote we've seen it.
1312                $seen{$peg} = 1;
1313                # Now we recursively build this cluster. The "$subList" acts as a
1314                # queue. We run through it from the beginning, adding connected
1315                # pegs to the list. The process stops when we run out of new PEGs to
1316                # add.
1317                for (my $i=0; $i < @$subList; $i++) {
1318                    # Get a list of the PEGs connected to the current cluster PEG.
1319                    # Only PEGs we haven't clustered yet will be processed.
1320                    my $subPeg = $subList->[$i];
1321                    my @tmp = grep { ! $seen{$_} } @{$conn{$subPeg}};
1322                    # Only proceed if we found at least one new PEG.
1323                    if (@tmp > 0) {
1324                        # For each new PEG, denote we've seen it and
1325                        # stuff it into the queue.
1326                        for my $peg1 (@tmp) { $seen{$peg1} = 1 }
1327                        push @$subList, @tmp;
1328                    }
1329                }
1330                # If the queue we've built is not a singleton, we push it on
1331                # the master cluster list.
1332                if (@$subList > 1) {
1333                    push @clusters, $subList;
1334                }
1335            }
1336        }
1337        # Sort the clusters by length. The shortest clusters will be bubbled to
1338        # the front.
1339        my @retVal = sort { @$a <=> @$b } @clusters;
1340        # Return the sorted and pruned cluster list.
1341        return @retVal;
1342    }
1343    
1344  =head3 get_sim_pool_info  =head3 get_sim_pool_info
1345    
1346  C<< my ($total_entries, $n_finished, $n_assigned, $n_unassigned) = $fig->get_sim_pool_info($pool_id); >>  C<< my ($total_entries, $n_finished, $n_assigned, $n_unassigned) = $fig->get_sim_pool_info($pool_id); >>
# Line 1679  Line 1776 
1776      return undef;      return undef;
1777  }  }
1778    
1779    =head3 FIG
1780    
1781    C<< my $realFig = $fig->FIG(); >>
1782    
1783    Return this object. This method is provided for compatability with SFXlate.
1784    
1785    =cut
1786    
1787    sub FIG {
1788        my ($self) = @_;
1789        return $self;
1790    }
1791    
1792  =head3 get_peer_last_update  =head3 get_peer_last_update
1793    
1794  C<< my $date = $fig->get_peer_last_update($peer_id); >>  C<< my $date = $fig->get_peer_last_update($peer_id); >>
# Line 8956  Line 9066 
9066    
9067  =head3 names_of_compound  =head3 names_of_compound
9068    
9069  usage: @names = $fig->names_of_compound  usage: @names = $fig->names_of_compound($cid)
9070    
9071  Returns a list containing all of the names assigned to the KEGG compounds.  The list  Returns a list containing all of the names assigned to the KEGG compounds.  The list
9072  will be ordered as given by KEGG.  will be ordered as given by KEGG.
# Line 10905  Line 11015 
11015  #    @maps             = $fig->role_to_maps($role)  #    @maps             = $fig->role_to_maps($role)
11016  #    @subsystems = $fig->peg_to_subsystems($peg);  #    @subsystems = $fig->peg_to_subsystems($peg);
11017    
11018    =head3 get_subsystem
11019    
11020    C<< my $subsysObject = $fig->get_subsystem($name, $force_load); >>
11021    
11022    Return a subsystem object for manipulation of the named subsystem. If the
11023    subsystem does not exist, an undefined value will be returned.
11024    
11025    =over 4
11026    
11027    =item name
11028    
11029    Name of the desired subsystem.
11030    
11031    =item force_load
11032    
11033    TRUE to reload the subsystem from the data store even if it is already cached in
11034    memory, else FALSE.
11035    
11036    =item RETURN
11037    
11038    Returns a blessed object that allows access to subsystem data.
11039    
11040    =back
11041    
11042    =cut
11043    
11044  sub get_subsystem :Scalar  sub get_subsystem :Scalar
11045  {  {
11046      my($self, $subsystem, $force_load) = @_;      my($self, $subsystem, $force_load) = @_;

Legend:
Removed from v.1.375  
changed lines
  Added in v.1.376

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3