[Bio] / FigKernelScripts / p3-collate.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/p3-collate.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

=head1 Extract the First N Rows for Each Value of a Column

    p3-collate.pl N [options]

This script will read through a tab-delimited file on the standard input and output the first N rows for each specific
value found in the key column. For example, if we have a set of genomes sorted by quality and we ask for a 3-row sample
based on the species column, it will extract the 3 best-quality genomes for each species.

The groups are written in sorted order by key value. Within a group, the rows appear in the order in which they
were read.

=head2 Parameters

The positional parameter must be the number of rows to extract for each value. It must be a positive integer. If it
is omitted, one row is extracted for each value.

The standard input can be overridden using the options in L<P3Utils/ih_options>.

Use the options in L<P3Utils/col_options> to select the key column.

=cut

use strict;
use warnings;
use P3DataAPI;
use P3Utils;


# Get the command-line options.
my $opt = P3Utils::script_opts('N', P3Utils::col_options(), P3Utils::ih_options());
# Get the number of rows to sample. The default is 1. This is validated before any
# input is read or any output is written, so that a bad parameter cannot leave a
# stray header line on the standard output.
my ($N) = @ARGV;
$N //= 1;
if ($N !~ /\A[+-]?\d+\z/) {
    # Reject non-integers explicitly instead of letting them be coerced to a number.
    die "Invalid row count \"$N\": must be a positive integer.";
}
if ($N < 1) {
    die "Collation specifies no output.";
}
# Open the input file.
my $ih = P3Utils::ih($opt);
# Read the incoming headers.
my ($outHeaders, $keyCol) = P3Utils::process_headers($ih, $opt);
# Form the full header set and write it out.
if (! $opt->nohead) {
    P3Utils::print_cols($outHeaders);
}
# This is the collation hash. It maps each key value to a list of the rows kept for it.
# The rows are held until the end of the input because the output is ordered by key.
my %groups;
# Loop through the input.
while (! eof $ih) {
    my $couplets = P3Utils::get_couplets($ih, $keyCol, $opt);
    for my $couplet (@$couplets) {
        my ($key, $row) = @$couplet;
        # Create the group on first sight of the key, then keep the row only if
        # the group is not yet full.
        my $group = ($groups{$key} //= []);
        if (scalar @$group < $N) {
            push @$group, $row;
        }
    }
}
# Write the output, one group at a time in key order.
for my $key (sort keys %groups) {
    for my $row (@{$groups{$key}}) {
        P3Utils::print_cols($row);
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3