[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.19, Fri Dec 4 21:16:58 2009 UTC revision 1.20, Mon Dec 14 02:11:58 2009 UTC
# Line 1147  Line 1147 
1147      return $retVal;      return $retVal;
1148  }  }
1149    
1150    =head3 display_id_and_seq
1151    
1152        SeedUtils::display_id_and_seq($id_and_comment, $seqP, $fh);
1153    
1154    
1155    
1156    Display a fasta ID and sequence to the specified open file. This method is designed
1157    to work well with L</read_fasta_sequence> and L</rev_comp>, because it takes as
1158    input a string pointer rather than a string. If the file handle is omitted it
1159    defaults to STDOUT.
1160    
1161    The output is formatted into a FASTA record. The first line of the output is
1162    preceded by a C<< > >> symbol, and the sequence is split into 60-character
1163    chunks displayed one per line. Thus, this method can be used to produce
1164    FASTA files from data gathered by the rest of the system.
1165    
1166    =over 4
1167    
1168    =item id_and_comment
1169    
1170    The sequence ID and (optionally) the comment from the sequence's FASTA record.
1171    The ID and comment are written as a single header line after the C<< > >> symbol.
1172    
1173    =item seqP
1174    
1175    Reference to a string containing the sequence. The sequence is automatically
1176    formatted into 60-character chunks displayed one per line.
1177    
1178    =item fh
1179    
1180    Open file handle to which the ID and sequence should be output. If omitted,
1181    C<\*STDOUT> is assumed.
1182    
1183    =back
1184    
1185    =cut
1186    
sub display_id_and_seq {
    # Tolerate method-style calls: discard a leading object or class name
    # that belongs to this package.
    shift if UNIVERSAL::isa($_[0], __PACKAGE__);

    my ($id, $seqP, $fh) = @_;

    # Fall back to standard output when the caller supplies no handle.
    $fh = \*STDOUT unless defined $fh;

    # Emit the FASTA header line, then let display_seq write the sequence body.
    print {$fh} ">$id\n";
    display_seq($seqP, $fh);
}
1201    
1202    =head3 display_seq
1203    
1204        SeedUtils::display_seq($seqP, $fh);
1205    
1206    Display a fasta sequence to the specified open file. This method is designed
1207    to work well with L</read_fasta_sequence> and L</rev_comp>, because it takes as
1208    input a string pointer rather than a string. If the file handle is omitted it
1209    defaults to STDOUT.
1210    
1211    The sequence is split into 60-character chunks displayed one per line for
1212    readability.
1213    
1214    =over 4
1215    
1216    =item seqP
1217    
1218    Reference to a string containing the sequence.
1219    
1220    =item fh
1221    
1222    Open file handle to which the sequence should be output. If omitted,
1223    C<STDOUT> is assumed.
1224    
1225    =back
1226    
1227    =cut
1228    
sub display_seq {
    # Tolerate method-style calls: discard a leading object or class name
    # that belongs to this package.
    shift if UNIVERSAL::isa($_[0], __PACKAGE__);

    my ($seqP, $fh) = @_;

    # Fall back to standard output when the caller supplies no handle.
    $fh = \*STDOUT unless defined $fh;

    my $total  = length($$seqP);
    my $offset = 0;

    # Write the sequence in 60-character lines. substr stops at the end of
    # the string, so the final (possibly shorter) chunk needs no special case.
    # An empty sequence produces no output at all.
    while ($offset < $total) {
        print {$fh} substr($$seqP, $offset, 60) . "\n";
        $offset += 60;
    }
}
1249    
1250    
1251    =head3 roles_of_function
1252    
1253        my @roles = $fig->roles_of_function($func);
1254    
1255    Returns a list of the functional roles implemented by the specified function. This method
1256    parses the role data out of the function name, and does not require access to the database.
1257    
1258    =over 4
1259    
1260    =item func
1261    
1262    Name of the function whose roles are to be parsed out.
1263    
1264    =item RETURN
1265    
1266    Returns a list of the roles performed by the specified function.
1267    
1268    =back
1269    
1270    =cut
1271    
sub roles_of_function {
    # Tolerate method-style calls: discard a leading object or class name
    # that belongs to this package.
    shift if UNIVERSAL::isa($_[0], __PACKAGE__);

    # With a single remaining argument it is the function name; otherwise
    # the function name is taken from the second position.
    my $function = (@_ == 1) ? $_[0] : $_[1];

    # Drop any trailing comment introduced by "!" or "#".
    $function =~ s/\s*[\!\#].*$//;

    # Candidate roles: the pieces between role separators, every
    # four-part dotted number found in the text, and the whole function.
    my @pieces     = split(/\s*;\s+|\s+[\@\/]\s+/, $function);
    my @ec_numbers = ($function =~ /\d+\.\d+\.\d+\.\d+/g);

    # A hash removes duplicates (for example when there is only one piece).
    my %seen;
    $seen{$_} = 1 for (@pieces, @ec_numbers, $function);

    return sort keys(%seen);
}
1280    
1281    
1282    =head3 reverse_comp
1283    
1284        my $dnaR = FIG::reverse_comp($dna);
1285    
1286    or
1287    
1288        my $dnaR = $fig->reverse_comp($dna);
1289    
1290    Return the reverse complement of the specified DNA sequence.
1291    
1292    NOTE: for extremely long DNA strings, use L</rev_comp>, which allows you to
1293    pass the strings around in the form of pointers.
1294    
1295    =over 4
1296    
1297    =item dna
1298    
1299    DNA sequence whose reverse complement is desired.
1300    
1301    =item RETURN
1302    
1303    Returns the reverse complement of the incoming DNA sequence.
1304    
1305    =back
1306    
1307    =cut
1308    #: Return Type $;
sub reverse_comp {
    # Tolerate method-style calls: discard a leading object or class name
    # that belongs to this package.
    shift if UNIVERSAL::isa($_[0], __PACKAGE__);

    my ($dna) = @_;

    # rev_comp works on string references; wrap the input and unwrap the result.
    my $reversed_ref = rev_comp(\$dna);
    return $$reversed_ref;
}
1315    
1316    =head3 rev_comp
1317    
1318        my $dnaRP = FIG::rev_comp(\$dna);
1319    
1320    or
1321    
1322        my $dnaRP = $fig->rev_comp(\$dna);
1323    
1324    Return the reverse complement of the specified DNA sequence. The DNA sequence
1325    is passed in as a string reference rather than a raw string for performance
1326    reasons. If this is unnecessary, use L</reverse_comp>, which processes strings
1327    instead of references to strings.
1328    
1329    =over 4
1330    
1331    =item dna
1332    
1333    Reference to the DNA sequence whose reverse complement is desired.
1334    
1335    =item RETURN
1336    
1337    Returns a reference to the reverse complement of the incoming DNA sequence.
1338    
1339    =back
1340    
1341    =cut
1342    #: Return Type $;
sub rev_comp {
    # Tolerate method-style calls: discard a leading object or class name
    # that belongs to this package.
    shift if UNIVERSAL::isa($_[0], __PACKAGE__);

    my ($seqP) = @_;

    # Reverse a copy of the sequence; the caller's string is left untouched.
    my $complement = reverse($$seqP);

    # Fold ASCII upper case to lower case, so the result is always lower case.
    $complement =~ tr/A-Z/a-z/;

    # Swap each nucleotide or ambiguity code for its complement ("u" maps
    # to "a"); characters outside the list pass through unchanged.
    $complement =~ tr/acgtumrwsykbdhv/tgcaakywsrmvhdb/;

    return \$complement;
}
1353    
# Build a short abbreviation (at most 10 characters) from a genome name.
sub abbrev {
    my ($name) = @_;

    # Shorten the first word to its first three characters plus a period
    # (only when it has more than three characters).
    $name =~ s/^(\S{3})\S+/$1./;

    # Do the same for the second word and join it directly to the first.
    $name =~ s/^(\S+)\s+(\S{3})\S+/$1$2./;

    # Remove every remaining space.
    $name =~ s/ //g;

    # Cap the result at 10 characters.
    return (length($name) > 10) ? substr($name, 0, 10) : $name;
}
1365    
1366  1;  1;

Legend:
Removed from v.1.19  
changed lines
  Added in v.1.20

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3