[Bio] / FigKernelScripts / check_sims_for_added_genome.pl Repository:
ViewVC logotype

View of /FigKernelScripts/check_sims_for_added_genome.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri May 3 17:29:19 2013 UTC (6 years, 6 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
 a set of scripts for adding phage genomes to the seed

=pod

When I add a genome, I have a problem that sims are not getting added correctly, especially the peg.synonyms file is not being updated. This checks to see whether we need to add things to the peg.synonyms and prints out the new stuff you need

=cut


use strict;
use FIG;
my $fig=new FIG;
use Getopt::Std;
my %opts;
getopts('g:p:m:', \%opts);
unless ($opts{g} && $opts{p} && $opts{m}) {
	print STDERR <<EOF;
$0
-g genome id of the genome to test
-p name of the file to write the missing peg.synonyms out to
-m name of the file to write the ids for the missing sims to

After this is complete you should use merge_peg.synonyms to create a new peg.synonyms file, and merge_missing_sims to create a new sims file.

EOF
}


my $dbh=$fig->db_handle;

my $getmd5  = $dbh->{_dbh}->prepare(qq(select * from protein_sequence_MD5 where id = ?));
my $getsyns = $dbh->{_dbh}->prepare(qq(select * from peg_synonyms where maps_to = ?));
my $addsyns = $dbh->{_dbh}->prepare(qq/insert into peg_synonyms (maps_to, maps_to_ln, syn_id, syn_ln) values (?, ?, ?, ?)/);

if (-e $opts{m}) {
	print STDERR "The file called '$opts{m}' already exists. Not overwriting\n";
	exit(-1);
}
if (-e $opts{p}) {
	print STDERR "The file called '$opts{p}' already exists. Not overwriting\n";
	exit(-1);
}

open(ID, ">$opts{m}") || die "can't write to $opts{m}";
open(PS, ">$opts{p}") || die "can't write to $opts{p}";


# process all the pegs in a genome
my @pegs= $fig->pegs_of($opts{g});
foreach my $peg (@pegs) {
	print STDERR "$peg  : ";
	$getmd5->execute($peg) || die  $dbh->{_dbh}->errstr;
	my $row = $getmd5->fetchrow_arrayref();
	my $md5="gnl|md5|".$row->[2];

# now check and see if the md5 is in the peg_synonyms table
	$getsyns->execute($md5) || die  $dbh->{_dbh}->errstr;
	my ($len, $others)=(0, undef);
	while (my $synrow = $getsyns->fetchrow_arrayref()) {
		$len = $synrow->[1];
		push @$others, [$synrow->[2], $synrow->[3]];
	}

	if ($len > 0) {
		# the peg is in the db, so we can add it and make a new peg synonyms line
		$addsyns->execute($md5, $len, $peg, $len) || die  $dbh->{_dbh}->errstr; 
		push @$others, [$peg, $len];
		print PS $md5, ",", $len, "\t", (join(";", map {$_->[0] . ",". $_->[1]} @$others)), "\n";
		print STDERR " already there\n";
	}
	else {
# the peg is not in the db, so we need to add it to the peg.syns and make a fasta file 
# so we can blast it
		my $prot=$fig->get_translation($peg);
		$len=length($prot);
		$addsyns->execute($md5, $len, $peg, $len) || die  $dbh->{_dbh}->errstr; 
		push @$others, [$peg,$len];
		print PS $md5, ",", $len, "\t", (join(";", map {$_->[0] . ",". $_->[1]} @$others)), "\n";
		print ID "$peg\t$md5\n";
		print STDERR " new\n";
	}
}


1;




MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3