[Bio] / FigKernelPackages / ExpressionDir.pm Repository:
ViewVC logotype

View of /FigKernelPackages/ExpressionDir.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Fri Jan 7 17:27:15 2011 UTC (9 years, 5 months ago) by olson
Branch: MAIN
Changes since 1.2: +69 -1 lines
add more stuff

package ExpressionDir;

use Data::Dumper;
use strict;
use SeedAware;
use File::Copy;
use File::Temp 'tempfile';
use File::Spec::Functions;
use base 'Class::Accessor';
use Carp;
use Fcntl ':seek';
    
__PACKAGE__->mk_accessors(qw(genome_dir expr_dir error_dir));

=head3 new

    my $edir = ExpressionDir->new($genome_source);

Create a new ExpressionDir object to be associated with the given genome directory.
If a subdirectory ExpressionData does not yet exist, one is created.

=cut

sub new
{
    my($class, $genome_dir) = @_;

    my $edir = catfile($genome_dir, "ExpressionData");
    if (! -d $edir)
    {
	mkdir($edir);
    }
    my $errdir = catfile($genome_dir, 'errors');
    if (! -d $errdir)
    {
	mkdir($errdir);
    }
    

    my $self = {
	genome_dir => $genome_dir,
	expr_dir => $edir,
	error_dir => $errdir,
    };
    return bless $self, $class;
}

sub parse_probe_format_1
{
    my($self, $in_file, $out_file) = @_;

    my($fh);

    open($fh, "<", $in_file) or confess "Cannot open $in_file for reading: $!";
    my $l = <$fh>;
    chomp $l;
    $l =~ s/\r//;
    my @hdrs = split(/\t/, $l);
    my %hdrs;
    $hdrs{$hdrs[$_]} = $_ for 0..$#hdrs;

    my $x_col = $hdrs{"Probe X"};
    my $y_col = $hdrs{"Probe Y"};
    my $seq_col = $hdrs{"Probe Sequence"};
    if (!(defined($x_col) && defined($y_col) && defined($seq_col)))
    {
	close($fh);
	return undef;
    }

    my $out;
    open($out, ">", $out_file) or confess "Cannot open $out for writing: $!";

    while (<$fh>)
    {
	chomp;
	s/\r//g;
	my @flds = split(/\t/,$_);
	my($x,$y,$seq);
	$x = $flds[$x_col];
	$y = $flds[$y_col];
	$seq = $flds[$seq_col];
	my $id = "$x\_$y";
	print $out "$id\t$seq\n";
    }
    close($fh);
    close($out);
    return 1;
}

sub parse_probe_format_2
{
    my($self, $in_file, $out_file) = @_;

    my($fh);

    local $/ = "\n>";

    open($fh, "<", $in_file) or confess "Cannot open $in_file for reading: $!";
    my $l = <$fh>;
    chomp $l;
    $l =~ s/\r//;

    if ($l !~ /^>?\S+:(\d+):(\d+);\s+Interrogation_Position=\d+;\s+Antisense;\n([ACGT]+)/s)
    {
	close($fh);
	return undef;
    }
    seek($fh, 0, SEEK_SET);

    my $out;
    open($out, ">", $out_file) or confess "Cannot open $out for writing: $!";

    while (<$fh>)
    {
	chomp;

	if ($_ =~ /^>?\S+:(\d+):(\d+);\s+Interrogation_Position=\d+;\s+Antisense;\n([ACGT]+)/s)
	{
	    if (length($3) < 15)
	    {
		close($fh);
		confess "Bad length at line $. of $in_file";
	    }
	    print $out "$1\_$2\t$3\n";
	}
	else
	{
	    confess "Bad input at line $. of $in_file";
	}
    }
    close($out);
    close($fh);
    return 1;
}

#
# Our "native" format, used for passing through pre-parsed data.
#
sub parse_probe_format_3
{
    my($self, $in_file, $out_file) = @_;

    my($fh);

    open($fh, "<", $in_file) or confess "Cannot open $in_file for reading: $!";
    my $l = <$fh>;
    chomp $l;
    $l =~ s/\r//;

    if ($l !~ /^\d+_\d+\t[ACGT]+$/)
    {
	close($fh);
	return undef;
    }
    seek($fh, 0, SEEK_SET);

    my $out;
    open($out, ">", $out_file) or confess "Cannot open $out for writing: $!";

    while (<$fh>)
    {
	if ($_ =~ /^\d+_\d+\t[ACGT]+$/)
	{
	    print $out $_;
	}
	else
	{
	    confess "Bad input at line $. of $in_file";
	}
    }
    close($out);
    close($fh);
    return 1;
}

sub compute_probe_to_peg
{
    my($self, $probes) = @_;

    my $my_probes = catfile($self->expr_dir, "probes.in");
    
    copy($probes, $my_probes) or confess "Cannot copy $probes to $my_probes: $!";

    my $probes_fasta = catfile($self->expr_dir, "probes.fasta");

    #
    # Attempt to translate probe file.
    #
    my $success;
    for my $meth (qw(parse_probe_format_1 parse_probe_format_2 parse_probe_format_3))
    {
	if ($self->$meth($my_probes, $probes_fasta))
	{
	    print STDERR "Translated $probes to $probes_fasta using $meth\n";
	    $success = 1;
	    last;
	}
	else
	{
	    print STDERR "Failed to translate $probes to $probes_fasta using $meth\n";
	}
    }
    if (!$success)
    {
	confess "Could not translate $probes\n";
    }

    my $peg_probe_table = catfile($self->expr_dir, 'peg.probe.table');
    my $probe_occ_table = catfile($self->expr_dir, 'probe.occ.table');

    my $feature_dir = catfile($self->genome_dir, "Features");
    my @tbls;
    for my $ftype (qw(peg rna))
    {
	my $tfile = catfile($feature_dir, $ftype, 'tbl');
	if (-f $tfile)
	{
	    push(@tbls, $tfile);
	}
    }
    if (@tbls == 0)
    {
	confess "Could not find any tbl files in $feature_dir";
    }

    system_with_redirect([executable_for("make_probes_to_genes"),
			  $probes_fasta,
			  catfile($self->genome_dir, 'contigs'),
			  $tbls[0],
			  $peg_probe_table,
			  $probe_occ_table,
			  @tbls[1..$#tbls],
			  ],
		         { stderr => catfile($self->expr_dir, 'problems') });
			 

    system_with_redirect([executable_for("remove_multiple_occurring_probes"),
			  $peg_probe_table,
			  $probe_occ_table],
		          { stdout => catfile($self->expr_dir, 'peg.probe.table.no.multiple') } );

    $self->make_missing_probes($peg_probe_table, $probes_fasta,
			       catfile($self->expr_dir, 'probe.no.match'));
}

sub make_missing_probes
{
    my($self, $probe_table, $probes, $output) = @_;
    open(MATCH,"<", $probe_table) or die "Cannot open $probe_table: $!";
    open(PROBES,"<", $probes) or die "Cannot open $probes: $!";
    open(OUTPUT, ">", $output) or die "Cannot open $output: $!";
    my %locations;
    while(<MATCH>)
    {
	chomp;
	my($peg,$loc)=split "\t";
	$locations{$loc} = $peg;
    }
    
    while(<PROBES>)
    {
	chomp;
	my($loc,$seq) = split "\t";
	print OUTPUT $loc, "\n" if ! exists $locations{$loc};
    }
    close(MATCH);
    close(PROBES);
    close(OUTPUT);
}

#
# we don't copy the experiment files in here because
# they may be very large. This may change.
#
# We do copy the cdf.
#
sub compute_rma_normalized
{
    my($self, $cdf_file, $expt_dir) = @_;

    my $my_cdf = catfile($self->expr_dir, "expr.cdf");
    copy($cdf_file, $my_cdf) or confess "Cannot copy $cdf_file to $my_cdf: $!";

    #
    # We need to build the R library for this cdf.
    #
    my($fh, $tempfile) = tempfile();
#m = make.cdf.package("S_aureus.cdf", cdf.path="..",packagename="foo",package.path="/tmp")

    my $cdf_path = $self->expr_dir;
    my $libdir = catfile($self->expr_dir, "r_lib");
    -d $libdir or mkdir $libdir;
    my $pkgdir = catfile($self->expr_dir, "r_pkg");
    -d $pkgdir or mkdir $pkgdir;

    print $fh "library(makecdfenv);\n";
    print $fh qq(make.cdf.package("expr.cdf", cdf.path="$cdf_path", packagename="datacdf", package.path="$pkgdir", species="genome name");\n);
    close($fh);
    system("cat", $tempfile);
    system("Rscript", $tempfile);
    system("R", "CMD", "INSTALL", "-l", $libdir, "$pkgdir/datacdf");

    local($ENV{R_LIBS}) = $libdir;
    my @cmd = (executable_for("RunRMA"),
	       "data",
	       catfile($self->expr_dir, "peg.probe.table"),
	       catfile($self->expr_dir, "probe.no.match"),
	       $expt_dir,
	       $self->expr_dir);
    
    my $rc = system_with_redirect(\@cmd);
#    my $rc = system_with_redirect(\@cmd, { stderr => "rma.stderr" });
    if ($rc != 0)
    {
	confess("Error running RMA: rc=$rc cmd=@cmd");
    }
    my $output = catfile($self->expr_dir, "rma_normalized.tab");
    if (! -f $output)
    {
	confess("Output file $output was not generated");
    }
}

sub compute_atomic_regulons
{
    my($self) = @_;

    my $coreg_clusters = catfile($self->expr_dir, "coregulated.clusters");
    my $coreg_subsys = catfile($self->expr_dir, "coregulated.subsys");
    my $merged_clusters = catfile($self->expr_dir, "merged.clusters");
}

sub get_experiment_names
{
    my($self) = @_;
    my $f = catfile($self->expr_dir, "experiment.names");
    my $fh;
    open($fh, "<", $f) or confess "Could not open $f: $!";
    my @out = map { chomp; my($num, $name) = split(/\t/); $name } <$fh>;
    close($fh);
    return @out;
}

sub get_experiment_on_off_calls
{
    my($self, $expt) = @_;

    my $f= catfile($self->expr_dir, "final_on_off_calls.txt");
    my $fh;
    open($fh, "<", $f) or confess "Could not open $f: $!";
    my $names = <$fh>;
    chomp $names;
    my @names = split(/\t/, $names);
    my $idx = 0;
    my $expt_idx;
    foreach my $n (@names)
    {
	if ($n eq $expt)
	{
	    $expt_idx = $idx;
	    last;
	}
	$idx++;
    }
    if (!defined($expt_idx))
    {
	confess("Could not find experiment $expt in $f");
    }

    my $calls = {};
    while (<$fh>)
    {
	chomp;
	my($peg, @calls) = split(/\t/);
	#
	# +1 because the row[0] element is the peg, and our index is
	# zero-based.
	#
	$calls->{$peg} = $calls[$expt_idx + 1];
    }

    close($fh);
    return($calls);
	
}

=head3 save_model_gene_activity

    $e->save_model_gene_activity($data)

Save the results of a modeling run for a given experiment.

$data is of the form { experiment_id => $data_hash }

=cut

sub save_model_gene_activity
{
    my($self, $data) = @_;
}

1;



MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3