[Bio] / FigKernelScripts / extract_subset_from_sprout_data.pl Repository:
ViewVC logotype

View of /FigKernelScripts/extract_subset_from_sprout_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Dec 5 18:56:37 2005 UTC (13 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.1: +17 -0 lines
Add license words.

#!/Users/fig/FIGdisk/env/mac/bin/perl
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


BEGIN {
    @INC = qw(
	/Users/fig/FIGdisk/env/mac/lib/perl5/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/5.8.4
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl
	/Users/fig/FIGdisk/config
	/Users/fig/FIGdisk/dist/releases/current/mac/lib
	/Users/fig/FIGdisk/dist/releases/current/mac/lib/FigKernelPackages
	/Users/fig/FIGdisk/env/mac/lib/perl5/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/5.8.4
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4/darwin-2level
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl/5.8.4
	/Users/fig/FIGdisk/env/mac/lib/perl5/site_perl
);
}
use Data::Dumper;
use Carp;
# Following block is expanded by switch_to_release to add use lib directives
# to point at the correct locations in the release directory.
#BEGIN switch_to_release generated code
use lib '/Users/fig/FIGdisk/dist/releases/dev/mac/lib';
use lib '/Users/fig/FIGdisk/dist/releases/dev/mac/lib/FigKernelPackages';
#END switch_to_release generated code

use lib "/Users/fig/FIGdisk/config";
use FIG_Config;

#### END tool_hdr ####

# Command line: FromDir (directory holding the full set of tables), ToDir
# (must not exist yet; it is created below) and GenomesFile (the first
# whitespace-delimited token of each line is taken as a genome id).
my $usage = "usage: extract_subset_from_sprout_data FromDir ToDir GenomesFile";

my $fromD    = shift @ARGV;
my $toD      = shift @ARGV;
my $genomesF = shift @ARGV;
($fromD && $toD && $genomesF) || die $usage;

# Only verify that the source directory can be opened; the tables to process
# are named explicitly by the calls that follow, so no listing is needed.
opendir(my $from_dh,$fromD) || die "$fromD could not be opened: $!";
closedir($from_dh);

(! -d $toD)    || die "$toD already exists; delete it and rerun";
(-s $genomesF) || die "you gave an invalid genomes file ($genomesF)";

# Read the ids directly instead of shelling out to cat(1), so that paths
# containing spaces or shell metacharacters are handled correctly.
my %genomes = %{ _first_column_ids($genomesF) };
my %old     = %{ _first_column_ids("$fromD/Genome") };

# Report every requested genome that is missing from the source before dying,
# so the user can fix the whole genomes file in one pass.
my $must_die = 0;
foreach my $genome (sort keys(%genomes))
{
    if (! $old{$genome})
    {
        print STDERR "FATAL: $genome is not in $fromD/Genome\n";
        $must_die = 1;
    }
}

if ($must_die) { die "Fix your genomes file and retry" }

# Create the target directory only after every check has passed; otherwise a
# failed run would leave an empty directory behind and the retry would stop
# with "already exists".
mkdir($toD,0777) || die "could not make $toD: $!";

# Return a hash ref whose keys are the first whitespace-delimited token of
# each line of $path.  Lines that do not start with a non-space character
# (for example blank lines) are ignored.  Dies if $path cannot be opened.
sub _first_column_ids {
    my($path) = @_;

    my %ids;
    open(my $fh,'<',$path) || die "could not open $path: $!";
    while (defined(my $line = <$fh>))
    {
        if ($line =~ /^(\S+)/)
        {
            $ids{$1} = 1;
        }
    }
    close($fh);
    return \%ids;
}

# Tables that are filtered through ids collected from a related table
# (annotations, contigs/sequences and subsystem cells respectively).
handle_annotations($fromD,$toD,\%genomes);
handle_contigs($fromD,$toD,\%genomes);
handle_sscells($fromD,$toD,\%genomes);

# Tables copied unchanged.  Each name is listed once so that no table is
# copied twice.
handle_just_copy($fromD,$toD,["Diagram",
                              "OccursInSubsystem",
                              "Property",
                              "Role",
                              "RoleName",
                              "RoleOccursIn",
                              "Source",
                              "SourceURL",
                              "SproutUser",
                              "Subsystem",
                              "UserAccess"
                              ]);

# Tables filtered line by line: a row is kept when the genome part of its
# fig| id, or else its first column, is one of the selected genomes.
handle_standard($fromD,$toD,\%genomes,["ComesFrom",
                                       "ContainsFeature",
                                       "Feature",
                                       "FeatureAlias",
                                       "FeatureLink",
                                       "FeatureTranslation",
                                       "FeatureUpstream",
                                       "Genome",
                                       "HasProperty",
                                       "IsBidirectionalBestHitOf",
                                       "IsClusteredOnChromosomeWith",
                                       "IsLocatedIn",
                                       "ParticipatesIn"
                                       ]);
					

# Copy the annotation tables, restricted to the selected genomes.
#
# IsTargetOfAnnotation is read first: a row is kept when its first column is
# a "fig|<genome>.<rest>" id whose genome part is a key of %$genomes, and the
# second column of every kept row is remembered (presumably the annotation
# id -- confirm against the table layout).  MadeAnnotation rows are then kept
# when their second column was remembered, and Annotation rows when their
# first column was remembered.
#
# Parameters:
#   $fromD   - directory holding the full tables
#   $toD     - existing directory that receives the filtered tables
#   $genomes - hash ref whose keys are the genome ids to keep
#
# If IsTargetOfAnnotation cannot be opened, a notice is printed on STDERR and
# nothing else is done.  A MadeAnnotation or Annotation file that cannot be
# opened is skipped silently.  Dies if an output file cannot be created or
# cannot be completely written.
sub handle_annotations {
    my($fromD,$toD,$genomes) = @_;

    my %to_keep;

    my $target_in;
    if (! open($target_in,'<',"$fromD/IsTargetOfAnnotation"))
    {
        print STDERR "No annotations seem to exist\n";
        return;
    }

    open(my $target_out,'>',"$toD/IsTargetOfAnnotation")
        || die "could not open $toD/IsTargetOfAnnotation: $!";
    while (defined(my $line = <$target_in>))
    {
        if (($line =~ /^fig\|(\d+\.\d+)\.\S+\t(\S+)/) && $genomes->{$1})
        {
            $to_keep{$2} = 1;
            print {$target_out} $line;
        }
    }
    close($target_in);
    # Buffered write errors only surface at close time, so check it.
    close($target_out) || die "could not finish writing $toD/IsTargetOfAnnotation: $!";

    if (open(my $made_in,'<',"$fromD/MadeAnnotation"))
    {
        open(my $made_out,'>',"$toD/MadeAnnotation")
            || die "could not open $toD/MadeAnnotation: $!";
        while (defined(my $line = <$made_in>))
        {
            if (($line =~ /^\S+\t(\S+)/) && $to_keep{$1})
            {
                print {$made_out} $line;
            }
        }
        close($made_in);
        close($made_out) || die "could not finish writing $toD/MadeAnnotation: $!";
    }

    if (open(my $ann_in,'<',"$fromD/Annotation"))
    {
        open(my $ann_out,'>',"$toD/Annotation")
            || die "could not open $toD/Annotation: $!";
        while (defined(my $line = <$ann_in>))
        {
            if (($line =~ /^(\S+)/) && $to_keep{$1})
            {
                print {$ann_out} $line;
            }
        }
        close($ann_in);
        close($ann_out) || die "could not finish writing $toD/Annotation: $!";
    }

    return;
}

# Copy the contig and sequence tables, restricted to the selected genomes.
#
# One set of remembered ids is built up while the tables are processed in
# this order:
#   HasContig  - rows whose first column is a key of %$genomes are kept and
#                their second column is remembered
#   IsMadeUpOf - rows whose first column was remembered are kept and their
#                second column is remembered as well
#   Sequence   - rows with at least two tab-separated non-blank columns are
#                kept when their first column was remembered
#   Contig     - rows are kept when their first column was remembered
# Sequence and Contig are both checked against the combined set, exactly as
# the tables are read; no distinction is made between the two id sources.
#
# Parameters:
#   $fromD   - directory holding the full tables
#   $toD     - existing directory that receives the filtered tables
#   $genomes - hash ref whose keys are the genome ids to keep
#
# An input table that cannot be opened is skipped silently.  Dies if an
# output file cannot be created or cannot be completely written.
sub handle_contigs {
    my($fromD,$toD,$genomes) = @_;

    my %to_keep;

    if (open(my $hc_in,'<',"$fromD/HasContig"))
    {
        open(my $hc_out,'>',"$toD/HasContig")
            || die "could not open $toD/HasContig: $!";
        while (defined(my $line = <$hc_in>))
        {
            if (($line =~ /^(\S+)\t(\S+)/) && $genomes->{$1})
            {
                $to_keep{$2} = 1;
                print {$hc_out} $line;
            }
        }
        close($hc_in);
        # Buffered write errors only surface at close time, so check it.
        close($hc_out) || die "could not finish writing $toD/HasContig: $!";
    }

    if (open(my $made_in,'<',"$fromD/IsMadeUpOf"))
    {
        open(my $made_out,'>',"$toD/IsMadeUpOf")
            || die "could not open $toD/IsMadeUpOf: $!";
        while (defined(my $line = <$made_in>))
        {
            if (($line =~ /^(\S+)\t(\S+)/) && $to_keep{$1})
            {
                $to_keep{$2} = 1;
                print {$made_out} $line;
            }
        }
        close($made_in);
        close($made_out) || die "could not finish writing $toD/IsMadeUpOf: $!";
    }

    if (open(my $seq_in,'<',"$fromD/Sequence"))
    {
        open(my $seq_out,'>',"$toD/Sequence")
            || die "could not open $toD/Sequence: $!";
        while (defined(my $line = <$seq_in>))
        {
            if (($line =~ /^(\S+)\t(\S+)/) && $to_keep{$1})
            {
                print {$seq_out} $line;
            }
        }
        close($seq_in);
        close($seq_out) || die "could not finish writing $toD/Sequence: $!";
    }

    if (open(my $contig_in,'<',"$fromD/Contig"))
    {
        open(my $contig_out,'>',"$toD/Contig")
            || die "could not open $toD/Contig: $!";
        while (defined(my $line = <$contig_in>))
        {
            if (($line =~ /^(\S+)/) && $to_keep{$1})
            {
                print {$contig_out} $line;
            }
        }
        close($contig_in);
        close($contig_out) || die "could not finish writing $toD/Contig: $!";
    }

    return;
}

# Copy the subsystem-cell tables, restricted to the selected genomes.
#
# IsGenomeOf is read first: rows whose first column is a key of %$genomes
# are kept and their second column is remembered.  The other tables are then
# filtered against the remembered values:
#   HasSSCell - second column; the first column must not contain whitespace
#   IsRoleOf  - second tab-separated column; the first column may contain
#               anything except a tab
#   SSCell    - first column
#
# Parameters:
#   $fromD   - directory holding the full tables
#   $toD     - existing directory that receives the filtered tables
#   $genomes - hash ref whose keys are the genome ids to keep
#
# An input table that cannot be opened is skipped silently.  Dies if an
# output file cannot be created or cannot be completely written.
sub handle_sscells {
    my($fromD,$toD,$genomes) = @_;

    my %to_keep;

    if (open(my $igo_in,'<',"$fromD/IsGenomeOf"))
    {
        open(my $igo_out,'>',"$toD/IsGenomeOf")
            || die "could not open $toD/IsGenomeOf: $!";
        while (defined(my $line = <$igo_in>))
        {
            if (($line =~ /^(\S+)\t(\S+)/) && $genomes->{$1})
            {
                $to_keep{$2} = 1;
                print {$igo_out} $line;
            }
        }
        close($igo_in);
        # Buffered write errors only surface at close time, so check it.
        close($igo_out) || die "could not finish writing $toD/IsGenomeOf: $!";
    }

    if (open(my $has_in,'<',"$fromD/HasSSCell"))
    {
        open(my $has_out,'>',"$toD/HasSSCell")
            || die "could not open $toD/HasSSCell: $!";
        while (defined(my $line = <$has_in>))
        {
            if (($line =~ /^\S+\t(\S+)/) && $to_keep{$1})
            {
                print {$has_out} $line;
            }
        }
        close($has_in);
        close($has_out) || die "could not finish writing $toD/HasSSCell: $!";
    }

    if (open(my $iro_in,'<',"$fromD/IsRoleOf"))
    {
        open(my $iro_out,'>',"$toD/IsRoleOf")
            || die "could not open $toD/IsRoleOf: $!";
        while (defined(my $line = <$iro_in>))
        {
            if (($line =~ /^[^\t]+\t(\S+)/) && $to_keep{$1})
            {
                print {$iro_out} $line;
            }
        }
        close($iro_in);
        close($iro_out) || die "could not finish writing $toD/IsRoleOf: $!";
    }

    if (open(my $ssc_in,'<',"$fromD/SSCell"))
    {
        open(my $ssc_out,'>',"$toD/SSCell")
            || die "could not open $toD/SSCell: $!";
        while (defined(my $line = <$ssc_in>))
        {
            if (($line =~ /^(\S+)/) && $to_keep{$1})
            {
                print {$ssc_out} $line;
            }
        }
        close($ssc_in);
        close($ssc_out) || die "could not finish writing $toD/SSCell: $!";
    }

    return;
}

# Copy the named tables from $fromD to $toD without any filtering.
#
# Parameters:
#   $fromD - directory holding the tables
#   $toD   - existing directory that receives the copies
#   $files - array ref of file names, relative to both directories
#
# A file that cannot be opened for reading is skipped silently and no output
# file is created for it.  Dies if an output file cannot be created or cannot
# be completely written.
sub handle_just_copy {
    my($fromD,$toD,$files) = @_;

    foreach my $file (@$files)
    {
        # Three-argument open takes the name literally; the two-argument form
        # would strip leading/trailing whitespace from it.
        open(my $in,'<',"$fromD/$file") || next;
        open(my $out,'>',"$toD/$file")  || die "could not open $toD/$file: $!";
        while (defined(my $line = <$in>))
        {
            print {$out} $line;
        }
        # Buffered write errors only surface at close time, so check it.
        close($out) || die "could not finish writing $toD/$file: $!";
        close($in);
    }

    return;
}

# Copy the named tables, keeping only the rows that belong to a selected
# genome.
#
# For every line the decision is made as follows:
#   - if the line contains a "fig|<digits>.<digits>" id anywhere, the line is
#     kept exactly when that genome part is a key of %$genomes;
#   - otherwise the line is kept when its first whitespace-delimited token is
#     a key of %$genomes.
#
# NOTE(review): only the first fig| id on a line is examined, so a row that
# relates a feature of a kept genome to a feature of another genome is kept
# as well -- confirm that this is intended for the relationship tables.
#
# Parameters:
#   $fromD   - directory holding the full tables
#   $toD     - existing directory that receives the filtered tables
#   $genomes - hash ref whose keys are the genome ids to keep
#   $files   - array ref of file names, relative to both directories
#
# A file that cannot be opened for reading is skipped silently.  Dies if an
# output file cannot be created or cannot be completely written.
sub handle_standard {
    my($fromD,$toD,$genomes,$files) = @_;

    foreach my $file (@$files)
    {
        open(my $in,'<',"$fromD/$file") || next;
        open(my $out,'>',"$toD/$file")  || die "could not open $toD/$file: $!";
        while (defined(my $line = <$in>))
        {
            my $keep;
            if ($line =~ /fig\|(\d+\.\d+)/)
            {
                # A fig id decides on its own; the first column is not
                # consulted when the id's genome is not selected.
                $keep = $genomes->{$1};
            }
            elsif ($line =~ /^(\S+)/)
            {
                $keep = $genomes->{$1};
            }
            print {$out} $line if $keep;
        }
        # Buffered write errors only surface at close time, so check it.
        close($out) || die "could not finish writing $toD/$file: $!";
        close($in);
    }

    return;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3