diff -uNr meme_4.6.1/scripts/meme2meme.pl.in meme_4.6.1_patch_1/scripts/meme2meme.pl.in
--- meme_4.6.1/scripts/meme2meme.pl.in 2011-02-04 05:23:03.000000000 +1000
+++ meme_4.6.1_patch_1/scripts/meme2meme.pl.in 2011-05-23 16:09:52.274185111 +1000
@@ -490,18 +490,30 @@
while ($line = <$fp>) {
if ($line =~ m/Background letter frequencies \(from(\s+[^\)]*)?/) {
my $source = $1;
+ my $alphindex = 0;
+ my @freqs = ();
$source =~ s/^\s*//; #trim left
# read the background frequencies
- do {
+ while ($alphindex < scalar(@alphabet)) {
$line = <$fp>;
- } while (defined($line) && $line =~ m/^\s*$/);# skip empty lines
- die("Expected background frequencies on line following landmark \"Background letter frequencies (\" but got EOF.") unless defined($line);
- my @freqs = split(/\s+/, $line);
- for (my $i = 0; $i < scalar(@alphabet); $i++) {
- die("Failed parsing background from \"$line\" in \"$file\", letter " . uc($freqs[$i*2]). " doesn't match expected " . $alphabet[$i] . ".\n")
- if (uc($freqs[$i*2]) ne $alphabet[$i]);
- my $freq = parse_double($freqs[$i*2 + 1]);
- $bg{$alphabet[$i]} = $freq;
+ die("Encountered EOF while reading background frequencies.")
+ unless defined $line;
+ chomp($line); # remove EOL
+ $line =~ s/^\s*//; # trim left
+ $line =~ s/\s*$//; # trim right
+ my @line_freqs = split(/\s+/, $line);
+ push(@freqs, @line_freqs);
+ while(scalar(@freqs) > 2 and $alphindex < scalar(@alphabet)) {
+ my $letter = uc(shift(@freqs));
+ if ($letter ne $alphabet[$alphindex]) {
+ die("Failed parsing background from \"$line\" in \"$file\", " .
+ "letter \"" . $letter. "\" doesn't match expected " .
+ $alphabet[$alphindex] . ".\n");
+ }
+ my $freq = parse_double(shift(@freqs));
+ $bg{$alphabet[$alphindex]} = $freq;
+ $alphindex++;
+ }
}
$bg{source} = $source;
$bg{dna} = $is_dna;
diff -uNr meme_4.6.1/scripts/meme-chip.pl.in meme_4.6.1_patch_1/scripts/meme-chip.pl.in
--- meme_4.6.1/scripts/meme-chip.pl.in 2011-03-30 08:36:37.000000000 +1000
+++ meme_4.6.1_patch_1/scripts/meme-chip.pl.in 2011-05-23 16:50:42.064184029 +1000
@@ -9,7 +9,7 @@
use Fcntl qw(O_CREAT O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Temp qw(tempfile);
-use File::Spec::Functions qw(abs2rel catfile catdir);
+use File::Spec::Functions qw(abs2rel catfile catdir splitdir tmpdir);
use Getopt::Long;
use HTML::Template;
use List::Util qw(min max);
@@ -34,6 +34,8 @@
-ccut : maximum size of a sequence before it is cut down to a centered section
-desc : description of the job
-fdesc : file containing plain text description of the job
+ -run-mast : run MAST - motif alignment & search tool
+ -run-ama : run AMA - Average motif affinity.
-noecho : don't echo the commands run
-tar : create a tar.gz file of the outputs
-help : display this help message
@@ -70,6 +72,9 @@
my $bindir = '@BINDIR@';
my $site_url = '@SITE_URL@';
my $template_file = '@APPCONFIGDIR@/meme-chip.tmpl';
+my $tmpdir = '@TMP_DIR@';
+# use the perl default if none is supplied or the replace fails
+$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/);
# Required Argument
my $sequences;
@@ -78,6 +83,8 @@
my $help = 0; # FALSE
my $echo = 1; # TRUE
my $tar = 0; # FALSE
+my $run_mast = 0; # FALSE
+my $run_ama = 0; # FALSE
my $appverbosity = 1; #all the applications write to stderr currently so had to disable this
my $clobber = 1; # TRUE
my $outdir = 'memechip_out';
@@ -101,7 +108,6 @@
# Derived Globals
my $outfile;
-my $tarfile;
my $stderr_txt;
my $stdout_txt;
my @tomtom_dbnames = ();
@@ -120,14 +126,16 @@
eval {
# redirect stderr to a temp file as we want to log it
my $olderr;
- my ($tmperr, $tnmerr) = tempfile(UNLINK => 1);
- open $olderr, ">&STDERR" or die("Can't dup STDERR: $!");
- open STDERR, '>', $tnmerr or die("Can't dredirect STDERR to temp file: $!");
+ my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
+ open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
+ open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!");
# parse options
$options_success = GetOptions(
'help|?' => \$help,
'noecho' => sub {$echo = 0},
'tar' => \$tar,
+ 'run-mast' => \$run_mast,
+ 'run-ama' => \$run_ama,
'o=s' => sub {$clobber = 0; shift; $outdir = shift},
'oc=s' => \$outdir,
'desc=s' => \$desc,
@@ -153,7 +161,7 @@
'meme-pal' => \$meme_palindromes,
);
# reset STDERR
- open STDERR, ">&", $olderr, or die("Can't reset STDERR: $!");
+ open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
# slurp errors
seek($tmperr, SEEK_SET, 0);
chomp($options_err = do {local ($/); <$tmperr>});
@@ -256,7 +264,6 @@
}
# calculate some file names
$outfile = catfile($outdir, 'index.html');
- $tarfile = catfile($outdir, 'meme-chip.tar.gz');
$stdout_txt = catfile($outdir,'stdout.txt');
$stderr_txt = catfile($outdir, 'stderr.txt');
};
@@ -271,8 +278,7 @@
&main();
&write_html("", 0); # html to go in tar
if ($tar) {
- &tar_output();
- my $tarname = fileparse($tarfile);
+ my $tarname = &tar_output();
&write_html("", 0, undef, $tarname); # index which links to tar
}
};
@@ -340,8 +346,18 @@
$i--;
}
}
+ # we want to tar including the outdir to avoid a tarbomb
+ # find the real name of the outdir (user could have passed '.')
+ my $folder = (splitdir(abs_path($outdir)))[-1];
+ my $folder_dir = abs_path(catdir($outdir, '..'));
+ # append the folder to the outfiles
+ for (my $i = 0; $i < scalar(@outfiles); $i++) {
+ $outfiles[$i] = catfile($folder, $outfiles[$i]);
+ }
+ my $tarname = $folder . ".tar.gz";
+ my $tarfile = catfile($outdir, $tarname);
# run tar
- system('tar', '-czf', $tarfile, '-C', $outdir, @outfiles);
+ system('tar', '-czf', $tarfile, '-C', $folder_dir, @outfiles);
my $errmsg = '';
if ($? == -1) {
$errmsg = "Failed to execute tar: $!";
@@ -352,6 +368,7 @@
$errmsg = sprintf("tar exited with value %d indicating failure.", $? >> 8);
}
die($errmsg) if $errmsg;
+ return $tarname;
}
################################################################################
@@ -470,28 +487,32 @@
##################################################################
# Run MAST on MEME motifs
##################################################################
- my $mast_prog = "mast";
- my $mast_outdir = "meme_mast_out";
- my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $meme_motifs, $sequences, '-ev', $num_centered_sequences);
- push(@mast_args, '-bfile', $bfile) if defined($bfile);
- push(@mast_args,'-nostatus') unless $appverbosity >= 2;
- my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"];
- my $mast_comment = 'Predicted locations of all MEME motifs (p < 0.0001) in the input sequences.';
- &report_and_run(\@report_commands, \@report_outputs,
- $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment);
+ if ($run_mast) {
+ my $mast_prog = "mast";
+ my $mast_outdir = "meme_mast_out";
+ my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $meme_motifs, $sequences, '-ev', $num_centered_sequences);
+ push(@mast_args, '-bfile', $bfile) if defined($bfile);
+ push(@mast_args,'-nostatus') unless $appverbosity >= 2;
+ my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"];
+ my $mast_comment = 'Predicted locations of all MEME motifs (p < 0.0001) in the input sequences.';
+ &report_and_run(\@report_commands, \@report_outputs,
+ $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment);
+ }
##################################################################
# Run AMA on MEME motifs
##################################################################
- my $ama_prog = "ama";
- my $ama_outdir = "meme_ama_out";
- my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir));
- push(@ama_args, '--sdbg', 0) unless defined($bfile);
- push(@ama_args, $meme_motifs, $sequences);
- push(@ama_args, $bfile) if defined($bfile);
- my $ama_outputs = ["", "ama.txt", "ama.xml"];
- my $ama_comment = 'Estimated binding affinity of each MEME motif to each input sequence.';
- &report_and_run(\@report_commands, \@report_outputs,
- $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment);
+ if ($run_ama) {
+ my $ama_prog = "ama";
+ my $ama_outdir = "meme_ama_out";
+ my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir));
+ push(@ama_args, '--sdbg', 0) unless defined($bfile);
+ push(@ama_args, $meme_motifs, $sequences);
+ push(@ama_args, $bfile) if defined($bfile);
+ my $ama_outputs = ["", "ama.txt", "ama.xml"];
+ my $ama_comment = 'Estimated binding affinity of each MEME motif to each input sequence.';
+ &report_and_run(\@report_commands, \@report_outputs,
+ $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment);
+ }
$level--;
}
@@ -532,28 +553,32 @@
##################################################################
# Run MAST on DREME motifs
##################################################################
- my $mast_prog = "mast";
- my $mast_outdir = "dreme_mast_out";
- my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $dreme_motifs, $sequences, '-ev', $num_centered_sequences);
- push(@mast_args, '-bfile', $bfile) if defined($bfile);
- push(@mast_args, '-nostatus') unless $appverbosity >= 2;
- my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"];
- my $mast_comment = 'Predicted locations of all DREME motifs (p < 0.0001) in the input sequences.';
- &report_and_run(\@report_commands, \@report_outputs,
- $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment);
+ if ($run_mast) {
+ my $mast_prog = "mast";
+ my $mast_outdir = "dreme_mast_out";
+ my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $dreme_motifs, $sequences, '-ev', $num_centered_sequences);
+ push(@mast_args, '-bfile', $bfile) if defined($bfile);
+ push(@mast_args, '-nostatus') unless $appverbosity >= 2;
+ my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"];
+ my $mast_comment = 'Predicted locations of all DREME motifs (p < 0.0001) in the input sequences.';
+ &report_and_run(\@report_commands, \@report_outputs,
+ $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment);
+ }
##################################################################
# Run AMA on DREME motifs
##################################################################
- my $ama_prog = "ama";
- my $ama_outdir = "dreme_ama_out";
- my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir));
- push(@ama_args, '--sdbg', 0) unless defined($bfile);
- push(@ama_args, $dreme_motifs, $sequences);
- push(@ama_args, $bfile) if defined($bfile);
- my $ama_outputs = ["", "ama.txt", "ama.xml"];
- my $ama_comment = 'Estimated binding affinity of each DREME motif to each input sequence.';
- &report_and_run(\@report_commands, \@report_outputs,
- $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment);
+ if ($run_ama) {
+ my $ama_prog = "ama";
+ my $ama_outdir = "dreme_ama_out";
+ my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir));
+ push(@ama_args, '--sdbg', 0) unless defined($bfile);
+ push(@ama_args, $dreme_motifs, $sequences);
+ push(@ama_args, $bfile) if defined($bfile);
+ my $ama_outputs = ["", "ama.txt", "ama.xml"];
+ my $ama_comment = 'Estimated binding affinity of each DREME motif to each input sequence.';
+ &report_and_run(\@report_commands, \@report_outputs,
+ $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment);
+ }
$level--;
}
@@ -589,40 +614,37 @@
my ($filename) = @_;
# redirect stdout to a temp file
my $oldout;
- my ($tmpout, $tnmout) = tempfile(UNLINK => 1);
- open $oldout, ">&STDOUT" or die("Can't dup STDOUT: $!");
- open STDOUT, '>', $tnmout or die("Can't redirect STDOUT to temp file: $!");
+ my $tmpout = tempfile('getsize_stdout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
+ open($oldout, ">&STDOUT") or die("Can't dup STDOUT: $!");
+ open(STDOUT, '>&', $tmpout) or die("Can't redirect STDOUT to temp file: $!");
# redirect stderr to a temp file
my $olderr;
- my ($tmperr, $tnmerr) = tempfile(UNLINK => 1);
- open $olderr, ">&STDERR" or die("Can't dup STDERR: $!");
- open STDERR, '>', $tnmerr or die("Can't dredirect STDERR to temp file: $!");
+ my $tmperr = tempfile('getsize_stderr_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
+ open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
+ open(STDERR, '>&', $tmperr) or die("Can't dredirect STDERR to temp file: $!");
# run command
system($bindir.'/getsize', $filename);
# copy status
my $status = $?;
# reset stderr
- open STDERR, ">&", $olderr, or die("Can't reset STDERR: $!");
+ open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
# reset stdout
- open STDOUT, ">&", $oldout or die("Can't reset STDOUT: $!");
+ open(STDOUT, ">&", $oldout) or die("Can't reset STDOUT: $!");
# slurp output and errors
- my $lineterm = $/;
- undef $/;
- seek($tmpout, SEEK_SET, 0);
- my $output = <$tmpout>;
+ seek($tmpout, 0, SEEK_SET);
+ my $output = do {local $/ = undef; <$tmpout>};
close($tmpout);
- seek($tmperr, SEEK_SET, 0);
- my $errors = <$tmperr>;
+ seek($tmperr, 0, SEEK_SET);
+ my $errors = do {local $/ = undef; <$tmperr>};
close($tmperr);
- $/ = $lineterm;
#log errors
if ($errors) {
- my $msg = "'getsize $filename': ".$errors;
+ my $msg = "'getsize $filename': " . $errors;
$logger->error($msg) if ($logger);
- print STDERR $msg, "\n";
+ print(STDERR $msg, "\n");
}
# check status
diff -uNr meme_4.6.1/scripts/meme-chip_webservice.pl.in meme_4.6.1_patch_1/scripts/meme-chip_webservice.pl.in
--- meme_4.6.1/scripts/meme-chip_webservice.pl.in 2011-03-30 08:35:54.000000000 +1000
+++ meme_4.6.1_patch_1/scripts/meme-chip_webservice.pl.in 2011-05-23 16:37:38.394184374 +1000
@@ -23,6 +23,8 @@
meme-chip_webservice [options]
Options:
+ -run-mast : run MAST - motif alignment & search tool
+ -run-ama : run AMA - average motif affinity
-bfile : background file
MEME Specific Options:
@@ -45,6 +47,8 @@
my $log_file = 'memechip-log';
# Options
+my $run_mast = 0; # FALSE
+my $run_ama = 0; #FALSE
my $bfile = undef;
my $meme_mod = undef;
my $meme_minw = undef;
@@ -73,6 +77,8 @@
eval {
GetOptions(
+ 'run-mast' => \$run_mast,
+ 'run-ama' => \$run_ama,
'bfile=s' => \$bfile,
'meme-mod=s' => \$meme_mod,
'meme-minw=i' => \$meme_minw,
@@ -122,6 +128,8 @@
my $exe = catfile($bin_dir, 'meme-chip');
# prepare meme-chip arguments
my @args = ('-noecho', '-tar', '-oc', '.', '-meme-time', $MAXTIME);
+push(@args, '-run-mast') if $run_mast;
+push(@args, '-run-ama') if $run_ama;
push(@args, '-fdesc', 'description') if (-e 'description');
push(@args, '-bfile', $bfile) if $bfile;
foreach my $db (@motif_dbs) {
diff -uNr meme_4.6.1/scripts/MemeWebUtils.pm.in meme_4.6.1_patch_1/scripts/MemeWebUtils.pm.in
--- meme_4.6.1/scripts/MemeWebUtils.pm.in 2011-03-10 08:31:41.000000000 +1000
+++ meme_4.6.1_patch_1/scripts/MemeWebUtils.pm.in 2011-05-23 16:50:42.064184029 +1000
@@ -13,22 +13,37 @@
valid_address valid_meme_version add_status_msg update_status loggable_args loggable_date write_invocation_log invoke);
use Cwd;
-use Fcntl qw(O_APPEND O_CREAT O_WRONLY SEEK_SET);
+use Fcntl qw(O_APPEND O_CREAT O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Copy qw(copy);
-use File::Spec::Functions qw(catfile splitdir);
+use File::Spec::Functions qw(catfile splitdir tmpdir);
use File::Temp qw(tempfile);
use HTTP::Request::Common qw(GET);
use XML::Simple;
use HTML::PullParser;
use HTML::Template;
use Sys::Hostname;
+use Time::HiRes qw(gettimeofday tv_interval);
use lib qw(@PERLLIBDIR@);
use CatList qw(load_categories load_entry);
+# Setup logging
+my $logger = undef;
+eval {
+ require Log::Log4perl;
+ Log::Log4perl->import();
+};
+unless ($@) {
+ Log::Log4perl::init('@APPCONFIGDIR@/logging.conf');
+ $logger = Log::Log4perl->get_logger('meme.cgi.utils');
+}
+
my $template_dir = '@WEB_DIR@/cgi-bin';
my $service_invocation_log_dir = '@MEMELOGS@';
+my $tmpdir = '@TMP_DIR@';
+# use the perl default if none is supplied or the replace fails
+$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/);
##############################################################################
# Functions
##############################################################################
@@ -98,6 +113,17 @@
sub is_numeric { defined &getnum($_[0]) }
#
+# Output large integers with commas
+#
+#
+sub commify {
+ my $input = shift;
+ $input = reverse $input;
+ $input =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
+ return reverse $input;
+}
+
+#
# get the alphabet of a string: DNA or PROTEIN
#
# Used in
@@ -227,8 +253,8 @@
push(@found_files, $entry) if (defined($file) && -e $file && -s $file);
}
- my $fh; # I'm suspicious that the 0777 may be too permissive (but that's what we had before)
- sysopen($fh, $output_file, O_CREAT | O_WRONLY, 0777) or die("Failed to open \"$output_file\".");
+ my $fh;
+ sysopen($fh, $output_file, O_CREAT | O_WRONLY | O_TRUNC) or die("Failed to open \"$output_file\".");
my $template = HTML::Template->new(filename => "$template_dir/job_status.tmpl");
$template->param(program => $program, refresh => $refresh, files => \@found_files, msgs => $msg_list, status => $status);
print $fh $template->output;
@@ -342,21 +368,36 @@
# PROG => program name
# BIN => program directory
# ARGS => reference to array of program arguments
-# IN_FILE => file to set as stdin
+# IN_FILE => file name or handle to set as stdin
# IN_VAR => variable (or reference to variable) to feed in as stdin
-# OUT_FILE => file to store stdout
+# IN_NAME => the displayed name for the source of stdin
+# ALL_FILE => file name or handle to store stdout and stderr
+# ALL_VAR => reference to variable to store stdout and stderr
+# ALL_NAME => the displayed name for the destination of output
+# OUT_FILE => file name or handle to store stdout
# OUT_VAR => reference to variable to store stdout
-# ERR_FILE => file to store stderr
+# OUT_NAME => the displayed name for the destination of stdout
+# ERR_FILE => file name or handle to store stderr
# ERR_VAR => reference to variable to store stderr
+# ERR_NAME => the displayed name for the destination of stderr
# CHECK_STATUS => true to die on bad status codes
+# TRUNCATE => true to truncate output files if they exist
# CMD => reference to store a human readable form of the command run
+# TIME => reference to store the running time in seconds (floating point)
+# TMPDIR => directory to create temporary files
#
sub invoke {
my %opts = @_;
+ my $logger = $opts{LOGGER};
+ $logger->trace("sub invoke") if $logger;
+ # output truncates?
+ my $dir = ($opts{TRUNCATE} ? '>' : '>>'); #direction
+ # temp file directory
+ my $tmpdir = ($opts{TMPDIR} ? $opts{TMPDIR} : &tmpdir());
# get program
my $prog = $opts{PROG};
- my $exe = (defined($opts{BIN}) ? catfile($opts{BIN}, $prog) : $prog);
- die("No program passed to _invoke") unless defined($prog);
+ die("No program passed to invoke") unless defined($prog);
+ my $exe = (defined($opts{BIN}) ? &catfile($opts{BIN}, $prog) : $prog);
# get args
my $args_ref = $opts{ARGS};
my @args = ();
@@ -364,85 +405,161 @@
@args = @{$args_ref};
}
# make command line for printing
- my $cmd = loggable_args($prog, @args);
+ my $cmd = &loggable_args($prog, @args);
+ # do redirection
+ my $display_name;
# check if we're redirecting stdin
my ($in_old, $in_tmp, $in_nam);
if (defined($opts{IN_FILE}) || defined($opts{IN_VAR})) {
- #save stdin
+ $logger->trace("invoke - redirecing stdin") if $logger;
+ # save stdin
open($in_old, "<&STDIN") or die("Can't dup STDIN: $!");
+ # redirect stdin
if (defined($opts{IN_FILE})) { # read stdin from specified file
- $in_nam = $opts{IN_FILE};
- $cmd .= ' < '.loggable_arg($in_nam);
+ if (ref($opts{IN_FILE})) { # file handle (we hope)
+ my $handle = $opts{IN_FILE};
+ open(STDIN, '<&', $handle) or die("Can't redirect STDIN: $!");
+ $display_name = 'input_file';
+ } else { # file name (we hope)
+ my $name = $opts{IN_FILE};
+ open(STDIN, '<', $name) or die("Can't redirect STDIN: $!");
+ $display_name = &loggable_arg($name);
+ }
} else { # read stdin from a temp file which we preload with the var
- # make a temporary file
- ($in_tmp, $in_nam) = tempfile(UNLINK => 1);
- # write the variable to the file
- my $var = $opts{IN_VAR};
+ $in_tmp = &tempfile('stdin_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file
+ my $var = $opts{IN_VAR}; # variable could be passed as a ref or scalar
my $var_ref = (ref($var) ? $var : \$var);
- print $in_tmp ${$var_ref};
- $cmd .= ' < $input';
+ print $in_tmp ${$var_ref}; # write variable to file
+ seek($in_tmp, 0, SEEK_SET); # rewind file
+ open(STDIN, '<&', $in_tmp) or die("Can't redirect STDIN: $!");
+ $display_name = '$input';
}
- open(STDIN, '<', $in_nam) or die("Can't redirect STDIN: $!");
+ $display_name = $opts{IN_NAME} if defined($opts{IN_NAME});
+ $cmd .= ' < ' . $display_name;
}
- # check if we're redirecting stdout
- my ($out_old, $out_tmp, $out_nam);
- if (defined($opts{OUT_FILE}) || defined($opts{OUT_VAR})) {
- #save stdout
+ # check for output redirection
+ my ($out_old, $err_old);
+ my ($all_tmp, $out_tmp, $err_tmp);
+ if (defined($opts{ALL_FILE}) || defined($opts{ALL_VAR})) {
+ $logger->trace("invoke - redirecing output") if $logger;
+ # save stdout and stderr
open($out_old, ">&STDOUT") or die("Can't dup STDOUT: $!");
- if (defined($opts{OUT_FILE})) { # send stdout to specified file
- $out_nam = $opts{OUT_FILE};
- $cmd .= ' 1> '.loggable_arg($out_nam);
- } else { # send stdout to a temp file which we can read in to the var
- # make a temporary file
- ($out_tmp, $out_nam) = tempfile(UNLINK => 1);
- $cmd .= ' 1> $output';
- }
- open(STDOUT, '>', $out_nam) or die("Can't redirect STDOUT: $!");
- }
- # check if we're redirecting stderr
- my ($err_old, $err_tmp, $err_nam);
- if (defined($opts{ERR_FILE}) || defined($opts{ERR_VAR})) {
- #save stderr
open($err_old, ">&STDERR") or die("Can't dup STDERR: $!");
- if (defined($opts{ERR_FILE})) { # send stderr to specified file
- $err_nam = $opts{ERR_FILE};
- $cmd .= ' 2> '.loggable_arg($err_nam);
- } else { # send stderr to a temp file which we can read in to the var
- # make a temporary file
- ($err_tmp, $err_nam) = tempfile(UNLINK => 1);
- $cmd .= ' 2> $error';
+ # redirect stdout and stderr
+ if (defined($opts{ALL_FILE})) { # send output to specified file
+ truncate($opts{ALL_FILE}, 0) if ($opts{TRUNCATE});
+ if (ref($opts{ALL_FILE})) { # file handle (we hope)
+ my $handle = $opts{ALL_FILE};
+ open(STDOUT, '>>&', $handle) or die("Can't redirect STDOUT: $!");
+ open(STDERR, '>>&', $handle) or die("Can't redirect STDERR: $!");
+ $display_name = 'output_file';
+ } else { # file name (we hope)
+ my $name = $opts{ALL_FILE};
+ open(STDOUT, '>>', $name) or die("Can't redirect STDOUT: $!");
+ open(STDERR, '>>', $name) or die("Can't redirect STDERR: $!");
+ $display_name = &loggable_arg($name);
+ }
+ } else {
+ $all_tmp = &tempfile('allout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file
+ open(STDOUT, '>>&', $all_tmp) or die("Can't redirect STDOUT: $!");
+ open(STDERR, '>>&', $all_tmp) or die("Can't redirect STDERR: $!");
+ $display_name = '$all_messages';
+ }
+ # turn off buffering so output order is maintained
+ my $oldfh;
+ $oldfh = select(STDOUT);
+ $| = 1;
+ select(STDERR);
+ $| = 1;
+ select($oldfh);
+ # update command
+ $display_name = $opts{ALL_NAME} if defined($opts{ALL_NAME});
+ $cmd .= ' &'. $dir . ' ' . $display_name;
+ } else {
+ # check if we're redirecting stdout
+ if (defined($opts{OUT_FILE}) || defined($opts{OUT_VAR})) {
+ $logger->trace("invoke - redirecing stdout") if $logger;
+ # save stdout
+ open($out_old, ">&STDOUT") or die("Can't dup STDOUT: $!");
+ # redirect stdout
+ if (defined($opts{OUT_FILE})) { # send stdout to specified file
+ if (ref($opts{OUT_FILE})) { # file handle (we hope)
+ my $handle = $opts{OUT_FILE};
+ open(STDOUT, $dir.'&', $handle) or die("Can't redirect STDOUT: $!");
+ $display_name = 'output_file';
+ } else { # file name (we hope)
+ my $name = $opts{OUT_FILE};
+ open(STDOUT, $dir, $name) or die("Can't redirect STDOUT: $!");
+ $display_name = &loggable_arg($name);
+ }
+ } else { # send stdout to a temp file which we can read in to the var
+ $out_tmp = &tempfile('stdout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file
+ open(STDOUT, '>&', $out_tmp) or die("Can't redirect STDOUT: $!");
+ $display_name = '$output_messages';
+ }
+ $display_name = $opts{OUT_NAME} if defined($opts{OUT_NAME});
+ $cmd .= ' 1'. $dir . ' ' . $display_name;
+ }
+ # check if we're redirecting stderr
+ if (defined($opts{ERR_FILE}) || defined($opts{ERR_VAR})) {
+ $logger->trace("invoke - redirecing stderr") if $logger;
+ # save stderr
+ open($err_old, ">&STDERR") or die("Can't dup STDERR: $!");
+ # redirect stderr
+ if (defined($opts{ERR_FILE})) { # send stderr to specified file
+ if (ref($opts{ERR_FILE})) { # file handle (we hope)
+ my $handle = $opts{ERR_FILE};
+ open(STDERR, $dir.'&', $handle) or die("Can't redirect STDERR: $!");
+ $display_name = 'error_file';
+ } else { # file name (we hope)
+ my $name = $opts{ERR_FILE};
+ open(STDERR, $dir, $name) or die("Can't redirect STDERR: $!");
+ $display_name = &loggable_arg($name);
+ }
+ } else { # send stderr to a temp file which we can read in to the var
+ $err_tmp = &tempfile('stderr_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file
+ open(STDERR, '>&', $err_tmp) or die("Can't redirect STDERR: $!");
+ $display_name = '$error_messages';
+ }
+ $display_name = $opts{ERR_NAME} if defined($opts{ERR_NAME});
+ $cmd .= ' 2' . $dir . ' ' . $display_name;
}
- open(STDERR, '>', $err_nam) or die("Can't redirect STDERR: $!");
}
+ # record the time before starting the program
+ $logger->trace("invoke - recording start time") if $logger;
+ my $t0 = [&gettimeofday()];
# run the command
+ $logger->trace("invoke - running") if $logger;
my $status = system($exe, @args);
- # reset stdin
+ # record the time after completing the program
+ $logger->trace("invoke - recording end time") if $logger;
+ my $t1 = [&gettimeofday()];
+ # check if the caller wants the elapsed time
+ if (defined($opts{TIME})) {
+ ${$opts{TIME}} = &tv_interval($t0, $t1);
+ }
+ # reset file descriptors
if (defined($in_old)) {
+ $logger->trace("invoke - reseting stdin") if $logger;
open(STDIN, "<&", $in_old) or die("Can't reset STDIN: $!");
}
- # reset stdout
if (defined($out_old)) {
+ $logger->trace("invoke - reseting stdout") if $logger;
open(STDOUT, ">&", $out_old) or die("Can't reset STDOUT: $!");
}
- # reset stderr
if (defined($err_old)) {
+ $logger->trace("invoke - reseting stderr") if $logger;
open(STDERR, ">&", $err_old) or die("Can't reset STDERR: $!");
}
- # clean up stdin temp file
- if (defined($in_tmp)) {
- close($in_tmp);
- }
- # read and close stdout temp file
- if (defined($out_tmp)) {
- ${$opts{OUT_VAR}} = do {local $/ = undef; <$out_tmp>};
- close($out_tmp);
- }
- # read and close stderr temp file
- if (defined($err_tmp)) {
- ${$opts{ERR_VAR}} = do {local $/ = undef; <$err_tmp>};
- close($err_tmp);
- }
+ # close stdin temporary file
+ close($in_tmp) if (defined($in_tmp));
+ # rewind, slurp and close temporary files
+ ${$opts{ALL_VAR}} = &rewind_slurp_close($all_tmp) if (defined($all_tmp));
+ ${$opts{OUT_VAR}} = &rewind_slurp_close($out_tmp) if (defined($out_tmp));
+ ${$opts{ERR_VAR}} = &rewind_slurp_close($err_tmp) if (defined($err_tmp));
+
if ($opts{CHECK_STATUS}) {
+ $logger->trace("invoke - checking status") if $logger;
# check status
if ($status == -1) {
die("Failed to execute command '". $cmd . "': $!");
@@ -458,9 +575,19 @@
${$opts{CMD}} = $cmd;
}
+ $logger->trace("invoke - returning") if $logger;
return $status;
}
+# utility used by invoke
+sub rewind_slurp_close {
+ my ($fh) = @_;
+ seek($fh, 0, SEEK_SET);
+ my $content = do {local $/ = undef; <$fh>};
+ close($fh);
+ return $content;
+}
+
##############################################################################
# Object Methods
##############################################################################
@@ -639,6 +766,20 @@
}
} # check_description
+#
+# I wanted to use the unlink1 used by File::Temp
+# but for some reason they don't allow it to be
+# exported. It shouldn't matter that I don't
+# get the stat compare before the delete though
+# as there shouldn't be any doubt that they
+# refer to the same file.
+#
+sub unlink1 {
+ $logger->trace("sub unlink1") if $logger;
+ my ($fh, $filename) = @_;
+ close($fh);
+ unlink($filename);
+}
#
# get sequence data
@@ -657,6 +798,7 @@
# glam2.pl
#
sub get_sequence_data {
+ $logger->trace("sub get_sequence_data") if $logger;
my $self = shift;
die("Expected Utils object") unless ref($self) eq 'MemeWebUtils';
my $PROGRAM = $self->{PROGRAM};
@@ -668,7 +810,11 @@
$shuffle, # shuffle sequences if true
$purge_score, # if nonempty the score for purge
$dust_cutoff, # if nonempty the cutoff for dust
+ $dataset_name # if nonempty the name of the dataset used in messages
) = @_;
+ $logger->debug('get_sequence_data - file: "' . $file . '" file size: ' . (-s $file)) if $logger;
+
+ $dataset_name = 'input dataset' unless defined($dataset_name);
# return values
my $fasta_data = ""; # sequence data in FASTA format
my $alphabet = "UNRECOGNIZED"; # sequence alphabet DNA/PROTEIN
@@ -688,14 +834,16 @@
my ($fasta_tmp, $fasta_nam);
# other vars
- my ($status, $error);
+ my ($status, $errors, $size, $has_problems);
# check that sequence data was provided
if (!$file && !$data) {
$self->whine(
- "You haven't entered any sequence data. ",
- "If you still wish to submit a query, please go back and enter the",
- "name of a sequence file or the actual sequences."
+ "No data was entered for the $dataset_name but $PROGRAM requires the ",
+ "$dataset_name to run. ",
+ "If you still wish to submit a query, please go back and either select ",
+ "a sequence file to upload or paste the actual sequences for the ",
+ "$dataset_name."
);
goto DONE;
}
@@ -703,128 +851,135 @@
# don't allow both datafile and data
if ($file && $data) {
$self->whine(
- "You may not enter both the name of a sequence file and sequences. ",
- "If you still wish to submit a query, please go back and erase either",
- "what you have written in the name of a file field or",
- "in the actual sequences field."
+ "Both the sequence file and actual sequences were entered for the ",
+ "$dataset_name. ",
+ "If you still wish to submit a query, please go back and either clear",
+ "the file selection or erase the content of the actual sequences field."
);
goto DONE;
}
# don't allow empty sequence files
if ($file && (-s $file) == 0) {
- $self->whine("Your sequence file is empty.");
+ $self->whine(
+ "The sequence file entered for the $dataset_name is empty ",
+ "but $PROGRAM requires at least 1 sequence to run. ",
+ "If you still wish to submit a query, please go back and select ",
+ "a non-empty sequence file to upload or paste actual sequences for ",
+ "the $dataset_name."
+ );
goto DONE;
}
- #
- # create a file containing the raw sequences
- #
-
- # slurp the uploaded file into a scalar if there was no textbox data
- $data = do {local $/; <$file>} unless ($data);
-
- # convert to UNIX EOL
- $data =~ s/\r\n/\n/g; # Windows -> UNIX eol
- $data =~ s/\r/\n/g; # MacOS -> UNIX eol
-
# print raw sequences to a file
- ($raw_tmp, $raw_nam) = tempfile(UNLINK => 1);
- print $raw_tmp $data;
-
- #
- # convert raw sequences to FASTA format using READSEQ
- #
- ($cooked_tmp, $cooked_nam) = tempfile(UNLINK => 1);
- $status = &invoke(BIN => $BIN_DIR, PROG => 'readseq', ARGS => ['-a', '-f=8', $raw_nam], OUT_FILE => $cooked_nam, ERR_VAR => \$error);
-
- # check for errors
- if ($status) {
- $self->whine(
- "An error occurred when the READSEQ program attempted to convert",
- "your dataset to FASTA format. ",
- "READSEQ returned:
$error
"
- );
- goto DONE;
+ ($raw_tmp, $raw_nam) = tempfile('raw_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
+ $logger->debug('get_sequence_data - $raw_tmp: ' . $raw_nam) if $logger;
+ if ($data) {
+ # convert to UNIX EOL
+ $data =~ s/\r\n/\n/g; # Windows -> UNIX eol
+ $data =~ s/\r/\n/g; # MacOS -> UNIX eol
+ # print to file
+ print $raw_tmp $data;
+ } else {
+ # copy the file changing to UNIX EOL
+ my $line;
+ while ($line = <$file>) {
+ chomp($line);
+ print $raw_tmp $line, "\n";
+ }
}
- #
- # get information on sequences
- #
- my($getsize_seqs, $getsize_cooked);
+ # run GETSIZE on the raw sequences and see if it reports any errors
+ $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => [$raw_nam],
+ OUT_VAR => \$size, ERR_VAR => \$errors, LOGGER => $logger);
+ $logger->debug('get_sequence_data - GETSIZE on raw seqs: ' . $size) if $logger;
+
+ if ($errors || $status) {
+ # maybe the file is not FASTA format? Attempt to convert it using READSEQ
+ ($cooked_tmp, $cooked_nam) = tempfile('cooked_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
+ $status = &invoke(BIN => $BIN_DIR, PROG => 'readseq', ARGS =>
+ ['-a', '-f=8', $raw_nam], OUT_FILE => $cooked_nam, ERR_VAR => \$errors);
- # Run the 'getsize' program to get information on the raw sequence data.
- &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => [$raw_nam], OUT_VAR => \$getsize_seqs, ERR_FILE => '/dev/null');
+ # check for errors
+ if ($status) {
+ $self->whine(
+ "The sequences submitted for the $dataset_name could not be read as ",
+ "FASTA format and automatic conversion using the READSEQ program ",
+ "failed. ",
+ "READSEQ returned:
$errors
",
+ "If you still wish to submit a query, please go back and select ",
+ "a FASTA formatted sequence file to upload or paste actual ",
+ "sequences in FASTA format for the $dataset_name."
+ );
+ goto DONE;
+ }
- # Run the 'getsize' program to get information on converted data; will
- # be unchanged if it is FASTA (note -nd means do not print warnings about duplicate sequences)
- $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => ['-nd', $cooked_nam], OUT_VAR => \$getsize_cooked, ERR_VAR => \$error);
+ #run GETSIZE on the converted sequences
+ #(note -nd means do not print warnings about duplicate sequences)
+ $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS =>
+ ['-nd', $cooked_nam], OUT_VAR => \$size,
+ ERR_VAR => \$errors);
+ $logger->debug('get_sequence_data - GETSIZE on cooked seqs: ' . $size) if $logger;
- # check for errors
- if ($error || $status) {
- $self->whine(
- "After converting to FASTA format using the READSEQ program,",
- "the following errors in your dataset were detected:
$error
",
- " Make sure all your sequences are in the same format since READSEQ",
- "assumes that all sequences are in the same format as the first sequence.",
- );
- goto DONE;
- }
-
- # choose between the original and the cooked version
- if ($getsize_seqs ne $getsize_cooked) {
- # use cooked dataset
- $getsize_seqs = $getsize_cooked;
+ # check for errors
+ if ($errors || $status) {
+ $self->whine(
+ "The sequences submitted for the $dataset_name were converted to ",
+ "FASTA format but in the process of detecting the alphabet and size ",
+ "the following errors in your dataset were detected:
$errors
",
+ " Make sure all your sequences are in the same format since READSEQ",
+ "assumes that all sequences are in the same format as the first sequence."
+ );
+ goto DONE;
+ }
+ # use converted dataset
($fasta_tmp, $fasta_nam) = ($cooked_tmp, $cooked_nam);
- close($raw_tmp);
+ &unlink1($raw_tmp, $raw_nam); # this should be done when perl exits as well
} else {
# use original dataset
($fasta_tmp, $fasta_nam) = ($raw_tmp, $raw_nam);
- close($cooked_tmp);
}
- $raw_tmp = undef;
- $raw_nam = undef;
- $cooked_tmp = undef;
- $cooked_nam = undef;
+ # just so we don't close a file twice later
+ $raw_tmp = undef; $raw_nam = undef; $cooked_tmp = undef; $cooked_nam = undef;
# extract out the sequence stats from getsize's response
my $letters;
- ($nseqs, $min, $max, $ave, $total, $letters) = split (' ', $getsize_seqs);
+ ($nseqs, $min, $max, $ave, $total, $letters) = split (' ', $size);
+
+ $has_problems = 0;
- #
- # final checks
- #
# check for problem reading the dataset
if ($nseqs <= 0) {
$self->whine(
- "Your dataset appears to be in a format that $PROGRAM does not recognize.",
- " Please check to be sure that your data is",
+ "The sequences submitted for the $dataset_name appear to be ",
+ "in a format that $PROGRAM does not recognize. ",
+ "Please check to be sure that your data is ",
"formatted properly."
);
- goto DONE;
+ $has_problems = 1;
}
# check for bad sequences
if ($nseqs > 0 && $min == 0) {
$self->whine(
- "Your dataset appears to contain one or more zero-length sequences.",
- " Please check to be sure that your data is",
+ "The sequences submitted for the $dataset_name appear to ",
+ "contain one or more zero-length sequences. ",
+ "Please check to be sure that your data is ",
" formatted properly."
);
- goto DONE;
+ $has_problems = 1;
}
# Make sure there isn't too much data.
if ($total > $maxsize) {
$self->whine(
- "The data you have entered contains more than $maxsize characters.",
- "$PROGRAM cannot process it at this time. ",
+ "The sequences submitted for the $dataset_name contain ",
+ &commify($total) . " characters but $PROGRAM can only accept ",
+ &commify($maxsize) . " characters. ",
"Please submit a smaller dataset."
);
- goto DONE;
+ $has_problems = 1;
}
- #
# prepare sequences and alphabet and do filtering if requested
- #
# calculate the alphabet
my $bad_symbols;
@@ -837,31 +992,35 @@
my @bad_lines = $self->find_bad_sequence_data($bad_symbols, $data);
$self->whine(
- "Your sequences contained the following unrecognized letters: $bad_symbols. ",
+ "The sequences submitted for the $dataset_name contained the ",
+ "following unrecognized letters: $bad_symbols. ",
"The unrecognized letters occurred in the following locations:",
@bad_lines,
" ",
"Please convert your sequences to one of the recognized sequence",
"alphabets."
);
- goto DONE;
+ $has_problems = 1;
}
+ goto DONE if $has_problems;
+
# shuffle sequences if requested
if ($shuffle) {
- ($shuffled_tmp, $shuffled_nam) = tempfile(UNLINK => 1);
+ ($shuffled_tmp, $shuffled_nam) = tempfile('shuffled_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
$status = &invoke(BIN => $BIN_DIR, PROG => 'fasta-shuffle-letters', ARGS => ['-tod'],
- IN_FILE => $fasta_nam, OUT_FILE => $shuffled_nam, ERR_VAR => \$error);
+ IN_FILE => $fasta_nam, OUT_FILE => $shuffled_nam, ERR_VAR => \$errors);
- if ($error || $status) {
+ if ($errors || $status) {
$self->whine(
- "After shuffling, the following errors resulted:
$error
",
+ "When shuffling the $dataset_name, the following errors ",
+ "resulted:
$errors
",
" Please check your sequences."
);
goto DONE;
}
- close($fasta_tmp);
+ &unlink1($fasta_tmp, $fasta_nam); # this should be done when perl exits as well
($fasta_tmp, $fasta_nam) = ($shuffled_tmp, $shuffled_nam);
$shuffled_tmp = undef;
$shuffled_nam = undef;
@@ -876,20 +1035,21 @@
# instead it always writes to a file called .
my $purge_out = $fasta_nam . '.' . $purge_score;
die("Purge output file already exists!") if (-e $purge_out);
- $status = &invoke(BIN => $BIN_DIR, PROG => 'purge', ARGS => \@purge_args, ERR_VAR => \$error);
+ $status = &invoke(BIN => $BIN_DIR, PROG => 'purge', ARGS => \@purge_args, ERR_VAR => \$errors);
# check for errors
- if ($error || $status) {
- $self->whine("From purge, the following errors resulted:
$error
");
+ if ($errors || $status) {
+ $self->whine("When calling purge on the $dataset_name, the following ",
+ "errors resulted: