diff -uNr meme_4.6.1/etc/fimo-to-wiggle.xsl meme_4.6.1_patch_1/etc/fimo-to-wiggle.xsl --- meme_4.6.1/etc/fimo-to-wiggle.xsl 2011-02-04 05:23:17.000000000 +1000 +++ meme_4.6.1_patch_1/etc/fimo-to-wiggle.xsl 2011-05-23 16:02:16.304185313 +1000 @@ -35,7 +35,7 @@ and the width of the item. The second line contain the starting position and the score. --> - variablestep + variableStep chrom= span= diff -uNr meme_4.6.1/etc/motif_logo.js meme_4.6.1_patch_1/etc/motif_logo.js --- meme_4.6.1/etc/motif_logo.js 2011-02-04 05:23:17.000000000 +1000 +++ meme_4.6.1_patch_1/etc/motif_logo.js 2011-05-23 17:30:58.324182960 +1000 @@ -41,6 +41,12 @@ this.freqs[pos] = (+freq); } } + } else { + //assume uniform background + var freq = 1.0 / this.letter_count; + for (var pos = 0; pos < this.letter_count; pos++) { + this.freqs[pos] = freq; + } } } @@ -210,16 +216,18 @@ //====================================================================== // start Pspm object //====================================================================== - function Pspm(pssm, name, ltrim, rtrim) { + function Pspm(pspm, name, ltrim, rtrim, nsites, evalue) { if (ltrim === undefined) ltrim = 0; if (rtrim === undefined) rtrim = 0; + if (nsites === undefined) nsites = 0; + if (evalue === undefined) evalue = 0; //variable prototype this.alph_length = 0; this.motif_length = 0; - this.pspm = new Array(); + this.pspm = null; this.name = (typeof name == "string" ? name : ""); - this.nsites = 0; - this.evalue = 0; + this.nsites = nsites; + this.evalue = evalue; this.ltrim = ltrim; this.rtrim = rtrim; //function prototype @@ -231,62 +239,86 @@ this.get_alph_length = Pspm_get_alph_length; this.get_left_trim = Pspm_get_left_trim; this.get_right_trim = Pspm_get_right_trim; + this.as_pspm = Pspm_as_pspm; + this.as_pssm = Pspm_as_pssm; this.toString = Pspm_to_string; //construct - var pspm_header = /letter-probability matrix:\s+alength=\s+(\d+)\s+w=\s+(\d+)(\s+nsites=\s+(\S+))?(\s+E=\s+(\S+))?\s*/; - var is_empty = /^\s*$/; - var lines = pssm.split(/\s*\n\s*/); - var read_pssm = false; - var line_num = 0; - var col_num = 0; - for (line_index in lines) { - //exclude inherited properties and undefined properties - if (!lines.hasOwnProperty(line_index) || lines[line_index] === undefined) continue; + if (typeof pspm == "string") { + var pspm_header = /letter-probability matrix:\s+alength=\s+(\d+)\s+w=\s+(\d+)(\s+nsites=\s+(\S+))?(\s+E=\s+(\S+))?\s*/; + var lines = pspm.split(/\n/); + var read_pspm = false; + var line_num = 0; + var col_num = 0; + this.pspm = new Array(); + for (line_index in lines) { + //exclude inherited properties and undefined properties + if (!lines.hasOwnProperty(line_index) || lines[line_index] === undefined) continue; - var line = lines[line_index]; - if (is_empty.test(line)) { - continue; - } - if (!read_pssm) { - var header_match = pspm_header.exec(line); - if (header_match != null) { - read_pssm = true; - this.alph_length = (+header_match[1]); - this.motif_length = (+header_match[2]); - if (header_match[4]) this.nsites = parseFloat(header_match[4]);//not always an integer - if (header_match[6]) this.evalue = parseFloat(header_match[6]); - this.pspm = new Array(this.motif_length); + var line = trim(lines[line_index]); + if (line == '') { + continue; + } + if (!read_pspm) { + var header_match = pspm_header.exec(line); + if (header_match != null) { + read_pspm = true; + this.alph_length = (+header_match[1]); + this.motif_length = (+header_match[2]); + if (header_match[4]) this.nsites = parseFloat(header_match[4]);//not always an integer + if (header_match[6]) this.evalue = parseFloat(header_match[6]); + this.pspm = new Array(this.motif_length); + } + continue; } - continue; + if (line_num >= this.motif_length) { + throw "TOO_MANY_ROWS"; + } + this.pspm[line_num] = new Array(this.alph_length); + col_num = 0; + var parts = line.split(/\s+/); + for (part_index in parts) { + //exclude inherited properties and undefined properties + if (!parts.hasOwnProperty(part_index) || parts[part_index] === undefined) continue; + + var prob = parts[part_index]; + if (col_num >= this.alph_length) { + throw "TOO_MANY_COLS"; + } + this.pspm[line_num][col_num] = (+prob); + //check the probability is within bounds + if (this.pspm[line_num][col_num] > 1 || this.pspm[line_num][col_num] < 0) { + throw "NUM_NOT_PROB"; + } + col_num++; + } + if (col_num != this.alph_length) { + throw "TOO_FEW_COLS"; + } + line_num++; } - if (line_num >= this.motif_length) { - throw "TOO_MANY_ROWS"; + if (line_num != this.motif_length) { + throw "TOO_FEW_ROWS"; } - this.pspm[line_num] = new Array(this.alph_length); - col_num = 0; - var parts = line.split(/\s+/); - for (part_index in parts) { - //exclude inherited properties and undefined properties - if (!parts.hasOwnProperty(part_index) || parts[part_index] === undefined) continue; - - var prob = parts[part_index]; - if (col_num >= this.alph_length) { - throw "TOO_MANY_COLS"; + } else { + // assume pspm is a nested array + this.motif_length = pspm.length; + this.alpha_length = (pspm.length > 0 ? pspm[0].length : 0); + this.pspm = new Array(this.motif_length); + // copy pspm and check + for (var row = 0; row < this.motif_length; row++) { + if (this.alpha_length != pspm[row].length) throw "COLUMN_MISMATCH"; + this.pspm[row] = new Array(this.alpha_length); + var row_sum = 0; + for (var col = 0; col < this.alpha_length; col++) { + row_sum += this.pspm[row][col]; + this.pspm[row][col] = 0 + pspm[row][col]; } - this.pspm[line_num][col_num] = (+prob); - //check the probability is within bounds - if (this.pspm[line_num][col_num] > 1 || this.pspm[line_num][col_num] < 0) { - throw "NUM_NOT_PROB"; + var delta = 0.1 + if ((row_sum > 1 && (row_sum - 1) > delta) || + (row_sum < 1 && (1 - row_sum) > delta)) { + throw "INVALID_SUM"; } - col_num++; } - if (col_num != this.alph_length) { - throw "TOO_FEW_COLS"; - } - line_num++; - } - if (line_num != this.motif_length) { - throw "TOO_FEW_ROWS"; } } @@ -413,6 +445,46 @@ return this.rtrim; } + function Pspm_as_pspm() { + var out = "letter-probability matrix: alength= " + this.alph_length + + " w= " + this.motif_length + " nsites= " + this.nsites + + " E= " + this.evalue.toExponential() + "\n"; + for (var row = 0; row < this.motif_length; row++) { + for (var col = 0; col < this.alph_length; col++) { + if (col != 0) out += " "; + out += this.pspm[row][col].toFixed(6); + } + out += "\n"; + } + return out; + } + + function Pspm_as_pssm(alphabet, pseudo) { + if (typeof pseudo != "number") pseudo = 0.1; + var out = "log-odds matrix: alength= " + this.alph_length + + " w= " + this.motif_length + + " E= " + this.evalue.toExponential() + "\n"; + var log2 = Math.log(2); + var total = this.nsites + pseudo; + for (var row = 0; row < this.motif_length; row++) { + for (var col = 0; col < this.alph_length; col++) { + if (col != 0) out += " "; + var p = this.pspm[row][col]; + // to avoid log of zero we add a pseudo count + var bg = alphabet.get_bg_freq(col); + var p2 = (p * this.nsites + bg * pseudo) / total; + // now calculate the score + var score = -10000; + if (p2 > 0) { + score = Math.round((Math.log(p2 / bg) / log2) * 100) + } + out += score; + } + out += "\n"; + } + return out; + } + function Pspm_to_string() { var str = ""; for (row_index in this.pspm) { @@ -1007,10 +1079,17 @@ cwidth = metrics.summed_width * scale; cheight = metrics.summed_height * scale; } else { - if (cwidth == 0 || cheight == 0 || scale == 0) { + if (cwidth == 0 && cheight == 0) { throw "CANVAS_MUST_HAVE_DIMENSIONS"; + } else if (cwidth == 0) { + scale = cheight / metrics.summed_height; + cwidth = metrics.summed_width * scale; + } else if (cheight == 0) { + scale = cwidth / metrics.summed_width; + cheight = metrics.summed_height * scale; + } else { + scale = Math.min(cwidth / metrics.summed_width, cheight / metrics.summed_height); } - scale = Math.min(cwidth / metrics.summed_width, cheight / metrics.summed_height); } var raster = new RasterizedAlphabet(logo.alphabet, metrics.stack_font, metrics.stack_width * scale * 2); if (cwidth != canvas.width || cheight != canvas.height) { @@ -1169,3 +1248,17 @@ element.parentNode.replaceChild(canvas, element); } + /* + * Fast string trimming implementation found at + * http://blog.stevenlevithan.com/archives/faster-trim-javascript + * + * Note that regex is good at removing leading space but + * bad at removing trailing space as it has to first go through + * the whole string. + */ + function trim (str) { + str = str.replace(/^\s\s*/, ''); + var ws = /\s/, i = str.length; + while (ws.test(str.charAt(--i))); + return str.slice(0, i + 1); + } diff -uNr meme_4.6.1/etc/tomtom-to-html.xsl meme_4.6.1_patch_1/etc/tomtom-to-html.xsl --- meme_4.6.1/etc/tomtom-to-html.xsl 2011-02-04 05:23:17.000000000 +1000 +++ meme_4.6.1_patch_1/etc/tomtom-to-html.xsl 2011-05-23 16:16:34.574184933 +1000 @@ -1,6 +1,7 @@ + @@ -44,12 +45,24 @@ span.C {font-size:20px; color:blue} span.G {font-size:20px; color:orange} span.T {font-size:20px; color:green} - .pt {padding: 0 10px} + table.targets td {padding: 0 10px;} + table.preview td {padding: 0 10px;} + table.preview tbody td {padding-bottom: 10px;} .ac {text-align: center;} .downloadTd {padding-left:20px;} div.logo_container {position:relative; width:99%; height:285px; padding:0px; margin:0px;} img.logo {position:absolute; z-index:2; max-width:100%;} tr.tspace th, tr.tspace td {padding-top: 20px;} + /* motif list link, first style */ + a.ml1 { + background-color: #FFF; + } + a.ml2 { + background-color: #FFF; + } + td.ml { + line-height: 1.8em; + } @@ -335,40 +348,40 @@ - +
- +
- + - + - + - - - + + + - + - + - + - - - +

Name 

Name 

Alt. Name 

Alt. Name 

Website 

Website 

Preview 

Matches 

List 

Preview 

Matches 

List 

linklink
+
+ + + + + ml1 + + + ml2 + + + - + @@ -393,11 +416,13 @@ - () +  ( + + ) - , + ,  @@ -434,20 +459,20 @@
- +
- - - + + + - - - + + + diff -uNr meme_4.6.1/scripts/meme2meme.pl.in meme_4.6.1_patch_1/scripts/meme2meme.pl.in --- meme_4.6.1/scripts/meme2meme.pl.in 2011-02-04 05:23:03.000000000 +1000 +++ meme_4.6.1_patch_1/scripts/meme2meme.pl.in 2011-05-23 16:09:52.274185111 +1000 @@ -490,18 +490,30 @@ while ($line = <$fp>) { if ($line =~ m/Background letter frequencies \(from(\s+[^\)]*)?/) { my $source = $1; + my $alphindex = 0; + my @freqs = (); $source =~ s/^\s*//; #trim left # read the background frequencies - do { + while ($alphindex < scalar(@alphabet)) { $line = <$fp>; - } while (defined($line) && $line =~ m/^\s*$/);# skip empty lines - die("Expected background frequencies on line following landmark \"Background letter frequencies (\" but got EOF.") unless defined($line); - my @freqs = split(/\s+/, $line); - for (my $i = 0; $i < scalar(@alphabet); $i++) { - die("Failed parsing background from \"$line\" in \"$file\", letter " . uc($freqs[$i*2]). " doesn't match expected " . $alphabet[$i] . ".\n") - if (uc($freqs[$i*2]) ne $alphabet[$i]); - my $freq = parse_double($freqs[$i*2 + 1]); - $bg{$alphabet[$i]} = $freq; + die("Encountered EOF while reading background frequencies.") + unless defined $line; + chomp($line); # remove EOL + $line =~ s/^\s*//; # trim left + $line =~ s/\s*$//; # trim right + my @line_freqs = split(/\s+/, $line); + push(@freqs, @line_freqs); + while(scalar(@freqs) > 2 and $alphindex < scalar(@alphabet)) { + my $letter = uc(shift(@freqs)); + if ($letter ne $alphabet[$alphindex]) { + die("Failed parsing background from \"$line\" in \"$file\", " . + "letter \"" . $letter. "\" doesn't match expected " . + $alphabet[$alphindex] . ".\n"); + } + my $freq = parse_double(shift(@freqs)); + $bg{$alphabet[$alphindex]} = $freq; + $alphindex++; + } } $bg{source} = $source; $bg{dna} = $is_dna; diff -uNr meme_4.6.1/scripts/meme-chip.pl.in meme_4.6.1_patch_1/scripts/meme-chip.pl.in --- meme_4.6.1/scripts/meme-chip.pl.in 2011-03-30 08:36:37.000000000 +1000 +++ meme_4.6.1_patch_1/scripts/meme-chip.pl.in 2011-05-23 16:50:42.064184029 +1000 @@ -9,7 +9,7 @@ use Fcntl qw(O_CREAT O_WRONLY O_TRUNC SEEK_SET); use File::Basename qw(fileparse); use File::Temp qw(tempfile); -use File::Spec::Functions qw(abs2rel catfile catdir); +use File::Spec::Functions qw(abs2rel catfile catdir splitdir tmpdir); use Getopt::Long; use HTML::Template; use List::Util qw(min max); @@ -34,6 +34,8 @@ -ccut : maximum size of a sequence before it is cut down to a centered section -desc : description of the job -fdesc : file containing plain text description of the job + -run-mast : run MAST - motif alignment & search tool + -run-ama : run AMA - Average motif affinity. -noecho : don't echo the commands run -tar : create a tar.gz file of the outputs -help : display this help message @@ -70,6 +72,9 @@ my $bindir = '@BINDIR@'; my $site_url = '@SITE_URL@'; my $template_file = '@APPCONFIGDIR@/meme-chip.tmpl'; +my $tmpdir = '@TMP_DIR@'; +# use the perl default if none is supplied or the replace fails +$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/); # Required Argument my $sequences; @@ -78,6 +83,8 @@ my $help = 0; # FALSE my $echo = 1; # TRUE my $tar = 0; # FALSE +my $run_mast = 0; # FALSE +my $run_ama = 0; # FALSE my $appverbosity = 1; #all the applications write to stderr currently so had to disable this my $clobber = 1; # TRUE my $outdir = 'memechip_out'; @@ -101,7 +108,6 @@ # Derived Globals my $outfile; -my $tarfile; my $stderr_txt; my $stdout_txt; my @tomtom_dbnames = (); @@ -120,14 +126,16 @@ eval { # redirect stderr to a temp file as we want to log it my $olderr; - my ($tmperr, $tnmerr) = tempfile(UNLINK => 1); - open $olderr, ">&STDERR" or die("Can't dup STDERR: $!"); - open STDERR, '>', $tnmerr or die("Can't dredirect STDERR to temp file: $!"); + my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + open($olderr, ">&STDERR") or die("Can't dup STDERR: $!"); + open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!"); # parse options $options_success = GetOptions( 'help|?' => \$help, 'noecho' => sub {$echo = 0}, 'tar' => \$tar, + 'run-mast' => \$run_mast, + 'run-ama' => \$run_ama, 'o=s' => sub {$clobber = 0; shift; $outdir = shift}, 'oc=s' => \$outdir, 'desc=s' => \$desc, @@ -153,7 +161,7 @@ 'meme-pal' => \$meme_palindromes, ); # reset STDERR - open STDERR, ">&", $olderr, or die("Can't reset STDERR: $!"); + open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!"); # slurp errors seek($tmperr, SEEK_SET, 0); chomp($options_err = do {local ($/); <$tmperr>}); @@ -256,7 +264,6 @@ } # calculate some file names $outfile = catfile($outdir, 'index.html'); - $tarfile = catfile($outdir, 'meme-chip.tar.gz'); $stdout_txt = catfile($outdir,'stdout.txt'); $stderr_txt = catfile($outdir, 'stderr.txt'); }; @@ -271,8 +278,7 @@ &main(); &write_html("", 0); # html to go in tar if ($tar) { - &tar_output(); - my $tarname = fileparse($tarfile); + my $tarname = &tar_output(); &write_html("", 0, undef, $tarname); # index which links to tar } }; @@ -340,8 +346,18 @@ $i--; } } + # we want to tar including the outdir to avoid a tarbomb + # find the real name of the outdir (user could have passed '.') + my $folder = (splitdir(abs_path($outdir)))[-1]; + my $folder_dir = abs_path(catdir($outdir, '..')); + # append the folder to the outfiles + for (my $i = 0; $i < scalar(@outfiles); $i++) { + $outfiles[$i] = catfile($folder, $outfiles[$i]); + } + my $tarname = $folder . ".tar.gz"; + my $tarfile = catfile($outdir, $tarname); # run tar - system('tar', '-czf', $tarfile, '-C', $outdir, @outfiles); + system('tar', '-czf', $tarfile, '-C', $folder_dir, @outfiles); my $errmsg = ''; if ($? == -1) { $errmsg = "Failed to execute tar: $!"; @@ -352,6 +368,7 @@ $errmsg = sprintf("tar exited with value %d indicating failure.", $? >> 8); } die($errmsg) if $errmsg; + return $tarname; } ################################################################################ @@ -470,28 +487,32 @@ ################################################################## # Run MAST on MEME motifs ################################################################## - my $mast_prog = "mast"; - my $mast_outdir = "meme_mast_out"; - my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $meme_motifs, $sequences, '-ev', $num_centered_sequences); - push(@mast_args, '-bfile', $bfile) if defined($bfile); - push(@mast_args,'-nostatus') unless $appverbosity >= 2; - my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"]; - my $mast_comment = 'Predicted locations of all MEME motifs (p < 0.0001) in the input sequences.'; - &report_and_run(\@report_commands, \@report_outputs, - $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment); + if ($run_mast) { + my $mast_prog = "mast"; + my $mast_outdir = "meme_mast_out"; + my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $meme_motifs, $sequences, '-ev', $num_centered_sequences); + push(@mast_args, '-bfile', $bfile) if defined($bfile); + push(@mast_args,'-nostatus') unless $appverbosity >= 2; + my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"]; + my $mast_comment = 'Predicted locations of all MEME motifs (p < 0.0001) in the input sequences.'; + &report_and_run(\@report_commands, \@report_outputs, + $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment); + } ################################################################## # Run AMA on MEME motifs ################################################################## - my $ama_prog = "ama"; - my $ama_outdir = "meme_ama_out"; - my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir)); - push(@ama_args, '--sdbg', 0) unless defined($bfile); - push(@ama_args, $meme_motifs, $sequences); - push(@ama_args, $bfile) if defined($bfile); - my $ama_outputs = ["", "ama.txt", "ama.xml"]; - my $ama_comment = 'Estimated binding affinity of each MEME motif to each input sequence.'; - &report_and_run(\@report_commands, \@report_outputs, - $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment); + if ($run_ama) { + my $ama_prog = "ama"; + my $ama_outdir = "meme_ama_out"; + my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir)); + push(@ama_args, '--sdbg', 0) unless defined($bfile); + push(@ama_args, $meme_motifs, $sequences); + push(@ama_args, $bfile) if defined($bfile); + my $ama_outputs = ["", "ama.txt", "ama.xml"]; + my $ama_comment = 'Estimated binding affinity of each MEME motif to each input sequence.'; + &report_and_run(\@report_commands, \@report_outputs, + $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment); + } $level--; } @@ -532,28 +553,32 @@ ################################################################## # Run MAST on DREME motifs ################################################################## - my $mast_prog = "mast"; - my $mast_outdir = "dreme_mast_out"; - my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $dreme_motifs, $sequences, '-ev', $num_centered_sequences); - push(@mast_args, '-bfile', $bfile) if defined($bfile); - push(@mast_args, '-nostatus') unless $appverbosity >= 2; - my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"]; - my $mast_comment = 'Predicted locations of all DREME motifs (p < 0.0001) in the input sequences.'; - &report_and_run(\@report_commands, \@report_outputs, - $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment); + if ($run_mast) { + my $mast_prog = "mast"; + my $mast_outdir = "dreme_mast_out"; + my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $dreme_motifs, $sequences, '-ev', $num_centered_sequences); + push(@mast_args, '-bfile', $bfile) if defined($bfile); + push(@mast_args, '-nostatus') unless $appverbosity >= 2; + my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"]; + my $mast_comment = 'Predicted locations of all DREME motifs (p < 0.0001) in the input sequences.'; + &report_and_run(\@report_commands, \@report_outputs, + $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment); + } ################################################################## # Run AMA on DREME motifs ################################################################## - my $ama_prog = "ama"; - my $ama_outdir = "dreme_ama_out"; - my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir)); - push(@ama_args, '--sdbg', 0) unless defined($bfile); - push(@ama_args, $dreme_motifs, $sequences); - push(@ama_args, $bfile) if defined($bfile); - my $ama_outputs = ["", "ama.txt", "ama.xml"]; - my $ama_comment = 'Estimated binding affinity of each DREME motif to each input sequence.'; - &report_and_run(\@report_commands, \@report_outputs, - $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment); + if ($run_ama) { + my $ama_prog = "ama"; + my $ama_outdir = "dreme_ama_out"; + my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir)); + push(@ama_args, '--sdbg', 0) unless defined($bfile); + push(@ama_args, $dreme_motifs, $sequences); + push(@ama_args, $bfile) if defined($bfile); + my $ama_outputs = ["", "ama.txt", "ama.xml"]; + my $ama_comment = 'Estimated binding affinity of each DREME motif to each input sequence.'; + &report_and_run(\@report_commands, \@report_outputs, + $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment); + } $level--; } @@ -589,40 +614,37 @@ my ($filename) = @_; # redirect stdout to a temp file my $oldout; - my ($tmpout, $tnmout) = tempfile(UNLINK => 1); - open $oldout, ">&STDOUT" or die("Can't dup STDOUT: $!"); - open STDOUT, '>', $tnmout or die("Can't redirect STDOUT to temp file: $!"); + my $tmpout = tempfile('getsize_stdout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + open($oldout, ">&STDOUT") or die("Can't dup STDOUT: $!"); + open(STDOUT, '>&', $tmpout) or die("Can't redirect STDOUT to temp file: $!"); # redirect stderr to a temp file my $olderr; - my ($tmperr, $tnmerr) = tempfile(UNLINK => 1); - open $olderr, ">&STDERR" or die("Can't dup STDERR: $!"); - open STDERR, '>', $tnmerr or die("Can't dredirect STDERR to temp file: $!"); + my $tmperr = tempfile('getsize_stderr_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + open($olderr, ">&STDERR") or die("Can't dup STDERR: $!"); + open(STDERR, '>&', $tmperr) or die("Can't dredirect STDERR to temp file: $!"); # run command system($bindir.'/getsize', $filename); # copy status my $status = $?; # reset stderr - open STDERR, ">&", $olderr, or die("Can't reset STDERR: $!"); + open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!"); # reset stdout - open STDOUT, ">&", $oldout or die("Can't reset STDOUT: $!"); + open(STDOUT, ">&", $oldout) or die("Can't reset STDOUT: $!"); # slurp output and errors - my $lineterm = $/; - undef $/; - seek($tmpout, SEEK_SET, 0); - my $output = <$tmpout>; + seek($tmpout, 0, SEEK_SET); + my $output = do {local $/ = undef; <$tmpout>}; close($tmpout); - seek($tmperr, SEEK_SET, 0); - my $errors = <$tmperr>; + seek($tmperr, 0, SEEK_SET); + my $errors = do {local $/ = undef; <$tmperr>}; close($tmperr); - $/ = $lineterm; #log errors if ($errors) { - my $msg = "'getsize $filename': ".$errors; + my $msg = "'getsize $filename': " . $errors; $logger->error($msg) if ($logger); - print STDERR $msg, "\n"; + print(STDERR $msg, "\n"); } # check status diff -uNr meme_4.6.1/scripts/meme-chip_webservice.pl.in meme_4.6.1_patch_1/scripts/meme-chip_webservice.pl.in --- meme_4.6.1/scripts/meme-chip_webservice.pl.in 2011-03-30 08:35:54.000000000 +1000 +++ meme_4.6.1_patch_1/scripts/meme-chip_webservice.pl.in 2011-05-23 16:37:38.394184374 +1000 @@ -23,6 +23,8 @@ meme-chip_webservice [options] Options: + -run-mast : run MAST - motif alignment & search tool + -run-ama : run AMA - average motif affinity -bfile : background file MEME Specific Options: @@ -45,6 +47,8 @@ my $log_file = 'memechip-log'; # Options +my $run_mast = 0; # FALSE +my $run_ama = 0; #FALSE my $bfile = undef; my $meme_mod = undef; my $meme_minw = undef; @@ -73,6 +77,8 @@ eval { GetOptions( + 'run-mast' => \$run_mast, + 'run-ama' => \$run_ama, 'bfile=s' => \$bfile, 'meme-mod=s' => \$meme_mod, 'meme-minw=i' => \$meme_minw, @@ -122,6 +128,8 @@ my $exe = catfile($bin_dir, 'meme-chip'); # prepare meme-chip arguments my @args = ('-noecho', '-tar', '-oc', '.', '-meme-time', $MAXTIME); +push(@args, '-run-mast') if $run_mast; +push(@args, '-run-ama') if $run_ama; push(@args, '-fdesc', 'description') if (-e 'description'); push(@args, '-bfile', $bfile) if $bfile; foreach my $db (@motif_dbs) { diff -uNr meme_4.6.1/scripts/MemeWebUtils.pm.in meme_4.6.1_patch_1/scripts/MemeWebUtils.pm.in --- meme_4.6.1/scripts/MemeWebUtils.pm.in 2011-03-10 08:31:41.000000000 +1000 +++ meme_4.6.1_patch_1/scripts/MemeWebUtils.pm.in 2011-05-23 16:50:42.064184029 +1000 @@ -13,22 +13,37 @@ valid_address valid_meme_version add_status_msg update_status loggable_args loggable_date write_invocation_log invoke); use Cwd; -use Fcntl qw(O_APPEND O_CREAT O_WRONLY SEEK_SET); +use Fcntl qw(O_APPEND O_CREAT O_WRONLY O_TRUNC SEEK_SET); use File::Basename qw(fileparse); use File::Copy qw(copy); -use File::Spec::Functions qw(catfile splitdir); +use File::Spec::Functions qw(catfile splitdir tmpdir); use File::Temp qw(tempfile); use HTTP::Request::Common qw(GET); use XML::Simple; use HTML::PullParser; use HTML::Template; use Sys::Hostname; +use Time::HiRes qw(gettimeofday tv_interval); use lib qw(@PERLLIBDIR@); use CatList qw(load_categories load_entry); +# Setup logging +my $logger = undef; +eval { + require Log::Log4perl; + Log::Log4perl->import(); +}; +unless ($@) { + Log::Log4perl::init('@APPCONFIGDIR@/logging.conf'); + $logger = Log::Log4perl->get_logger('meme.cgi.utils'); +} + my $template_dir = '@WEB_DIR@/cgi-bin'; my $service_invocation_log_dir = '@MEMELOGS@'; +my $tmpdir = '@TMP_DIR@'; +# use the perl default if none is supplied or the replace fails +$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/); ############################################################################## # Functions ############################################################################## @@ -98,6 +113,17 @@ sub is_numeric { defined &getnum($_[0]) } # +# Output large integers with commas +# +# +sub commify { + my $input = shift; + $input = reverse $input; + $input =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; + return reverse $input; +} + +# # get the alphabet of a string: DNA or PROTEIN # # Used in @@ -227,8 +253,8 @@ push(@found_files, $entry) if (defined($file) && -e $file && -s $file); } - my $fh; # I'm suspicious that the 0777 may be too permissive (but that's what we had before) - sysopen($fh, $output_file, O_CREAT | O_WRONLY, 0777) or die("Failed to open \"$output_file\"."); + my $fh; + sysopen($fh, $output_file, O_CREAT | O_WRONLY | O_TRUNC) or die("Failed to open \"$output_file\"."); my $template = HTML::Template->new(filename => "$template_dir/job_status.tmpl"); $template->param(program => $program, refresh => $refresh, files => \@found_files, msgs => $msg_list, status => $status); print $fh $template->output; @@ -342,21 +368,36 @@ # PROG => program name # BIN => program directory # ARGS => reference to array of program arguments -# IN_FILE => file to set as stdin +# IN_FILE => file name or handle to set as stdin # IN_VAR => variable (or reference to variable) to feed in as stdin -# OUT_FILE => file to store stdout +# IN_NAME => the displayed name for the source of stdin +# ALL_FILE => file name or handle to store stdout and stderr +# ALL_VAR => reference to variable to store stdout and stderr +# ALL_NAME => the displayed name for the destination of output +# OUT_FILE => file name or handle to store stdout # OUT_VAR => reference to variable to store stdout -# ERR_FILE => file to store stderr +# OUT_NAME => the displayed name for the destination of stdout +# ERR_FILE => file name or handle to store stderr # ERR_VAR => reference to variable to store stderr +# ERR_NAME => the displayed name for the destination of stderr # CHECK_STATUS => true to die on bad status codes +# TRUNCATE => true to truncate output files if they exist # CMD => reference to store a human readable form of the command run +# TIME => reference to store the running time in seconds (floating point) +# TMPDIR => directory to create temporary files # sub invoke { my %opts = @_; + my $logger = $opts{LOGGER}; + $logger->trace("sub invoke") if $logger; + # output truncates? + my $dir = ($opts{TRUNCATE} ? '>' : '>>'); #direction + # temp file directory + my $tmpdir = ($opts{TMPDIR} ? $opts{TMPDIR} : &tmpdir()); # get program my $prog = $opts{PROG}; - my $exe = (defined($opts{BIN}) ? catfile($opts{BIN}, $prog) : $prog); - die("No program passed to _invoke") unless defined($prog); + die("No program passed to invoke") unless defined($prog); + my $exe = (defined($opts{BIN}) ? &catfile($opts{BIN}, $prog) : $prog); # get args my $args_ref = $opts{ARGS}; my @args = (); @@ -364,85 +405,161 @@ @args = @{$args_ref}; } # make command line for printing - my $cmd = loggable_args($prog, @args); + my $cmd = &loggable_args($prog, @args); + # do redirection + my $display_name; # check if we're redirecting stdin my ($in_old, $in_tmp, $in_nam); if (defined($opts{IN_FILE}) || defined($opts{IN_VAR})) { - #save stdin + $logger->trace("invoke - redirecing stdin") if $logger; + # save stdin open($in_old, "<&STDIN") or die("Can't dup STDIN: $!"); + # redirect stdin if (defined($opts{IN_FILE})) { # read stdin from specified file - $in_nam = $opts{IN_FILE}; - $cmd .= ' < '.loggable_arg($in_nam); + if (ref($opts{IN_FILE})) { # file handle (we hope) + my $handle = $opts{IN_FILE}; + open(STDIN, '<&', $handle) or die("Can't redirect STDIN: $!"); + $display_name = 'input_file'; + } else { # file name (we hope) + my $name = $opts{IN_FILE}; + open(STDIN, '<', $name) or die("Can't redirect STDIN: $!"); + $display_name = &loggable_arg($name); + } } else { # read stdin from a temp file which we preload with the var - # make a temporary file - ($in_tmp, $in_nam) = tempfile(UNLINK => 1); - # write the variable to the file - my $var = $opts{IN_VAR}; + $in_tmp = &tempfile('stdin_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file + my $var = $opts{IN_VAR}; # variable could be passed as a ref or scalar my $var_ref = (ref($var) ? $var : \$var); - print $in_tmp ${$var_ref}; - $cmd .= ' < $input'; + print $in_tmp ${$var_ref}; # write variable to file + seek($in_tmp, 0, SEEK_SET); # rewind file + open(STDIN, '<&', $in_tmp) or die("Can't redirect STDIN: $!"); + $display_name = '$input'; } - open(STDIN, '<', $in_nam) or die("Can't redirect STDIN: $!"); + $display_name = $opts{IN_NAME} if defined($opts{IN_NAME}); + $cmd .= ' < ' . $display_name; } - # check if we're redirecting stdout - my ($out_old, $out_tmp, $out_nam); - if (defined($opts{OUT_FILE}) || defined($opts{OUT_VAR})) { - #save stdout + # check for output redirection + my ($out_old, $err_old); + my ($all_tmp, $out_tmp, $err_tmp); + if (defined($opts{ALL_FILE}) || defined($opts{ALL_VAR})) { + $logger->trace("invoke - redirecing output") if $logger; + # save stdout and stderr open($out_old, ">&STDOUT") or die("Can't dup STDOUT: $!"); - if (defined($opts{OUT_FILE})) { # send stdout to specified file - $out_nam = $opts{OUT_FILE}; - $cmd .= ' 1> '.loggable_arg($out_nam); - } else { # send stdout to a temp file which we can read in to the var - # make a temporary file - ($out_tmp, $out_nam) = tempfile(UNLINK => 1); - $cmd .= ' 1> $output'; - } - open(STDOUT, '>', $out_nam) or die("Can't redirect STDOUT: $!"); - } - # check if we're redirecting stderr - my ($err_old, $err_tmp, $err_nam); - if (defined($opts{ERR_FILE}) || defined($opts{ERR_VAR})) { - #save stderr open($err_old, ">&STDERR") or die("Can't dup STDERR: $!"); - if (defined($opts{ERR_FILE})) { # send stderr to specified file - $err_nam = $opts{ERR_FILE}; - $cmd .= ' 2> '.loggable_arg($err_nam); - } else { # send stderr to a temp file which we can read in to the var - # make a temporary file - ($err_tmp, $err_nam) = tempfile(UNLINK => 1); - $cmd .= ' 2> $error'; + # redirect stdout and stderr + if (defined($opts{ALL_FILE})) { # send output to specified file + truncate($opts{ALL_FILE}, 0) if ($opts{TRUNCATE}); + if (ref($opts{ALL_FILE})) { # file handle (we hope) + my $handle = $opts{ALL_FILE}; + open(STDOUT, '>>&', $handle) or die("Can't redirect STDOUT: $!"); + open(STDERR, '>>&', $handle) or die("Can't redirect STDERR: $!"); + $display_name = 'output_file'; + } else { # file name (we hope) + my $name = $opts{ALL_FILE}; + open(STDOUT, '>>', $name) or die("Can't redirect STDOUT: $!"); + open(STDERR, '>>', $name) or die("Can't redirect STDERR: $!"); + $display_name = &loggable_arg($name); + } + } else { + $all_tmp = &tempfile('allout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file + open(STDOUT, '>>&', $all_tmp) or die("Can't redirect STDOUT: $!"); + open(STDERR, '>>&', $all_tmp) or die("Can't redirect STDERR: $!"); + $display_name = '$all_messages'; + } + # turn off buffering so output order is maintained + my $oldfh; + $oldfh = select(STDOUT); + $| = 1; + select(STDERR); + $| = 1; + select($oldfh); + # update command + $display_name = $opts{ALL_NAME} if defined($opts{ALL_NAME}); + $cmd .= ' &'. $dir . ' ' . $display_name; + } else { + # check if we're redirecting stdout + if (defined($opts{OUT_FILE}) || defined($opts{OUT_VAR})) { + $logger->trace("invoke - redirecing stdout") if $logger; + # save stdout + open($out_old, ">&STDOUT") or die("Can't dup STDOUT: $!"); + # redirect stdout + if (defined($opts{OUT_FILE})) { # send stdout to specified file + if (ref($opts{OUT_FILE})) { # file handle (we hope) + my $handle = $opts{OUT_FILE}; + open(STDOUT, $dir.'&', $handle) or die("Can't redirect STDOUT: $!"); + $display_name = 'output_file'; + } else { # file name (we hope) + my $name = $opts{OUT_FILE}; + open(STDOUT, $dir, $name) or die("Can't redirect STDOUT: $!"); + $display_name = &loggable_arg($name); + } + } else { # send stdout to a temp file which we can read in to the var + $out_tmp = &tempfile('stdout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file + open(STDOUT, '>&', $out_tmp) or die("Can't redirect STDOUT: $!"); + $display_name = '$output_messages'; + } + $display_name = $opts{OUT_NAME} if defined($opts{OUT_NAME}); + $cmd .= ' 1'. $dir . ' ' . $display_name; + } + # check if we're redirecting stderr + if (defined($opts{ERR_FILE}) || defined($opts{ERR_VAR})) { + $logger->trace("invoke - redirecing stderr") if $logger; + # save stderr + open($err_old, ">&STDERR") or die("Can't dup STDERR: $!"); + # redirect stderr + if (defined($opts{ERR_FILE})) { # send stderr to specified file + if (ref($opts{ERR_FILE})) { # file handle (we hope) + my $handle = $opts{ERR_FILE}; + open(STDERR, $dir.'&', $handle) or die("Can't redirect STDERR: $!"); + $display_name = 'error_file'; + } else { # file name (we hope) + my $name = $opts{ERR_FILE}; + open(STDERR, $dir, $name) or die("Can't redirect STDERR: $!"); + $display_name = &loggable_arg($name); + } + } else { # send stderr to a temp file which we can read in to the var + $err_tmp = &tempfile('stderr_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # make a temporary file + open(STDERR, '>&', $err_tmp) or die("Can't redirect STDERR: $!"); + $display_name = '$error_messages'; + } + $display_name = $opts{ERR_NAME} if defined($opts{ERR_NAME}); + $cmd .= ' 2' . $dir . ' ' . $display_name; } - open(STDERR, '>', $err_nam) or die("Can't redirect STDERR: $!"); } + # record the time before starting the program + $logger->trace("invoke - recording start time") if $logger; + my $t0 = [&gettimeofday()]; # run the command + $logger->trace("invoke - running") if $logger; my $status = system($exe, @args); - # reset stdin + # record the time after completing the program + $logger->trace("invoke - recording end time") if $logger; + my $t1 = [&gettimeofday()]; + # check if the caller wants the elapsed time + if (defined($opts{TIME})) { + ${$opts{TIME}} = &tv_interval($t0, $t1); + } + # reset file descriptors if (defined($in_old)) { + $logger->trace("invoke - reseting stdin") if $logger; open(STDIN, "<&", $in_old) or die("Can't reset STDIN: $!"); } - # reset stdout if (defined($out_old)) { + $logger->trace("invoke - reseting stdout") if $logger; open(STDOUT, ">&", $out_old) or die("Can't reset STDOUT: $!"); } - # reset stderr if (defined($err_old)) { + $logger->trace("invoke - reseting stderr") if $logger; open(STDERR, ">&", $err_old) or die("Can't reset STDERR: $!"); } - # clean up stdin temp file - if (defined($in_tmp)) { - close($in_tmp); - } - # read and close stdout temp file - if (defined($out_tmp)) { - ${$opts{OUT_VAR}} = do {local $/ = undef; <$out_tmp>}; - close($out_tmp); - } - # read and close stderr temp file - if (defined($err_tmp)) { - ${$opts{ERR_VAR}} = do {local $/ = undef; <$err_tmp>}; - close($err_tmp); - } + # close stdin temporary file + close($in_tmp) if (defined($in_tmp)); + # rewind, slurp and close temporary files + ${$opts{ALL_VAR}} = &rewind_slurp_close($all_tmp) if (defined($all_tmp)); + ${$opts{OUT_VAR}} = &rewind_slurp_close($out_tmp) if (defined($out_tmp)); + ${$opts{ERR_VAR}} = &rewind_slurp_close($err_tmp) if (defined($err_tmp)); + if ($opts{CHECK_STATUS}) { + $logger->trace("invoke - checking status") if $logger; # check status if ($status == -1) { die("Failed to execute command '". $cmd . "': $!"); @@ -458,9 +575,19 @@ ${$opts{CMD}} = $cmd; } + $logger->trace("invoke - returning") if $logger; return $status; } +# utility used by invoke +sub rewind_slurp_close { + my ($fh) = @_; + seek($fh, 0, SEEK_SET); + my $content = do {local $/ = undef; <$fh>}; + close($fh); + return $content; +} + ############################################################################## # Object Methods ############################################################################## @@ -639,6 +766,20 @@ } } # check_description +# +# I wanted to use the unlink1 used by File::Temp +# but for some reason they don't allow it to be +# exported. It shouldn't matter that I don't +# get the stat compare before the delete though +# as there shouldn't be any doubt that they +# refer to the same file. +# +sub unlink1 { + $logger->trace("sub unlink1") if $logger; + my ($fh, $filename) = @_; + close($fh); + unlink($filename); +} # # get sequence data @@ -657,6 +798,7 @@ # glam2.pl # sub get_sequence_data { + $logger->trace("sub get_sequence_data") if $logger; my $self = shift; die("Expected Utils object") unless ref($self) eq 'MemeWebUtils'; my $PROGRAM = $self->{PROGRAM}; @@ -668,7 +810,11 @@ $shuffle, # shuffle sequences if true $purge_score, # if nonempty the score for purge $dust_cutoff, # if nonempty the cutoff for dust + $dataset_name # if nonempty the name of the dataset used in messages ) = @_; + $logger->debug('get_sequence_data - file: "' . $file . '" file size: ' . (-s $file)) if $logger; + + $dataset_name = 'input dataset' unless defined($dataset_name); # return values my $fasta_data = ""; # sequence data in FASTA format my $alphabet = "UNRECOGNIZED"; # sequence alphabet DNA/PROTEIN @@ -688,14 +834,16 @@ my ($fasta_tmp, $fasta_nam); # other vars - my ($status, $error); + my ($status, $errors, $size, $has_problems); # check that sequence data was provided if (!$file && !$data) { $self->whine( - "You haven't entered any sequence data.
", - "If you still wish to submit a query, please go back and enter the", - "name of a sequence file or the actual sequences." + "No data was entered for the $dataset_name but $PROGRAM requires the ", + "$dataset_name to run.
", + "If you still wish to submit a query, please go back and either select ", + "a sequence file to upload or paste the actual sequences for the ", + "$dataset_name." ); goto DONE; } @@ -703,128 +851,135 @@ # don't allow both datafile and data if ($file && $data) { $self->whine( - "You may not enter both the name of a sequence file and sequences.
", - "If you still wish to submit a query, please go back and erase either", - "what you have written in the name of a file field or", - "in the actual sequences field." + "Both the sequence file and actual sequences were entered for the ", + "$dataset_name.
", + "If you still wish to submit a query, please go back and either clear", + "the file selection or erase the content of the actual sequences field." ); goto DONE; } # don't allow empty sequence files if ($file && (-s $file) == 0) { - $self->whine("Your sequence file is empty."); + $self->whine( + "The sequence file entered for the $dataset_name is empty ", + "but $PROGRAM requires at least 1 sequence to run.
", + "If you still wish to submit a query, please go back and select ", + "a non-empty sequence file to upload or paste actual sequences for ", + "the $dataset_name." + ); goto DONE; } - # - # create a file containing the raw sequences - # - - # slurp the uploaded file into a scalar if there was no textbox data - $data = do {local $/; <$file>} unless ($data); - - # convert to UNIX EOL - $data =~ s/\r\n/\n/g; # Windows -> UNIX eol - $data =~ s/\r/\n/g; # MacOS -> UNIX eol - # print raw sequences to a file - ($raw_tmp, $raw_nam) = tempfile(UNLINK => 1); - print $raw_tmp $data; - - # - # convert raw sequences to FASTA format using READSEQ - # - ($cooked_tmp, $cooked_nam) = tempfile(UNLINK => 1); - $status = &invoke(BIN => $BIN_DIR, PROG => 'readseq', ARGS => ['-a', '-f=8', $raw_nam], OUT_FILE => $cooked_nam, ERR_VAR => \$error); - - # check for errors - if ($status) { - $self->whine( - "An error occurred when the READSEQ program attempted to convert", - "your dataset to FASTA format.
", - "READSEQ returned:
$error
" - ); - goto DONE; + ($raw_tmp, $raw_nam) = tempfile('raw_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + $logger->debug('get_sequence_data - $raw_tmp: ' . $raw_nam) if $logger; + if ($data) { + # convert to UNIX EOL + $data =~ s/\r\n/\n/g; # Windows -> UNIX eol + $data =~ s/\r/\n/g; # MacOS -> UNIX eol + # print to file + print $raw_tmp $data; + } else { + # copy the file changing to UNIX EOL + my $line; + while ($line = <$file>) { + chomp($line); + print $raw_tmp $line, "\n"; + } } - # - # get information on sequences - # - my($getsize_seqs, $getsize_cooked); + # run GETSIZE on the raw sequences and see if it reports any errors + $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => [$raw_nam], + OUT_VAR => \$size, ERR_VAR => \$errors, LOGGER => $logger); + $logger->debug('get_sequence_data - GETSIZE on raw seqs: ' . $size) if $logger; + + if ($errors || $status) { + # maybe the file is not FASTA format? Attempt to convert it using READSEQ + ($cooked_tmp, $cooked_nam) = tempfile('cooked_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + $status = &invoke(BIN => $BIN_DIR, PROG => 'readseq', ARGS => + ['-a', '-f=8', $raw_nam], OUT_FILE => $cooked_nam, ERR_VAR => \$errors); - # Run the 'getsize' program to get information on the raw sequence data. - &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => [$raw_nam], OUT_VAR => \$getsize_seqs, ERR_FILE => '/dev/null'); + # check for errors + if ($status) { + $self->whine( + "The sequences submitted for the $dataset_name could not be read as ", + "FASTA format and automatic conversion using the READSEQ program ", + "failed.
", + "READSEQ returned:
$errors

", + "If you still wish to submit a query, please go back and select ", + "a FASTA formatted sequence file to upload or paste actual ", + "sequences in FASTA format for the $dataset_name." + ); + goto DONE; + } - # Run the 'getsize' program to get information on converted data; will - # be unchanged if it is FASTA (note -nd means do not print warnings about duplicate sequences) - $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => ['-nd', $cooked_nam], OUT_VAR => \$getsize_cooked, ERR_VAR => \$error); + #run GETSIZE on the converted sequences + #(note -nd means do not print warnings about duplicate sequences) + $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => + ['-nd', $cooked_nam], OUT_VAR => \$size, + ERR_VAR => \$errors); + $logger->debug('get_sequence_data - GETSIZE on cooked seqs: ' . $size) if $logger; - # check for errors - if ($error || $status) { - $self->whine( - "After converting to FASTA format using the READSEQ program,", - "the following errors in your dataset were detected:
$error
", - "
Make sure all your sequences are in the same format since READSEQ", - "assumes that all sequences are in the same format as the first sequence.", - ); - goto DONE; - } - - # choose between the original and the cooked version - if ($getsize_seqs ne $getsize_cooked) { - # use cooked dataset - $getsize_seqs = $getsize_cooked; + # check for errors + if ($errors || $status) { + $self->whine( + "The sequences submitted for the $dataset_name were converted to ", + "FASTA format but in the process of detecting the alphabet and size ", + "the following errors in your dataset were detected:
$errors
", + "
Make sure all your sequences are in the same format since READSEQ", + "assumes that all sequences are in the same format as the first sequence." + ); + goto DONE; + } + # use converted dataset ($fasta_tmp, $fasta_nam) = ($cooked_tmp, $cooked_nam); - close($raw_tmp); + &unlink1($raw_tmp, $raw_nam); # this should be done when perl exits as well } else { # use original dataset ($fasta_tmp, $fasta_nam) = ($raw_tmp, $raw_nam); - close($cooked_tmp); } - $raw_tmp = undef; - $raw_nam = undef; - $cooked_tmp = undef; - $cooked_nam = undef; + # just so we don't close a file twice later + $raw_tmp = undef; $raw_nam = undef; $cooked_tmp = undef; $cooked_nam = undef; # extract out the sequence stats from getsize's response my $letters; - ($nseqs, $min, $max, $ave, $total, $letters) = split (' ', $getsize_seqs); + ($nseqs, $min, $max, $ave, $total, $letters) = split (' ', $size); + + $has_problems = 0; - # - # final checks - # # check for problem reading the dataset if ($nseqs <= 0) { $self->whine( - "Your dataset appears to be in a format that $PROGRAM does not recognize.", - "
Please check to be sure that your data is", + "The sequences submitted for the $dataset_name appear to be ", + "in a format that $PROGRAM does not recognize.
", + "Please check to be sure that your data is ", "formatted properly." ); - goto DONE; + $has_problems = 1; } # check for bad sequences if ($nseqs > 0 && $min == 0) { $self->whine( - "Your dataset appears to contain one or more zero-length sequences.", - "
Please check to be sure that your data is", + "The sequences submitted for the $dataset_name appear to ", + "contain one or more zero-length sequences.
", + "Please check to be sure that your data is ", " formatted properly." ); - goto DONE; + $has_problems = 1; } # Make sure there isn't too much data. if ($total > $maxsize) { $self->whine( - "The data you have entered contains more than $maxsize characters.", - "$PROGRAM cannot process it at this time.
", + "The sequences submitted for the $dataset_name contain ", + &commify($total) . " characters but $PROGRAM can only accept ", + &commify($maxsize) . " characters.
", "Please submit a smaller dataset." ); - goto DONE; + $has_problems = 1; } - # # prepare sequences and alphabet and do filtering if requested - # # calculate the alphabet my $bad_symbols; @@ -837,31 +992,35 @@ my @bad_lines = $self->find_bad_sequence_data($bad_symbols, $data); $self->whine( - "Your sequences contained the following unrecognized letters: $bad_symbols.
", + "The sequences submitted for the $dataset_name contained the ", + "following unrecognized letters: $bad_symbols.
", "The unrecognized letters occurred in the following locations:", @bad_lines, "
", "Please convert your sequences to one of the recognized sequence", "alphabets." ); - goto DONE; + $has_problems = 1; } + goto DONE if $has_problems; + # shuffle sequences if requested if ($shuffle) { - ($shuffled_tmp, $shuffled_nam) = tempfile(UNLINK => 1); + ($shuffled_tmp, $shuffled_nam) = tempfile('shuffled_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); $status = &invoke(BIN => $BIN_DIR, PROG => 'fasta-shuffle-letters', ARGS => ['-tod'], - IN_FILE => $fasta_nam, OUT_FILE => $shuffled_nam, ERR_VAR => \$error); + IN_FILE => $fasta_nam, OUT_FILE => $shuffled_nam, ERR_VAR => \$errors); - if ($error || $status) { + if ($errors || $status) { $self->whine( - "After shuffling, the following errors resulted:
$error
", + "When shuffling the $dataset_name, the following errors ", + "resulted:
$errors
", "
Please check your sequences." ); goto DONE; } - close($fasta_tmp); + &unlink1($fasta_tmp, $fasta_nam); # this should be done when perl exits as well ($fasta_tmp, $fasta_nam) = ($shuffled_tmp, $shuffled_nam); $shuffled_tmp = undef; $shuffled_nam = undef; @@ -876,20 +1035,21 @@ # instead it always writes to a file called . my $purge_out = $fasta_nam . '.' . $purge_score; die("Purge output file already exists!") if (-e $purge_out); - $status = &invoke(BIN => $BIN_DIR, PROG => 'purge', ARGS => \@purge_args, ERR_VAR => \$error); + $status = &invoke(BIN => $BIN_DIR, PROG => 'purge', ARGS => \@purge_args, ERR_VAR => \$errors); # check for errors - if ($error || $status) { - $self->whine("From purge, the following errors resulted:
$error
"); + if ($errors || $status) { + $self->whine("When calling purge on the $dataset_name, the following ", + "errors resulted:
$errors
"); unlink($purge_out) if (-e $purge_out); goto DONE; } # make things neater by copying the file created by purge # to a temporary file and deleteing the original - ($purged_tmp, $purged_nam) = tempfile(UNLINK => 1); - copy($purge_out, $purged_tmp); + ($purged_tmp, $purged_nam) = tempfile('purged_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + ©($purge_out, $purged_tmp); unlink($purge_out); - close($fasta_tmp); + &unlink1($fasta_tmp, $fasta_nam); # this should be done when perl exits as well ($fasta_tmp, $fasta_nam) = ($purged_tmp, $purged_nam); $purged_tmp = undef; $purged_nam = undef; @@ -898,20 +1058,23 @@ # dust: removes low information content regions if ($dust_cutoff) { unless ($alphabet eq "DNA") { - $self->whine ("dust is only good for DNA"); + $self->whine ("Could not call dust on the $dataset_name, ", + "dust is only good for DNA"); goto DONE; } # make a temp output file - ($dusted_tmp, $dusted_nam) = tempfile(UNLINK => 1); + ($dusted_tmp, $dusted_nam) = tempfile('dusted_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # run dust - $status = &invoke(BIN => $BIN_DIR, PROG => 'dust', ARGS => [$fasta_nam, $dust_cutoff], OUT_FILE => $dusted_nam, ERR_VAR => \$error); + $status = &invoke(BIN => $BIN_DIR, PROG => 'dust', ARGS => + [$fasta_nam, $dust_cutoff], OUT_FILE => $dusted_nam, ERR_VAR => \$errors); # check for errors - if ($error || $status) { - $self->whine("From dust, the following errors resulted:
$error
"); + if ($errors || $status) { + $self->whine("When calling dust on the $dataset_name, the following ", + "errors resulted:
$errors
"); goto DONE; } - close($fasta_tmp); + &unlink1($fasta_tmp, $fasta_nam); # this should be done when perl exits as well ($fasta_tmp, $fasta_nam) = ($dusted_tmp, $dusted_nam); $dusted_tmp = undef; $dusted_nam = undef; @@ -922,18 +1085,16 @@ DONE: # clean up - close($raw_tmp) if $raw_tmp; - close($cooked_tmp) if $cooked_tmp; - close($shuffled_tmp) if $shuffled_tmp; - close($purged_tmp) if $purged_tmp; - close($dusted_tmp) if $dusted_tmp; - close($fasta_tmp) if $fasta_tmp; + &unlink1($raw_tmp, $raw_nam) if $raw_tmp; + &unlink1($cooked_tmp, $cooked_nam) if $cooked_tmp; + &unlink1($shuffled_tmp, $shuffled_nam) if $shuffled_tmp; + &unlink1($purged_tmp, $purged_nam) if $purged_tmp; + &unlink1($dusted_tmp, $dusted_nam) if $dusted_tmp; + &unlink1($fasta_tmp, $fasta_nam) if $fasta_tmp; + # return sequences and attributes return($fasta_data, $alphabet, $nseqs, $min, $max, $ave, $total); } # get sequence data - - - # # get fasta data # @@ -1005,14 +1166,15 @@ $data =~ s/\r/\n/g; # MacOS -> UNIX eol # write the data to a temp file - my ($seq_tmp, $seq_nam) = tempfile(UNLINK => 1); + my ($seq_tmp, $seq_nam) = tempfile('raw_seqs_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); print $seq_tmp $data; # Run the 'getsize' program to get information on the raw sequence data. - $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => [$seq_nam], OUT_VAR => \$getsize_out, ERR_VAR => \$getsize_err); + $status = &invoke(BIN => $BIN_DIR, PROG => 'getsize', ARGS => [$seq_nam], + OUT_VAR => \$getsize_out, ERR_VAR => \$getsize_err); - # close the temp file - close($seq_tmp); + # close and remove the temp file + &unlink1($seq_tmp, $seq_nam); # handle errors if ($getsize_err || $status) { diff -uNr meme_4.6.1/scripts/MotifUtils.pm meme_4.6.1_patch_1/scripts/MotifUtils.pm --- meme_4.6.1/scripts/MotifUtils.pm 2011-02-04 05:23:03.000000000 +1000 +++ meme_4.6.1_patch_1/scripts/MotifUtils.pm 2011-05-23 16:09:52.274185111 +1000 @@ -425,6 +425,9 @@ my @iupac = ($bg{dna} ? @dna_iupac : @protein_iupac); my %bits = ($bg{dna} ? %dna_bits : %protein_bits); my $alpha_ic = ($bg{dna} ? $dna_ic : $protein_ic); + # for protein allowing ambiguous characters tends to result + # in a IUPAC motif of all Xs which is not very informative. + my $disable_ambiguous = not $bg{dna}; #calculate iupac code ic's assuming same sites and background as motif @@ -435,6 +438,7 @@ my $ic = $alpha_ic; my %probs; my %fractions; + next if $disable_ambiguous and &bits_set($set) > 1; for (my $r = 0; $r < scalar(@residues); $r++) { my $residue = $residues[$r]; my $p = (($set & 1<<$r ? $counts : 0) + $bg{$residue} * $pseudo) / ($sites + $pseudo); diff -uNr meme_4.6.1/scripts/tamo2meme.pl.in meme_4.6.1_patch_1/scripts/tamo2meme.pl.in --- meme_4.6.1/scripts/tamo2meme.pl.in 2011-02-04 05:23:03.000000000 +1000 +++ meme_4.6.1_patch_1/scripts/tamo2meme.pl.in 2011-05-23 16:48:50.154184078 +1000 @@ -69,14 +69,20 @@ my $num_skipped = 0; my $line_number = 0; my $id; +my %non_unique_ids = (); my $line; my %motifs; while ($line = <$tamo_fp>) { chomp($line); $line_number++; next if ($line =~ /^#/ || $line =~ /^\s*$/);# skip comment, blank lines - if ($line =~ m/^Source:\s+(\S+)\b$/) { + if ($line =~ m/^Source:\s+(\S+)\b/) { $id = $1; + if (defined $non_unique_ids{$id}) { + $id .= "_" . ++$non_unique_ids{$id}; + } else { + $non_unique_ids{$id} = 0; + } } elsif ($line =~ /^Motif\b/) { # read the motif sites my $sites = ''; diff -uNr meme_4.6.1/src/mast2txt.c meme_4.6.1_patch_1/src/mast2txt.c --- meme_4.6.1/src/mast2txt.c 2011-02-04 05:22:59.000000000 +1000 +++ meme_4.6.1_patch_1/src/mast2txt.c 2011-05-23 16:02:08.324185316 +1000 @@ -1700,7 +1700,7 @@ MULTI_T multi_type = {.count = 2, .options = opts_type, .outputs = outs_type, .target = &(db->is_dna)}; char *names[8] = {"id", "last_mod_date", "name", "num", "residue_count", "seq_count", "source", "type"}; - int (*parsers[8])(char*, void*) = {ld_str, ld_str, ld_str, ld_int, ld_int, ld_int, ld_str, ld_multi}; + int (*parsers[8])(char*, void*) = {ld_str, ld_str, ld_str, ld_int, ld_long, ld_int, ld_str, ld_multi}; void *data[8] = {&(db->id), &(db->last_mod_date), &(db->name), &(db->num), &(db->residue_count), &(db->seq_count), &(db->source), &multi_type}; BOOLEAN_T required[8] = {TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE}; parse_attributes(ps, "database", attrs, 8, names, parsers, data, required); diff -uNr meme_4.6.1/src/mast.c meme_4.6.1_patch_1/src/mast.c --- meme_4.6.1/src/mast.c 2011-02-04 05:23:01.000000000 +1000 +++ meme_4.6.1_patch_1/src/mast.c 2011-05-23 16:45:25.774184167 +1000 @@ -599,7 +599,7 @@ int i, j, imotif, sample_index; int est_seq; // estimated number of sequences long sample_len; // length of sample - int file_pos; + long file_pos; // needs to be long for files larger than 4Gb char *sample_name, *sample_seq, *sample_comment; double *best_score = NULL; /* best score per motif for positive strand */ diff -uNr meme_4.6.1/src/readseq.c meme_4.6.1_patch_1/src/readseq.c --- meme_4.6.1/src/readseq.c 2011-02-04 05:23:00.000000000 +1000 +++ meme_4.6.1_patch_1/src/readseq.c 2011-05-23 16:50:42.044184029 +1000 @@ -461,7 +461,7 @@ manyout = false, dolower = false, doupper = false, doreverse= false, askout = true, dopipe= false, interleaved = false; short nfile = 0, iwhichlist=0, nwhichlist = 0; -short whichlist[kMaxwhichlist+1]; +long whichlist[kMaxwhichlist+1]; /* changed short to long - James J */ long whichSeq = 0, outform = kNoformat; char onamestore[128], *oname = onamestore; FILE *foo = NULL; @@ -489,7 +489,7 @@ int readopt( char *sopt) { char sparamstore[256], *sparam= sparamstore; - short n=0; + long n=0; /* changed from short to long - James J (UQ) */ /* fprintf(stderr,"readopt( %s) == ", sopt); */ @@ -576,7 +576,7 @@ do { while (*cp!=0 && !isdigit((int) *cp)) cp++; if (*cp!=0) { - n = atoi( cp); + n = atol( cp); /* changed from atoi to atol - James J (UQ) */ whichlist[nwhichlist++]= n; while (*cp!=0 && isdigit((int) *cp)) cp++; } @@ -718,9 +718,9 @@ #endif { boolean closein = false; -short ifile, nseq, atseq, format, err = 0, seqtype = kDNA, +short ifile, format, err = 0, seqtype = kDNA, nlines=0, seqout = 0, phylvers = 2; -long i, skiplines, seqlen, seqlen0=0; +long atseq, nseq, i, skiplines, seqlen, seqlen0=0; /* changed atseq and nseq from short to long - James J (UQ) */ unsigned long checksum= 0, checkall= 0; char *seq=NULL, *cp=NULL, *firstseq = NULL, *seqlist=NULL, *progname; diff -uNr meme_4.6.1/src/ureadseq.c meme_4.6.1_patch_1/src/ureadseq.c --- meme_4.6.1/src/ureadseq.c 2011-02-04 05:23:00.000000000 +1000 +++ meme_4.6.1_patch_1/src/ureadseq.c 2011-05-23 16:50:42.044184029 +1000 @@ -112,9 +112,9 @@ /* Local variables for readSeq: */ struct ReadSeqVars { - short choice, err, nseq; + short err; + long choice, nseq, topnseq; /* changed from short to long - James J (UQ) */ long seqlen, maxseq, seqlencount; - short topnseq; long topseqlen; const char *fname; char *seq, *seqid, matchchar; @@ -210,7 +210,7 @@ boolean saveadd; while (*s == ' ') s++; - sprintf(si, " %d) %s\n", V->nseq, s); + sprintf(si, " %ld) %s\n", V->nseq, s); saveadd = V->addit; V->addit = true; @@ -867,7 +867,7 @@ /* sscanf( V->s, "%d%d", &V->topnseq, &V->topseqlen); << topnseq == 0 !!! bad scan !! */ si= V->s; skipwhitespace(si); - V->topnseq= atoi(si); + V->topnseq= atol(si); /* changed from atoi to atol - James J (UQ) */ while (isdigit((int)*si)) si++; skipwhitespace(si); V->topseqlen= atol(si); @@ -923,7 +923,7 @@ /* sscanf( V->s, "%d%d", &V->topnseq, &V->topseqlen); < ? bad sscan ? */ si= V->s; skipwhitespace(si); - V->topnseq= atoi(si); + V->topnseq= atol(si); /* changed from atoi to atol - James J (UQ) */ while (isdigit((int)*si)) si++; skipwhitespace(si); V->topseqlen= atol(si); @@ -989,7 +989,7 @@ tolowerstr( V->s); if (strstr( V->s, "matrix")) done= true; if (strstr( V->s, "interleav")) interleaved= true; - if (NULL != (cp=strstr( V->s, "ntax=")) ) V->topnseq= atoi(cp+5); + if (NULL != (cp=strstr( V->s, "ntax=")) ) V->topnseq= atol(cp+5); /* changed from atoi to atol - James J (UQ) */ if (NULL != (cp=strstr( V->s, "nchar=")) ) V->topseqlen= atoi(cp+6); if (NULL != (cp=strstr( V->s, "matchchar=")) ) { cp += 10; @@ -1037,12 +1037,12 @@ char *readSeqFp( - const short whichEntry_, /* index to sequence in file */ + const long whichEntry_, /* index to sequence in file, changed from short to long - James J (UQ) */ FILE *fp_, /* pointer to open seq file */ const long skiplines_, const short format_, /* sequence file format */ long *seqlen_, /* return seq size */ - short *nseq_, /* number of seqs in file, for listSeqs() */ + long *nseq_, /* number of seqs in file, for listSeqs(), changed from short to long - James J (UQ) */ short *error_, /* return error */ char *seqid_) /* return seq name/info */ { @@ -1085,12 +1085,12 @@ } char *readSeq( - const short whichEntry_, /* index to sequence in file */ + const long whichEntry_, /* index to sequence in file, changed from short to long - James J (UQ) */ const char *filename_, /* file name */ const long skiplines_, const short format_, /* sequence file format */ long *seqlen_, /* return seq size */ - short *nseq_, /* number of seqs in file, for listSeqs() */ + long *nseq_, /* number of seqs in file, for listSeqs(), changed from short to long - James J (UQ) */ short *error_, /* return error */ char *seqid_) /* return seq name/info */ { @@ -1139,7 +1139,7 @@ const char *filename_, /* file name */ const long skiplines_, const short format_, /* sequence file format */ - short *nseq_, /* number of seqs in file, for listSeqs() */ + long *nseq_, /* number of seqs in file, for listSeqs(), changed from short to long - James J (UQ) */ short *error_) /* return error */ { char seqid[MAXLINE]; diff -uNr meme_4.6.1/src/ureadseq.h meme_4.6.1_patch_1/src/ureadseq.h --- meme_4.6.1/src/ureadseq.h 2011-02-04 05:23:00.000000000 +1000 +++ meme_4.6.1_patch_1/src/ureadseq.h 2011-05-23 16:50:42.044184029 +1000 @@ -130,16 +130,19 @@ extern short seqFileFormat(const char *filename, long *skiplines, short *error ); extern short seqFileFormatFp(FILE *fseq, long *skiplines, short *error ); +/* changed nseq from short to long - James J (UQ) */ extern char *listSeqs(const char *filename, const long skiplines, - const short format, short *nseq, short *error ); + const short format, long *nseq, short *error ); -extern char *readSeq(const short whichEntry, const char *filename, +/* changed whichEntry and nseq from short to long - James J (UQ) */ +extern char *readSeq(const long whichEntry, const char *filename, const long skiplines, const short format, - long *seqlen, short *nseq, short *error, char *seqid ); + long *seqlen, long *nseq, short *error, char *seqid ); -extern char *readSeqFp(const short whichEntry_, FILE *fp_, +/* changed whichEntry_ and nseq_ from short to long - James J (UQ) */ +extern char *readSeqFp(const long whichEntry_, FILE *fp_, const long skiplines_, const short format_, - long *seqlen_, short *nseq_, short *error_, char *seqid_ ); + long *seqlen_, long *nseq_, short *error_, char *seqid_ ); extern short writeSeq(FILE *outf, const char *seq, const long seqlen, const short outform, const char *seqid ); @@ -156,13 +159,14 @@ extern char *compressSeq( const char gapc, const char *seq, const long seqlen, long *newlen); #ifdef NCBI - +/* changed nseq from short to long - James J (UQ) */ extern char *listASNSeqs(const char *filename, const long skiplines, - const short format, short *nseq, short *error ); + const short format, long *nseq, short *error ); -extern char *readASNSeq(const short whichEntry, const char *filename, +/* changed whichEntry and nseq from short to long - James J (UQ) */ +extern char *readASNSeq(const long whichEntry, const char *filename, const long skiplines, const short format, - long *seqlen, short *nseq, short *error, char **seqid ); + long *seqlen, long *nseq, short *error, char **seqid ); #endif diff -uNr meme_4.6.1/website/cgi-bin/meme-chip.pl meme_4.6.1_patch_1/website/cgi-bin/meme-chip.pl --- meme_4.6.1/website/cgi-bin/meme-chip.pl 2011-03-10 08:31:41.000000000 +1000 +++ meme_4.6.1_patch_1/website/cgi-bin/meme-chip.pl 2011-05-23 16:37:38.394184374 +1000 @@ -60,7 +60,7 @@ #params my ($action, $address, $description, $datafile_name, $data, $dist, $nmotifs, $minsites, $maxsites, $minw, $maxw, - $upload_bfile_name, $pal, $posonly); + $upload_bfile_name, $pal, $posonly, $run_mast, $run_ama); #my @bfile; # Currently unused, was to be fields of a select list # Information about the sequences derived from the fasta file @@ -217,6 +217,14 @@ $doc = "../meme-input.html#pal"; $text = "Look for palindromes only"; $options .= $utils->make_checkbox("pal", "1", $text, 0); + $options .= "
\n"; + $doc = "../mast-intro.html"; + $text = "Run MAST"; + $options .= $utils->make_checkbox("run_mast", "1", $text, 0); + $options .= "
\n"; + $doc = "../doc/ama.html"; + $text = "Run AMA"; + $options .= $utils->make_checkbox("run_ama", "1", $text, 0); $opt_right .= $options; } # dna options @@ -262,6 +270,8 @@ #$evt = param('evt'); #what is evt? There is no field called evt in the actual page. I think this is some legacy thing that has hung around! $pal = param('pal'); $posonly = param('posonly'); + $run_mast = param('run_mast'); + $run_ama = param('run_ama'); } # get_params # @@ -354,6 +364,8 @@ $args .= '-meme-norevcomp ' if $posonly; $args .= '-meme-pal ' if $pal; $args .= "-bfile $upload_bfile_name " if $upload_bfile_name; + $args .= '-run-mast ' if $run_mast; + $args .= '-run-ama ' if $run_ama; $args .= $datafile_name . ' ' . join(' ', @databases); } # check_params diff -uNr meme_4.6.1/website/cgi-bin/meme.pl meme_4.6.1_patch_1/website/cgi-bin/meme.pl --- meme_4.6.1/website/cgi-bin/meme.pl 2011-02-04 05:23:10.000000000 +1000 +++ meme_4.6.1_patch_1/website/cgi-bin/meme.pl 2011-05-23 16:50:42.064184029 +1000 @@ -15,6 +15,8 @@ use lib qw(@PERLLIBDIR@); use CGI qw(:standard); +use CGI::Carp qw(fatalsToBrowser); +use Data::Dumper; use MIME::Base64; use SOAP::Lite; @@ -23,6 +25,18 @@ use OpalTypes; use MemeWebUtils; +# Setup logging +my $logger = undef; +eval { + require Log::Log4perl; + Log::Log4perl->import(); +}; +unless ($@) { + Log::Log4perl::init('@APPCONFIGDIR@/logging.conf'); + $logger = Log::Log4perl->get_logger('meme.cgi.meme'); +} +$logger->trace("Starting MEME CGI") if $logger; + our $upload_negfile_name; #why is this decared as 'our'? # get the directories using the new installation scheme @@ -71,6 +85,7 @@ # print the form # sub print_form { + $logger->trace("sub print_form") if $logger; my $action = "meme.cgi"; my $logo = "../images/meme.png"; @@ -266,6 +281,7 @@ # get parameters from the input # sub get_params { + $logger->trace("sub get_params") if $logger; # command options $options = ""; @@ -310,6 +326,7 @@ # Check the parameters on the form. # sub check_params { + $logger->trace("sub check_params") if $logger; # change working directory to LOGS chdir($logs) || $utils->whine("Can't cd to $logs"); @@ -348,7 +365,12 @@ $utils->whine("Any number of repetitions not allowed ". "for discriminative motif discovery") if($dist eq "tcm"); ($neg_data, $neg_alphabet, $neg_num, $neg_min, $neg_max, $neg_ave, $neg_total) - = $utils->get_sequence_data(undef, $upload_negfile_name, $MAXDATASET*4); + = $utils->get_sequence_data(undef, $upload_negfile_name, + $MAXDATASET*4, 0, undef, undef, "negative dataset"); + $logger->trace("neg_num: " . $neg_num . + " neg_min: " . $neg_min . " neg_max: " . $neg_max . + " neg_ave: " . $neg_ave . " neg_total: " . $neg_total) if $logger; + $utils->whine ("negative sequence alphabet ($neg_alphabet) should be same". " as postive sequence alphabet ($alphabet)") unless lc($neg_alphabet) eq lc($alphabet); @@ -422,6 +444,7 @@ # check that number of sites is OK # sub check_nsites { + $logger->trace("sub check_nsites") if $logger; my ( $minsites, # minimum nsites $maxsites, # maximum nsites @@ -455,6 +478,7 @@ # check that width is OK # sub check_width { + $logger->trace("sub check_width") if $logger; if ($minw < $MINW) { $utils->whine("The minimum width you specified ($minw) is too small.
") @@ -473,6 +497,7 @@ # make the verification message in HTML # sub make_verification { + $logger->trace("sub make_verification") if $logger; my $content = "
    \n"; @@ -538,6 +563,7 @@ # sub submit_to_opal { + $logger->trace("sub submit_to_opal") if $logger; my $service = OpalServices->new(service_url => $service_url); # diff -uNr meme_4.6.1/website/cgi-bin/meme_request.pl meme_4.6.1_patch_1/website/cgi-bin/meme_request.pl --- meme_4.6.1/website/cgi-bin/meme_request.pl 2011-02-04 05:23:10.000000000 +1000 +++ meme_4.6.1_patch_1/website/cgi-bin/meme_request.pl 2011-05-23 16:50:42.064184029 +1000 @@ -2,138 +2,323 @@ use strict; use lib qw(@PERLLIBDIR@); -use CGI qw/:standard/; +use CGI qw(:standard); +use CGI::Carp qw(fatalsToBrowser); +use Data::Dumper; +use Fcntl qw(SEEK_SET); +use File::Spec::Functions qw(catfile tmpdir); +use File::Temp qw(tempfile); use HTTP::Request::Common qw(POST); use LWP::UserAgent; -use File::Temp qw/ tempfile /; -my ($blocks_url) = '@BLOCKS_URL@'; -my ($fimo_url) = '@SITE_URL@/cgi-bin/fimo.cgi'; -my ($gomo_url) = '@SITE_URL@/cgi-bin/gomo.cgi'; -my ($mast_url) = '@SITE_URL@/cgi-bin/mast.cgi'; -my ($tomtom_url) = '@SITE_URL@/cgi-bin/tomtom.cgi'; - - -my $query = CGI->new(); - -# discover what the user wants to do -my ($param_name, $program, $motif); -foreach $param_name ($query->param()) { - if ($param_name =~ m/^do_(MAST|FIMO|GOMO|BLOCKS|TOMTOM|LOGO)_(all|\d+)$/) { - $program = $1; - $motif = $2; - last; - } +use MotifUtils qw(intern_to_meme parse_double); + +# Setup logging +my $logger = undef; +eval { + require Log::Log4perl; + Log::Log4perl->import(); +}; +unless ($@) { + Log::Log4perl::init('@APPCONFIGDIR@/logging.conf'); + $logger = Log::Log4perl->get_logger('meme.cgi.meme_request'); } +$logger->trace("Starting meme_request CGI") if $logger; + +# globals +my $tmpdir = '@TMP_DIR@'; +$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/); + +&process_request(); +exit(0); + +sub process_request { + my $blocks_url = '@BLOCKS_URL@'; + my $fimo_url = '@SITE_URL@/cgi-bin/fimo.cgi'; + my $gomo_url = '@SITE_URL@/cgi-bin/gomo.cgi'; + my $mast_url = '@SITE_URL@/cgi-bin/mast.cgi'; + my $tomtom_url = '@SITE_URL@/cgi-bin/tomtom.cgi'; + my $spamo_url = '@SITE_URL@/cgi-bin/spamo.cgi'; + + + my $q = CGI->new(); + + # discover what the user wants to do + my ($program, $motif); + + # optionally get the program and motif from the parameters with those names + $program = $q->param('program'); + $motif = $q->param('motif'); + + # otherwise look for the name of the submit button used (for MEME form) + foreach my $param_name ($q->param()) { + if ($param_name =~ m/^do_(MAST|FIMO|GOMO|BLOCKS|TOMTOM|LOGO)_(all|\d+)$/) { + $program = $1; + $motif = $2; + last; + } + } -# get the range of motifs -my ($nmotifs, $start, $end); -$nmotifs = int($query->param("nmotifs")); -if ($motif eq "all") { - $start = 1; - $end = $nmotifs; -} elsif ($motif) { - $start = int($motif); - $end = $start; -} - -# generate content -my ($content, $content_len); -if ($nmotifs > 0) { - if ($motif && $program) { - if ($program eq "MAST") { - $content = search($mast_url, $start, $end); - } elsif ($program eq "FIMO") { - $content = search($fimo_url, $start, $end); - } elsif ($program eq "GOMO") { - $content = search($gomo_url, $start, $end); - } elsif ($program eq "BLOCKS") { - $content = submit_block($blocks_url, $start, $end); - } elsif ($program eq "TOMTOM") { - $content = search($tomtom_url, $start, $end); - } elsif ($program eq "LOGO") { - #this produces an image which requires different headers so it calls exit and does not return - generate_logo($start); - } elsif ($program) { - $content = error_page("Unknown program", "The program \"$program\" can't be handled by this script."); + # generate content + my ($content, $content_len); + if (&nmotifs($q) > 0) { + if ($motif && $program) { + if ($program eq "MAST") { + $content = search($q, $mast_url, $motif, 0); + } elsif ($program eq "FIMO") { + $content = search($q, $fimo_url, $motif, 0); + } elsif ($program eq "GOMO") { + $content = search($q, $gomo_url, $motif, 0); + } elsif ($program eq "TOMTOM") { + $content = search($q, $tomtom_url, $motif, 0); + } elsif ($program eq "SPAMO") { + $content = search($q, $spamo_url, $motif, 1); + } elsif ($program eq "BLOCKS") { + $content = submit_block($q, $blocks_url, $motif); + } elsif ($program eq "LOGO") { + # this produces an image which requires different headers + # it does not return unless it need to display an error page + $content = generate_logo($q, $motif); + } elsif ($program) { + $content = error_page($q, "Unknown program", "The program \"$program\" can't be handled by this script."); + } + } else { + $content = error_page($q, "No action?", "The script couldn't find a parameter of the form \"do_[Program]_[Motif#|all]\" and so can't perform an action."); } } else { - $content = error_page("No action?", "The script couldn't find a parameter of the form \"do_[Program]_[Motif#|all]\" and so can't perform an action."); + $content = error_page($q, "No motifs?", "The number of motifs parameter resolved to a non-positive number. Maybe it wasn't specified?"); } -} else { - $content = error_page("No motifs?", "The number of motifs parameter resolved to a non-positive number. Maybe it wasn't specified?"); + $content_len = length($content); + + # output content + print "Content-type: text/html", "\n"; + print "Content-length: $content_len", "\n\n"; + print $content; + } -$content_len = length($content); -# output content -print "Content-type: text/html", "\n"; -print "Content-length: $content_len", "\n\n"; -print $content; -exit(0); +sub nmotifs { + my ($q) = @_; + return int($q->param('nmotifs')); +} + +sub parse_bg { + $logger->trace("call parse_bg") if $logger; + my ($alphabet, $bgsrc, $bgfreq) = @_; + + my %bg = (); + + $bg{dna} = $alphabet eq "ACGT"; + + $bg{source} = (defined $bgsrc ? $bgsrc : "unknown source"); + + my @bglines = split(/\n/, $bgfreq); + my @bgleft = (); + my $totalbg = 0; + foreach my $bgline (@bglines) { + next if $bgline =~ m/^\s*$/; + $bgline =~ s/^\s+//; + $bgline =~ s/\s+$//; + my @bgparts = split(/\s+/, $bgline); + push(@bgleft, @bgparts); + while (scalar(@bgleft) >= 2) { + my $a = shift(@bgleft); + my $f = 0 + shift(@bgleft); + $bg{$a} = $f; + $totalbg += $f; + } + } + $logger->debug("parse_bg returns " . Dumper(\%bg)) if $logger; + return \%bg; +} + +sub parse_strands { + my ($strands_txt, $dna) = @_; + my $strands = 0; + if ($dna) { + if ($strands_txt =~ m/\+\s*-/ or $strands_txt =~ m/-\s*\+/) { + $strands = 2; + } else { + $strands = 1; + } + } + return $strands; +} -sub get_motif { - my ($start, $stop) = @_; - my ($motif, $i); - - $motif = $query->param('version') . "\n\nALPHABET= " . $query->param('alphabet') . - "\n\nstrands: + - \n\nBackground letter frequencies (from dataset with add-one prior applied):\n" . - $query->param('bgfreq') . "\n"; - for ($i = $start; $i <= $stop; $i++) { - $motif .= "\nMOTIF ".$i." m" . $i . "\n"; - $motif .= $query->param('motifblock'.$i); - $motif .= $query->param('pssm'.$i); - $motif .= "\n"; - $motif .= $query->param('pspm'.$i); +sub parse_pspm { + $logger->trace("call parse_pspm") if $logger; + my ($motif, $alphabet, $pspm) = @_; + my @letters = split(//, $alphabet); + my %pspm_matrix = (); + foreach my $letter (@letters) { + $pspm_matrix{$letter} = []; + } + $motif->{pspm} = \%pspm_matrix; + + my @pspm_lines = split(/\n/, $pspm); + + for (my $linei = 0; $linei < scalar(@pspm_lines); $linei++) { + my $line = $pspm_lines[$linei]; + if ($line =~ m/^letter-probability matrix:\s+(.*)$/) { + $line = $1; + $line =~ s/\s+$//; #trim right + my %motif_params = split(/\s+/, $line); + #check that we have the parameters we require + die("Pspm is missing required parameter(s).\n") unless ( + defined($motif_params{'alength='}) && defined($motif_params{'w='}) && + defined($motif_params{'nsites='}) && defined($motif_params{'E='}) + ); + $motif->{width} = $motif_params{'w='}; + $motif->{sites} = $motif_params{'nsites='}; + $motif->{evalue} = $motif_params{'E='}; + if (scalar(@pspm_lines) < ($linei + $motif->{width})) { + die("Pspm is missing required rows\n"); + } + for (my $row = 0; $row < $motif->{width}; $row++) { + $line = $pspm_lines[$linei + $row + 1]; + $line =~ s/^\s+//; # trim left + $line =~ s/\s+$//; # trim right + my @probs = split(/\s+/, $line); + die("Pspm has incorrect row $row.\n") unless ( + scalar(@probs) == scalar(@letters) + ); + for (my $i = 0; $i < scalar(@probs); $i++) { + $motif->{pspm}->{$letters[$i]}->[$row] = parse_double($probs[$i]); + } + } + last; + } } - $motif; +} + +sub parse_pssm { + $logger->trace("call parse_pssm") if $logger; + my ($motif, $alphabet, $pssm) = @_; + my @letters = split(//, $alphabet); + return unless defined $pssm; + my %pssm_matrix = (); + foreach my $letter (@letters) { + $pssm_matrix{$letter} = []; + } + $motif->{pssm} = \%pssm_matrix; + + my @pssm_lines = split(/\n/, $pssm); + + for (my $linei = 0; $linei < scalar(@pssm_lines); $linei++) { + my $line = $pssm_lines[$linei]; + if ($line =~ m/log-odds matrix:/) { + for (my $row = 0; $row < $motif->{width}; $row++) { + $line = $pssm_lines[$linei + $row + 1]; + $line =~ s/^\s+//; # trim left + $line =~ s/\s+$//; # trim right + my @scores = split(/\s+/, $line); + die("Pssm has incorrect row $row.\n") unless ( + scalar(@scores) == scalar(@letters) + ); + for (my $i = 0; $i < scalar(@scores); $i++) { + $motif->{pssm}->{$letters[$i]}->[$row] = parse_double($scores[$i]); + } + } + } + } +} + +sub motif_name { + my ($q, $i) = @_; + # first see if the motif name is avaliable (it won't be for MEME) + my $name = $q->param('motifname'.$i); + unless (defined $name) { + # try to extract the motif name from the block parameter + my $block = $q->param('motifblock'.$i); + if (defined $block) { + if ($block =~ m/\bMOTIF\s+([^\s]+)\b/) { + $name = $1; + } + } + } + # if all else fails use a number for the motif name + $name = "m" . $i unless defined $name; + return $name; +} + +sub motifs { + $logger->trace("call motifs") if $logger; + my ($q, @nums) = @_; + + my $alphabet = $q->param('alphabet'); + my $bg = parse_bg($alphabet, $q->param('bgsrc'), $q->param('bgfreq')); + my $strands = parse_strands($q->param('strands'), $bg->{dna}); + my $output = ""; + my $is_first = 1; + foreach my $i (@nums) { + my $motif = { + bg => $bg, + strands => $strands, + pseudo => 0, + id => &motif_name($q, $i) + }; + parse_pspm($motif, $alphabet, $q->param('pspm'.$i)); + # note pssm is optional, this line will do nothing if it is missing + parse_pssm($motif, $alphabet, $q->param('pssm'.$i)); + + $logger->trace("motif $i: " . Dumper($motif)) if $logger; + + $output .= intern_to_meme($motif, 1, 1, $is_first); + $is_first = 0; + } + return $output; } # # Test if all parameters required for a search are avaliable # sub can_search { - my ($start, $stop) = @_; - my ($all_params) = $query->param('version') && $query->param('alphabet') - && $query->param('bgfreq') && $query->param('name') && $query->param('bgfreq'); - if ($all_params) { - my ($i); - for ($i = $start; $i <= $stop; $i++) { - $all_params = $query->param('motifblock'.$i) && $query->param('pssm'.$i) && $query->param('pspm'.$i); - last unless $all_params; - } + $logger->trace("call can_search") if $logger; + my ($q, @nums) = @_; + return 0 unless $q->param('version') && $q->param('alphabet') + && $q->param('bgfreq') && $q->param('name'); + foreach my $i (@nums) { + return 0 unless $q->param('pspm'.$i); } - $all_params; + return 1; } # -# Submit a search query +# Submit a search q # sub search { - my($url, $start, $stop) = @_; + $logger->trace("call search") if $logger; + my($q, $url, $selected, $all) = @_; my(%params, $content, $i); - if (can_search($start, $stop)) { + my @nums = ($selected); + @nums = (1 .. &nmotifs($q)) if ($selected eq 'all' || $all); + + + if (can_search($q, @nums)) { # set up the parameters to pass - $params{'version'} = $query->param('version'); - $params{'alphabet'} = $query->param('alphabet'); - $params{'bgfreq'} = $query->param('bgfreq'); - $params{'inline_name'} = $query->param('name'); - $params{'inline_motifs'} = get_motif($start, $stop); + $params{'version'} = $q->param('version'); + $params{'alphabet'} = $q->param('alphabet'); + $params{'bgfreq'} = $q->param('bgfreq'); + $params{'inline_name'} = $q->param('name'); + $params{'inline_motifs'} = &motifs($q, @nums); + $params{'inline_selected'} = $selected if ($selected ne 'all' && $all); # post the query - my($ua) = LWP::UserAgent->new(); - my($req) = POST "$url", [%params]; - my($request) = $ua->request($req); + my $ua = LWP::UserAgent->new(); + my $req = POST $url, [%params]; + my $request = $ua->request($req); $content = $request->content; } else { - $content = error_page("Missing required variable",""); + $content = error_page($q, "Missing required variable",""); } - $content; + return $content; } # search sub error_page { - my($title, $message) = @_; - my($content) = + $logger->trace("call error_page") if $logger; + my($q, $title, $message) = @_; + my $content = "\n". "\t\n". "\t\t$title\n". @@ -147,8 +332,8 @@ "\t\t\t\t
\n". "\t\t\t\n"; my ($param_name, $param_value); - foreach $param_name ($query->param()) { - $param_value = $query->param($param_name); + foreach $param_name ($q->param()) { + $param_value = $q->param($param_name); $content .= "\t\t\t\n". "\t\t\t\t\n". @@ -159,20 +344,44 @@ "\t\t

Database 

Number of Motifs 

Motifs Matched 

Database 

Number of Motifs 

Motifs Matched 

Parameter Value
$param_name
\n". "\t\n". ""; - $content; + return $content; +} + +sub program_error { + my ($q, $program, $status) = @_; + # check status + if ($status == -1) { + return error_page($q, "$program error", "$program failed to run."); + } elsif ($status & 127) { + return error_page($q, "$program error", + sprintf("%s died with signal %d, %s coredump.", + $program, ($status & 127), ($status & 128) ? 'with' : 'without' + ) + ); + } elsif ($status != 0) { + return error_page($q, "$program error", + sprintf("%s exited with value %d indicating failure.", $program, $? >> 8) + ); + } } # # Submit a block to the blocks processor # sub submit_block { - my($blocks_url, $start, $end) = @_; + $logger->trace("call submit_block") if $logger; + my($q, $blocks_url, $motif) = @_; my($content, $blocks, $i); # get the BLOCK(S) - $blocks = ""; - for ($i = $start; $i <= $end; $i++) { - $blocks .= param('BLOCKS'.$i); + if ($motif eq 'all') { + $blocks = ""; + my $end = &nmotifs($q); + for ($i = 1; $i <= $end; $i++) { + $blocks .= $q->param('BLOCKS'.$i); + } + } else { + $blocks = $q->param('BLOCKS'.$motif); } my $ua = LWP::UserAgent->new(); @@ -195,172 +404,153 @@ $content; } # submit_block -# -# Compare a PSPM to the TOMTOM motif database search tool -# -sub submit_pspm_to_tomtom { - my($tomtom_url, $number) = @_; - my(@fields, $nsites, $i, $n, $w, $pspm, $row, $col); - - # get the motif PSPM - $_ = param('pspm'.$number); - @fields = split; - - # ignore 1st 10 entries - $nsites = $fields[7];# save number of sites - for ($i = 0; $i <= $#fields - 10; $i++) { - $fields[$i] = $fields[$i+10] * $nsites; - } - $#fields = $#fields - 10; - - # rotate PSPM 90 degrees (natural format) as a string with newlines - $n = $#fields + 1; # number of entries in motif - $w = $n/4; # motif width - $pspm = ""; - for ($row=0; $row<4; $row++) { - for ($col=0; $col<$w; $col++) { - $pspm .= " " . $fields[($col*4) + $row]; - } - $pspm .= "\n"; # terminate row with newline - } - - # create the request - my $ua = LWP::UserAgent->new(); - my $req = POST "$tomtom_url", - Content_Type => 'multipart/form-data', - Content => [ 'query' => $pspm, 'query_name' => "Motif_$number" ]; - my $request = $ua->request($req); - - my $content = $request->content; - - # return the page - $content; -} # submit_pspm_to_tomtom - -sub generate_logo { - my ($number) = @_; - my ($bin) = "@MEME_DIR@/bin"; - - binmode STDOUT; - - #print "Content-type: text/plain", "\n\n"; - #print "Hello World"; - #exit(0); - my ($content) = ""; - - - my ($motifs) = get_motif($number, $number); - - #print "Content-type: text/plain", "\n\n"; - #print $motifs; - #exit(0); - - # write the motifs to a temporary file that will be deleted when perl exits - my ($fh, $tmpname); - ($fh, $tmpname) = tempfile(UNLINK => 1); - print($fh $motifs); - close($fh); - - #print "Content-type: text/plain", "\n\n"; - #print "cat $tmpname", "\n"; - #print `cat $tmpname`; - #exit(0); - - # get parameters - my ($ssc, $png, $rc, $width, $height, $outname); - $ssc = ($query->param('logossc_' . $number) eq "true"); - $png = ($query->param('logoformat_' . $number) eq "png"); - $rc = ($query->param('logorc_' . $number) eq "true"); - $width = ($query->param('logowidth_'.$number)); - $height = ($query->param('logoheight_'.$number)); +sub get_param_with_default { + $logger->trace("call get_param_with_default") if $logger; + my ($q, $name, $num, $default) = @_; + my $param; + $param = $q->param($name .'_' . $num) if defined($num); + $param = $q->param($name) unless defined $param; + $param = $default unless defined $param; + return $param; +} - if ($width =~ m/^\s*(\d+(.\d+)?)\s*$/) { - $width = "-w $1"; - } else { - $width = ""; - } +sub get_pos_num { + my ($txt) = @_; - if ($height =~ m/^\s*(\d+(.\d+)?)\s*$/) { - $height = "-h $1"; + if ($txt =~ m/^\s*(\d+(.\d+)?)\s*$/) { + return $1; } else { - $height = ""; + return undef; } +} +sub logo_name { + my ($rc, $ssc, $png, $number) = @_; # generate the name that should be specified for downloading - $outname = "logo"; + my $name = "logo"; if ($rc) { - $outname .= "_rc"; + $name .= "_rc"; } if ($ssc) { - $outname .= "_ssc"; + $name .= "_ssc"; } - $outname .= $number; - $outname .= "."; + $name .= $number; + $name .= "."; if ($png) { - $outname .= "png"; + $name .= "png"; } else { - $outname .= "eps"; - } - - #print "Content-type: text/plain", "\n\n"; - #print "ceqlogo -k AA -i $tmpname", "\n"; - #print `@MEME_DIR@/bin/ceqlogo -k AA -i $tmpname`; - #print "cat $tmpname", "\n"; - #print `cat $tmpname`; - #exit(0); - - # find out the format - my ($alphabet_size) = length($query->param('alphabet')); - my ($kind); - if ($alphabet_size < 20) { - $kind = "NA"; - } else { - $kind = "AA"; - } - - # setup ssc toggle - my ($ssc_toggle) = ""; - if ($ssc) { - $ssc_toggle = "-E -S"; - } - - - # setup reverse complement flag as required - # note that only nucleic acid can be reverse complemented - my ($rc_flag) = ""; - if ($rc and $alphabet_size < 20) { - $rc_flag = "-r"; + $name .= "eps"; } + return $name; +} - my $fineprint = "MEME "; +sub logo_desc { + my ($program, $ssc) = @_; + my $fineprint = $program . ' '; if ($ssc) { - $fineprint = $fineprint."(with SSC)"; + $fineprint = $fineprint.'(with SSC)'; } else { - $fineprint = $fineprint."(no SSC)"; + $fineprint = $fineprint.'(no SSC)'; } my (undef,$min,$hour,$day,$month,$year,undef,undef,undef) = localtime(time); $year += 1900; # year contains years since 1900 $month += 1; # month counted from zero. $fineprint .= sprintf("%d.%d.%d %02d:%02d", $day, $month, $year, $hour, $min); + return $fineprint; +} - # generate the image content - if ($png) { - $content = `$bin/ceqlogo -Y -N $ssc_toggle -k $kind $rc_flag -d \"$fineprint\" -t \"\" -x \"\" $width $height -i $tmpname | $bin/convert - png:-`; - } else { - $content = `$bin/ceqlogo -Y -N $ssc_toggle -k $kind $rc_flag -d \"$fineprint\" -t \"\" -x \"\" $width $height -i $tmpname`; - } - #print "Content-type: text/plain\n\n"; - #print $content; - #exit(0); +sub unlink1 { + my ($fh, $filename) = @_; + close($fh); + unlink($filename); +} + +sub generate_logo { + $logger->trace("call generate_logo") if $logger; + my ($q, $number) = @_; + my $bin = '@MEME_DIR@/bin'; + my $gs = '@WHICHGHOSTSCRIPT@'; + $gs = 'gs' if $gs =~ m/\@WHICH[G]HOSTSCRIPT\@/; # The makefile needs to be updated but I can't do that in a patch + + # mime type + my $mime_type = "application/postscript"; + + # get logo settings + my ($program, $dna, $ssc, $png, $rc, $width, $height); + $program = $q->param('program'); + $program = 'MEME' unless defined $program; + $dna = (length(get_param_with_default($q, 'alphabet', undef, '')) < 20); + $ssc = (get_param_with_default($q, 'logossc', $number, 'false') eq 'true'); + $png = (get_param_with_default($q, 'logoformat', $number, 'png') eq 'png'); + $rc = (get_param_with_default($q, 'logorc', $number, 'false') eq 'true'); + $width = get_pos_num(get_param_with_default($q, 'logowidth', $number, '')); + $height = get_pos_num(get_param_with_default($q, 'logoheight', $number, '')); + + # write the motif to a temporary file + my ($motif_fh, $motif_nam) = &tempfile('motif_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + print($motif_fh &motifs($q, $number)); + + # create a temporary file as destination of ceqlogo + my ($img_fh, $img_nam) = &tempfile('eps_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + + # create arguments to ceqlogo + my @ceqlogo_args = ('-Y', '-N'); # enable Y axis, number X axis + push(@ceqlogo_args, '-t', '', '-x', ''); # remove title, remove X axis label + push(@ceqlogo_args, '-k', ($dna ? 'NA' : 'AA')); # set alphabet type + push(@ceqlogo_args, '-d', &logo_desc($program, $ssc)); # set descriptive fine print + push(@ceqlogo_args, '-E', '-S') if $ssc; # if ssc: enable error bars, enable SSC + push(@ceqlogo_args, '-r') if ($rc && $dna); # if rc: enable reverse complement mode + push(@ceqlogo_args, '-w', $width) if $width; # set the width + push(@ceqlogo_args, '-h', $height) if $height; # set the height + push(@ceqlogo_args, '-i', $motif_nam); # set the input file + push(@ceqlogo_args, '-o', $img_nam); + + # run ceqlogo + my $status = system(&catfile($bin, 'ceqlogo'), @ceqlogo_args); + + # remove motif temporary file + &unlink1($motif_fh, $motif_nam); + + # check status + return &program_error($q, 'ceqlogo', $status) if $status; - # this header tells the browser to download the file - print "Content-Disposition: attachment; filename=$outname", "\n"; if ($png) { - print "Content-type: image/png", "\n"; - } else { - print "Content-type: application/postscript", "\n"; + # create a temporary file as destination of ghostscript + my ($png_fh, $png_nam) = &tempfile('png_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + + # create arguments to ghostscript + my @gs_args = ( + '-q', '-r100', '-dSAFER', '-dBATCH', '-dNOPAUSE', '-dDOINTERPOLATE', + '-sDEVICE=pngalpha', '-dBackgroundColor=16#FFFFFF', '-dTextAlphaBits=4', + '-dGraphicsAlphaBits=4', '-dEPSCrop' + ); + push(@gs_args, '-sOutputFile='.$png_nam, $img_nam); + + # run ghostscript + $status = system($gs, @gs_args); + + # check status + return &program_error($q, 'ghostscript', $status) if $status; + + # remove eps temporary file + &unlink1($img_fh, $img_nam); + + # update the image to be the png + ($img_fh, $img_nam) = ($png_fh, $png_nam); + $mime_type = "image/png"; } - print "\n"; + + # slurp in content + seek($img_fh, 0, SEEK_SET); + my $content = do { local( $/ ); <$img_fh>}; + + # remove image file + &unlink1($img_fh, $img_nam); + + # output the image + my $outname = &logo_name($rc, $ssc, $png, $number); + binmode STDOUT; + print $q->header(-type=>$mime_type, -Content_Disposition=>"attachment; filename=$outname"); print $content; exit(0); } diff -uNr meme_4.6.1/website/cgi-bin/tomtom_request.pl meme_4.6.1_patch_1/website/cgi-bin/tomtom_request.pl --- meme_4.6.1/website/cgi-bin/tomtom_request.pl 2011-02-04 05:23:10.000000000 +1000 +++ meme_4.6.1_patch_1/website/cgi-bin/tomtom_request.pl 2011-05-23 16:50:42.064184029 +1000 @@ -3,10 +3,18 @@ use strict; use CGI; use CGI::Carp qw(fatalsToBrowser); -use List::Util qw(sum); +use Fcntl qw(SEEK_SET); +use File::Spec::Functions qw(catfile tmpdir); use File::Temp qw(tempfile); +use List::Util qw(sum); + +# globals +my $tmpdir = '@TMP_DIR@'; +$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/); my $bin = "@MEME_DIR@/bin"; +my $gs = '@WHICHGHOSTSCRIPT@'; +$gs = 'gs' if $gs =~ m/\@WHICH[G]HOSTSCRIPT\@/; # The makefile needs to be updated but I can't do that in a patch my $q = CGI->new; dispatch(); @@ -43,79 +51,162 @@ print $q->end_html; } -sub tomtom_logo { - my $version = $q->param('version'); - die("Input version must be up to three dot separated numbers.") unless ($version =~ m/^\d+(\.\d+(\.\d+)?)?$/); - my $bgline = parse_background($q->param('background')); - my $target_id = $q->param('target_id'); - die("Input target_id must not contain spaces.") unless ($target_id =~ m/^\S+$/); - my $target_length = $q->param('target_length'); - die("Input target_length must be a positive integer.") unless ($target_length =~ m/^\d+$/); - my $target_pspm = $q->param('target_pspm'); - die("Input target_pspm must be defined.") unless defined $target_pspm; - my $target_rc_str = $q->param('target_rc'); - die("Input target_rc must be either 1 or 0.") unless ($target_rc_str =~ m/^[01]$/); - my $target_rc = int($target_rc_str); - - my $query_id = $q->param('query_id'); - die("Input query_id must not contain spaces.") unless ($query_id =~ m/^\S+$/); - my $query_length = $q->param('query_length'); - die("Input query_length must be a positive integer.") unless ($query_length =~ m/^\d+$/); - my $query_pspm = $q->param('query_pspm'); - die("Input query_pspm must be defined.") unless defined $query_pspm; - my $query_offset_str = $q->param('query_offset'); - die("Input query_offset must be a number.") unless ($query_offset_str =~ m/^[-]?\d+$/); - my $query_offset = int($query_offset_str); - - my $error_bars = $q->param('error_bars'); - die("Input error_bars but be 0 or 1.") unless ($error_bars =~ m/^[01]$/); - my $small_sample_correction = $q->param('small_sample_correction'); - die("Input small_sample_correction must be 0 or 1.") unless ($small_sample_correction =~ m/^[01]$/); - my $flip = $q->param('flip'); - die("Input flip must be 0 or 1.") unless ($flip =~ m/^[01]$/); - - - my $img_w = ""; - my $image_width = $q->param('image_width'); - if (defined $image_width && $image_width =~ m/^\d+$/) { - $img_w = "-w $image_width "; - } - my $img_h = ""; - my $image_height = $q->param('image_height'); - if (defined $image_height && $image_height =~ m/^\d+$/) { - $img_h = "-h $image_height "; +sub get_param_with_default { + my ($q, $name, $default) = @_; + my $param = $q->param($name); + if (defined $param) { + return $param; + } else { + return $default; + } +} + +sub validate_param { + my ($q, $name, $match) = @_; + my $param = $q->param($name); + die("Missing required parameter $name.\n") unless defined $param; + die("Parameter $name failed validation.\n") unless $param =~ m/$match/; + return $param; +} + +sub program_error { + my ($q, $program, $status) = @_; + # check status + if ($status == -1) { + die("$program failed to run."); + } elsif ($status & 127) { + die( + sprintf("%s died with signal %d, %s coredump.", + $program, ($status & 127), ($status & 128) ? 'with' : 'without' + ) + ); + } elsif ($status != 0) { + die( + sprintf("%s exited with value %d indicating failure.", $program, $? >> 8) + ); } - my $image_type = $q->param('image_type'); - die("Input image_type must be either png or eps.") unless ($image_type =~ m/^(png|eps)$/); +} - my $q_file = meme_nucleotide_file($version, $bgline, $query_id, $query_pspm); - my $t_file = meme_nucleotide_file($version, $bgline, $target_id, $target_pspm); +sub get_pos_num { + my ($txt) = @_; + if ($txt =~ m/^\s*(\d+(.\d+)?)\s*$/) { + return $1; + } else { + return 0; + } +} + +sub unlink1 { + my ($fh, $filename) = @_; + close($fh); + unlink($filename); +} + +sub tomtom_logo { + # version and background parameters + my $version = &validate_param($q, 'version', qr/^\d+(\.\d+(\.\d+)?)?$/); + my $bgline = &parse_background($q->param('background')); + + # target motif parameters + my $target_id = &validate_param($q, 'target_id', qr/^\S+$/); + my $target_length = &validate_param($q, 'target_length', qr/^\d+$/); + my $target_pspm = &validate_param($q, 'target_pspm', qr/.+/); + my $target_rc = &validate_param($q, 'target_rc', qr/^[01]$/); - my $eb = ($error_bars ? "-E " : ""); - my $ssc = ($small_sample_correction ? "-S " : ""); - my $t_rc = (($flip ? !($target_rc) : $target_rc) ? "-r " : ""); - my $q_rc = ($flip ? "-r " : ""); + # query motif parameters + my $query_id = &validate_param($q, 'query_id', qr/^\S+$/); + my $query_length = &validate_param($q, 'query_length', qr/^\d+$/); + my $query_pspm = &validate_param($q, 'query_pspm', qr/.+/); + my $query_offset = &validate_param($q, 'query_offset', qr/^[-]?\d+$/); + + # user selected parameters + my $image_type = &validate_param($q, 'image_type', qr/^(png|eps)$/); + my $error_bars = &validate_param($q, 'error_bars', qr/^[01]$/); + my $ssc = &validate_param($q, 'small_sample_correction', qr/^[01]$/); + my $flip = &validate_param($q, 'flip', qr/^[01]$/); + my $image_width = &get_pos_num(&get_param_with_default($q, 'image_width', 0)); + my $image_height = &get_pos_num(&get_param_with_default($q, 'image_height', 0)); + + # query offset my $q_off = ($flip ? $target_length - ($query_length + $query_offset) : $query_offset); - my $t_shift = ($q_off < 0 ? "-s ".(-$q_off)." " : ""); - my $q_shift = ($q_off > 0 ? "-s ".($q_off)." " : ""); + # fine descriptive text + my $fine_text = "Tomtom " . &date_time_string(); + # mime type + my $mime_type = "application/postscript"; + + # write the query and target motifs to temporary files + my ($q_fh, $q_nam) = &tempfile('motif_query_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + print $q_fh &meme_nucleotide_motif($version, $bgline, $query_id, $query_pspm); + my ($t_fh, $t_nam) = &tempfile('motif_target_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + print $t_fh &meme_nucleotide_motif($version, $bgline, $target_id, $target_pspm); + + # temporary file for output + my ($img_fh, $img_nam) = &tempfile('eps_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); - my $fine_text = "Tomtom " . date_time_string(); + # create arguments to ceqlogo + my @ceqlogo_args = ('-Y', '-N'); # enable Y axis, number X axis + push(@ceqlogo_args, '-E') if $error_bars; + push(@ceqlogo_args, '-S') if $ssc; + push(@ceqlogo_args, '-k', 'NA'); # nucleic acid alphabet + push(@ceqlogo_args, '-w', $image_width) if $image_width; + push(@ceqlogo_args, '-h', $image_height) if $image_height; + push(@ceqlogo_args, '-d', $fine_text); + push(@ceqlogo_args, '-t', $target_id); # set title label + push(@ceqlogo_args, '-x', $query_id); # set x axis label + push(@ceqlogo_args, '-r') if $target_rc xor $flip; # rc target + push(@ceqlogo_args, '-i', $t_nam); # target motif + push(@ceqlogo_args, '-s', -$q_off) if ($q_off < 0); # shift target + push(@ceqlogo_args, '-r') if $flip; # rc query + push(@ceqlogo_args, '-i', $q_nam); # query motif + push(@ceqlogo_args, '-s', $q_off) if ($q_off > 0); # shift query + push(@ceqlogo_args, '-o', $img_nam); + + # run ceqlogo + my $status = system(&catfile($bin, 'ceqlogo'), @ceqlogo_args); - my $ceqlogo_cmd = "$bin/ceqlogo -Y -N $eb $ssc -k NA $img_w $img_h -d \"$fine_text\" -t \"$target_id\" -x \"$query_id\" $t_rc -i $t_file $t_shift $q_rc -i $q_file $q_shift"; + # remove temporary motif files + &unlink1($q_fh, $q_nam); + &unlink1($t_fh, $t_nam); + + # check status + return &program_error($q, 'ceqlogo', $status) if $status; + + if ($image_type eq 'png') { + # create a temporary file as destination of ghostscript + my ($png_fh, $png_nam) = &tempfile('png_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); + + # create arguments to ghostscript + my @gs_args = ( + '-q', '-r100', '-dSAFER', '-dBATCH', '-dNOPAUSE', '-dDOINTERPOLATE', + '-sDEVICE=pngalpha', '-dBackgroundColor=16#FFFFFF', '-dTextAlphaBits=4', + '-dGraphicsAlphaBits=4', '-dEPSCrop' + ); + push(@gs_args, '-sOutputFile='.$png_nam, $img_nam); + + # run ghostscript + $status = system($gs, @gs_args); + + # check status + return &program_error($q, 'ghostscript', $status) if $status; + + # remove eps temporary file + &unlink1($img_fh, $img_nam); - my $image; - my $mime_type; - if ($image_type =~ m/^eps$/) { - $image = `$ceqlogo_cmd`; - $mime_type = "application/postscript"; - } elsif ($image_type =~ m/^png$/) { - $image = `$ceqlogo_cmd | $bin/convert - png:-`; + # update the image to be the png + ($img_fh, $img_nam) = ($png_fh, $png_nam); $mime_type = "image/png"; } + # slurp the image + seek($img_fh, 0, SEEK_SET); + my $image = do { local( $/ ); <$img_fh>}; + + # remove the image temporary file + &unlink1($img_fh, $img_nam); + + # output the image my $outname = "logo_" . $query_id . "_" . $target_id . "." . $image_type; binmode STDOUT; - print $q->header(-type=>$mime_type, -Content_Disposition=>"attachment; filename=$outname"); print $image; } @@ -131,16 +222,6 @@ return "A $bgprobs[0] C $bgprobs[1] G $bgprobs[2] T $bgprobs[3]"; } -sub meme_nucleotide_file { - my $motif = meme_nucleotide_motif(@_); - # write the motifs to a temporary file that will be deleted when perl exits - my ($fh, $tmpname); - ($fh, $tmpname) = tempfile(UNLINK => 1); - print $fh $motif; - close $fh; - return $tmpname; -} - sub meme_nucleotide_motif { my ($version, $bgline, $id, $pspm) = @_; diff -uNr meme_4.6.1/website/html/alternates.html meme_4.6.1_patch_1/website/html/alternates.html --- meme_4.6.1/website/html/alternates.html 2011-02-04 05:23:08.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/alternates.html 2011-05-23 16:25:41.884184692 +1000 @@ -13,7 +13,7 @@
diff -uNr meme_4.6.1/website/html/documentation.html meme_4.6.1_patch_1/website/html/documentation.html --- meme_4.6.1/website/html/documentation.html 2011-02-04 05:23:08.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/documentation.html 2011-05-23 16:25:41.884184692 +1000 @@ -12,7 +12,7 @@
diff -uNr meme_4.6.1/website/html/intro.html meme_4.6.1_patch_1/website/html/intro.html --- meme_4.6.1/website/html/intro.html 2011-02-04 05:23:08.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/intro.html 2011-05-23 16:25:41.884184692 +1000 @@ -19,7 +19,7 @@
diff -uNr meme_4.6.1/website/html/meme-download.html meme_4.6.1_patch_1/website/html/meme-download.html --- meme_4.6.1/website/html/meme-download.html 2011-03-10 08:31:41.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/meme-download.html 2011-05-23 16:25:41.884184692 +1000 @@ -13,7 +13,7 @@
diff -uNr meme_4.6.1/website/html/motif_discovery.html meme_4.6.1_patch_1/website/html/motif_discovery.html --- meme_4.6.1/website/html/motif_discovery.html 2011-03-10 08:31:41.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/motif_discovery.html 2011-05-23 16:25:41.884184692 +1000 @@ -18,7 +18,7 @@
- +
diff -uNr meme_4.6.1/website/html/resources.in meme_4.6.1_patch_1/website/html/resources.in --- meme_4.6.1/website/html/resources.in 2011-03-30 08:35:54.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/resources.in 2011-05-23 16:25:41.884184692 +1000 @@ -12,7 +12,7 @@
diff -uNr meme_4.6.1/website/html/search.in meme_4.6.1_patch_1/website/html/search.in --- meme_4.6.1/website/html/search.in 2011-02-04 05:23:08.000000000 +1000 +++ meme_4.6.1_patch_1/website/html/search.in 2011-05-23 16:25:41.884184692 +1000 @@ -14,7 +14,7 @@