]+
- and may have comment lines beginning with "#" in column 1.
+ File must have format:
+ [ ]+
+ and may have comment lines beginning with "#" in column 1.
- If the freq is not NULL, uses the 0-order
- frequencies in freq as the model, after adding X if requested.
+ If the freq is not NULL, uses the 0-order
+ frequencies in freq as the model, after adding X if requested.
- Sets the order of the model.
+ Sets the order of the model.
- Returns array:
- cp[s2i(wa)] = Pr(a | w)
- where "a" is a character and "w" is a string.
- The 0-order probabilities are in positions 0..alength-1 of the
- array.
+ Returns array:
+ cp[s2i(wa)] = Pr(a | w)
+ where "a" is a character and "w" is a string.
+ The 0-order probabilities are in positions 0..alength-1 of the
+ array.
*/
/***************************************************************************/
double *read_markov_model(
- char *pfile, /* name of probability file */
- double *freq, /* letter frequencies */
- char *alpha, /* alphabet expected */
- BOOLEAN add_x, /* add x-tuples if TRUE */
- BOOLEAN rc, /* average reverse complements*/
- int *order /* order of model read */
+ char *pfile, /* name of probability file */
+ double *freq, /* letter frequencies */
+ char *alpha, /* alphabet expected */
+ BOOLEAN add_x, /* add x-tuples if TRUE */
+ BOOLEAN rc, /* average reverse complements*/
+ int *order /* order of model read */
)
{
- int i; /* index into array */
- double a_p[MAX_BACK_SIZE]; /* tuple-prob array */
- double *a_cp; /* conditional prob. array */
- FILE *pfilep; /* file pointer to file */
- char *line; /* line buffer */
- char **fields; /* fields of line */
- int nfields; /* number of fields in line */
- int line_no = 0; /* line number */
- char *tuple; /* the tuple */
- double p; /* the probability */
- int maxw = 0; /* maximum tuple width */
- int alen = strlen(alpha); /* length of alphabet */
- int ntuples; /* number of tuples */
+ int i, nfields, line_no, maxw, alen, ntuples;
+ double *a_p, *a_cp, p;
+ FILE *pfilep;
+ char *line, **fields, *tuple;
+
+ line_no = 0;
+ maxw = 0;
+ alen = strlen(alpha);
a_cp = NULL;
line = NULL;
fields = NULL;
+
/* check input */
if (!pfile && !freq) {
fprintf(stderr, "read_markov_model error: specify pfile or freq\n");
@@ -138,10 +136,10 @@
setup_index(alpha);
/* use the frequencies if given */
- if (freq) { /* frequencies given */
+ if (freq) { /* frequencies given */
Resize(a_cp, alen, double);
for (i=0; i1) {
fprintf(stderr, "Illegal probability in file %s line %d: %s\n",
pfile, line_no, line);
+ exit(1);
+ } else if (p == 0) {
+ fprintf(stderr, "Zero probability state %s in Markov model file \"%s\" "
+ "on line %d.\n", tuple, pfile, line_no);
+ exit(1);
}
- len = strlen(tuple);
- maxw = MAX(len, maxw);
index = s2i(tuple);
if (index < 0) {
fprintf(stderr, "Illegal character in word `%s' in file %s line %d: %s\n",
tuple, pfile, line_no, line);
exit(1);
}
- if (index >= MAX_BACK_SIZE) {
- for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);
- fprintf(stderr, "Background model too large. Use smaller model or increase \nMAX_BACK_SIZE to at least %d in background.h and recompile.\n", ntuples);
- exit(1);
+ // check if we need to expand the storage allocated
+ len = strlen(tuple);
+ if (len > maxw) {
+ int new_ntuples;
+ // calculate the new size needed
+ for (i = maxw+1, new_ntuples = ntuples; i <= len; i++) new_ntuples += pow(alen, i);
+ // resize and initialize new section to -1
+ Resize(a_p, new_ntuples, double);
+ for (i = ntuples; i < new_ntuples; i++) a_p[i] = -1;
+ // record the new sizes
+ maxw = len;
+ ntuples = new_ntuples;
}
- a_p[index] = p; /* store probability */
+ assert(index < ntuples);
+ a_p[index] = p; // store probability
}
fclose(pfilep);
@@ -202,7 +212,7 @@
exit(1);
}
- *order = maxw - 1; /* order of Markov model */
+ *order = maxw - 1; /* order of Markov model */
/* average reverse complement probabilities together if requested */
if (rc) average_rc(add_x, a_p, maxw, "", 0, alpha);
@@ -223,8 +233,9 @@
// cleanup before return
if (add_x) free(alpha);
+ free(a_p);
- return(a_cp); /* return conditionals */
+ return(a_cp); /* return conditionals */
} /* read_markov_model */
/***************************************************************************/
diff -r 5863bee1d071 -r 8041c82f059e src/background.h
--- a/src/background.h Fri Dec 07 10:34:33 2012 +1000
+++ b/src/background.h Thu Jan 31 12:04:26 2013 +1000
@@ -16,10 +16,6 @@
#ifndef BACKGROUND_H
#define BACKGROUND_H
-/* maximum size of background model */
-// 475254 is required for a third-order protein model
-#define MAX_BACK_SIZE 475254
-
/* compute the log probability of a substring of a sequence given the log
cumulative probability in lcb:
log Pr(S_{i,...,i+w-1} | H_0)
diff -r e77390759cae -r 61325860cd46 src/centrimo.c
--- a/src/centrimo.c Thu Nov 22 18:53:34 2012 +1100
+++ b/src/centrimo.c Tue Jan 22 18:14:22 2013 +1000
@@ -80,7 +80,7 @@
" --dfile use the file content as the description;\n"
" default: no description\n"
" --local compute the enrichment of all regions;\n"
- " default: compute enrichement of central regions only\n"
+ " default: compute enrichment of central regions only\n"
" --noseq do not store sequence IDs in HTML output;\n"
" default: IDs are stored in the HTML output\n"
" --neg plot a negative set of sequences against the\n"
@@ -180,6 +180,9 @@
// significance of the same window in the negative set if it exists
double neg_log_pvalue;
double neg_log_adj_pvalue;
+ // Fisher Exact test p-value comparing positive and negative enrichment
+ double fisher_log_pvalue;
+ double fisher_log_adj_pvalue;
// how different is the negative set?
double mcc; // Matthews correlation coefficient
} WIN_STATS_T;
@@ -977,8 +980,13 @@
jsonwr_dbl_prop(json, "log_adj_pvalue", window->log_adj_pvalue);
if (negative_sequences) {
jsonwr_dbl_prop(json, "neg_sites", window->neg_sites);
- if (discriminative) jsonwr_dbl_prop(json, "neg_log_adj_pvalue",
- window->neg_log_adj_pvalue);
+ jsonwr_dbl_prop(json, "neg_log_adj_pvalue", window->neg_log_adj_pvalue);
+ // Compute Fisher Exact Test for peak
+ window->fisher_log_pvalue = window_FET(window->sites, counts->total_sites, window->neg_sites,
+ neg_counts->total_sites);
+ window->fisher_log_adj_pvalue = LOGEV(log(stats->n_tests), window->fisher_log_pvalue);
+ jsonwr_dbl_prop(json, "fisher_log_adj_pvalue", window->fisher_log_adj_pvalue);
+ jsonwr_dbl_prop(json, "fisher_adj_pvalue", window->fisher_log_adj_pvalue);
if (mcc) jsonwr_dbl_prop(json, "mcc", window->mcc);
}
jsonwr_end_object_value(json);
@@ -1434,11 +1442,9 @@
window->log_adj_pvalue = log_adj_pvalue;
if (options->neg_sequences) {
window->neg_sites = neg_sites;
- if (options->disc) {
- window->neg_log_pvalue = window_binomial(neg_sites,
- neg_c_counts->total_sites, bins, n_bins);
- window->neg_log_adj_pvalue = LOGEV(log_n_tests, window->neg_log_pvalue);
- }
+ window->neg_log_pvalue = window_binomial(neg_sites,
+ neg_c_counts->total_sites, bins, n_bins);
+ window->neg_log_adj_pvalue = LOGEV(log_n_tests, window->neg_log_pvalue);
if (options->mcc) {
window->mcc = window_MCC(pve_sites, neg_sites, seqN, neg_seqN);
}
@@ -1591,7 +1597,7 @@
}
motif_stats->n_tests = n_tests;
motif_stats->n_bins = n_bins;
- motif_stats->score_threshold = best_score_thresh;
+ motif_stats->score_threshold = options->optimize_score ? best_score_thresh : options->score_thresh;
motif_stats->sites = best_total_sites;
motif_stats->neg_sites = best_neg_total_sites;
return motif_stats;
@@ -1679,6 +1685,9 @@
}
// convert the evalue threshold into a pvalue threshold
log_pvalue_thresh = log(options.evalue_thresh) - log(motifN);
+ // if p-value threshold would be 1.0, reduce it slightly to
+ // prevent jillions of absolutely non-significant peaks being printed
+ if (log_pvalue_thresh >= 0) log_pvalue_thresh = log(0.999999999);
// Setup some things for double strand scanning
if (options.scan_both_strands == TRUE) {
@@ -1708,8 +1717,6 @@
motif = rev_motif = NULL;
pos_pssm = rev_pssm = NULL;
scores = neg_scores = NULL;
- // initialize Fisher Exact Test global log values
- if (options.disc) init_FET();
// calculate and output the best windows for each motif
for (db_i = 0, i = 1; db_i < arraylst_size(options.motif_sources); db_i++) {
diff -r 5863bee1d071 -r 8041c82f059e src/display.c
--- a/src/display.c Fri Dec 07 10:34:33 2012 +1000
+++ b/src/display.c Thu Jan 31 12:04:26 2013 +1000
@@ -118,6 +118,12 @@
char *id;
} SORTED_SCORE;
+/* sortable letter value record */
+typedef struct {
+ char letter;
+ double value;
+} LETTER_VALUE;
+
/* local functions */
static void print_sites(
DATASET *dataset, /* the dataset */
@@ -172,6 +178,10 @@
const void *v1,
const void *v2
);
+static int lv_compare(
+ const void *v1,
+ const void *v2
+);
static double get_q(
int nsteps, /* try nsteps from 0 to 1 */
int window, /* smoothing window radius */
@@ -1176,39 +1186,36 @@
int alength = dataset->alength;
char *alphabet = dataset->alphabet;
char *string = NULL;
+ LETTER_VALUE *letterv = NULL;
+
+ Resize(letterv, alength, LETTER_VALUE);
Resize(string, w*N+2, char);
for (i=0; i < w; i++) { /* position in motif */
int maxj[MAXDEPTH]; /* array of max indices in Theta */
- /* find N letters at position i with highest probability (in order) */
- for (n = 0; n < N; n++) { /* current depth */
- double max = LITTLE; /* current max probability */
- for (j=0; j < alength; j++) { /* letter */
- if (theta(i, j) > max) {
- max = theta(i, j); /* maximum probability */
- maxj[n] = j; /* current letter with n-th best prob */
- }
- }
- theta(i, maxj[n]) = -theta(i, maxj[n]); /* tag this position as used */
+ /* sort letters at position i, largest first */
+ for (j = 0; j < alength; j++) {
+ letterv[j].letter = alphabet[j];
+ letterv[j].value = theta(i, j);
}
-
- /* restore theta */
- for (n = 0; n < N; n++) { /* current depth */
- theta(i, maxj[n]) = -theta(i, maxj[n]); /* untag */
- }
+ qsort(letterv, alength, sizeof(LETTER_VALUE), lv_compare);
/* set up the consensus strings for position i */
for (n = 0; n < N; n++) { /* current depth */
- if (theta(i, maxj[n]) < min_prob) {
- string[(n*w)+i] = (n==0 ? 'x' : ' '); /* below cutoff */
- } else {
- string[(n*w)+i] = alphabet[maxj[n]]; /* set n'th consensus */
- }
+ if (n < alength) {
+ if (letterv[n].value < min_prob) {
+ string[(n*w)+i] = (n==0 ? 'x' : ' '); /* below cutoff */
+ } else {
+ string[(n*w)+i] = letterv[n].letter; /* set n'th consensus */
+ }
+ } else string[(n*w)+i] = ' ';
}
}
- string[((N-1)*w)+i] = '\0'; /* terminate string */
+ string[N*w] = '\0'; /* terminate string */
+ // cleanup
+ free(letterv);
return string;
} /* get_consensus */
@@ -1688,6 +1695,26 @@
return ((diff > 0) ? -1 : ( (diff < 0) ? 1 : 0) );
} /* s_compare */
+/**********************************************************************/
+/*
+ lv_compare
+
+ Compare two letter values in descending order
+*/
+/**********************************************************************/
+static int lv_compare(
+ const void *v1,
+ const void *v2
+)
+{
+ const LETTER_VALUE *lv1, *lv2;
+ lv1 = (const LETTER_VALUE *) v1;
+ lv2 = (const LETTER_VALUE *) v2;
+ if (lv1->value == lv2->value) return 0;
+ if (lv1->value < lv2->value) return 1;
+ return -1;
+} /* lv_compare */
+
/**********************************************************************/
/*
diff -r e77390759cae -r 61325860cd46 src/fisher_exact.c
--- a/src/fisher_exact.c Thu Nov 22 18:53:34 2012 +1100
+++ b/src/fisher_exact.c Tue Jan 22 18:14:22 2013 +1000
@@ -15,6 +15,12 @@
#include
#include "utils.h"
+//double _mm_nats;
+double _mm_nats = 0;
+double _log10;
+double _log0_99999999;
+double _log1_00000001;
+
// Global constants for hypergeometric computation
#define _log_zero (-1e10) // Zero on the log scale.
#define _log_small (-0.5e10) // Threshold below which everything is zero
@@ -22,7 +28,7 @@
// Routines for computing the logarithm of a sum in log space.
#define my_exp(x) ( \
- ((x) < _log_small) ? 0 : exp(x) \
+ ((x) < _log_small) ? 0.0 : exp(x) \
)
#define log_sum1(logx, logy) ( \
((logx) - (logy) > _mm_nats) ? (logx) : (logx) + log(1 + my_exp((logy) - (logx))) \
@@ -51,11 +57,6 @@
double _log_sprob;
} FISHER_VAL_T;
-double _log10;
-double _log0_99999999;
-double _log1_00000001;
-double _mm_nats;
-
// Prototypes.
double lngamm(int z);
@@ -80,7 +81,8 @@
return exp(log_factorial[a + b] + log_factorial[c + d] + log_factorial[a + c] + log_factorial[b + d] - (log_factorial[a + b + c + d] + log_factorial[a] + log_factorial[b] + log_factorial[c] + log_factorial[d]));
}
-void fisher_exact(int a, //x[0,0]
+void fisher_exact(
+ int a, //x[0,0]
int b, //x[0,1]
int c, //x[1,0]
int d, //x[1,1]
@@ -109,7 +111,9 @@
tmpc++;
tmpd--;
tmpp = fet(tmpa, tmpb, tmpc, tmpd);
- if (tmpp <= *p)
+ // FIXED tlb: want prob pos succ >= observed
+ //if (tmpp <= *p)
+ if (tmpp < *p)
*left += tmpp;
}
@@ -125,10 +129,13 @@
tmpc--;
tmpd++;
tmpp = fet(tmpa, tmpb, tmpc, tmpd);
- if (tmpp <= *p)
+ // FIXED tlb: want prob pos succ <= observed
+ //if (tmpp <= *p)
+ if (tmpp < *p)
*two += tmpp;
}
+ // FIXED tlb: Now right is 1 - left - Pr(pos succ == observed)
*right = 1 - *left + *p;
}
@@ -155,12 +162,18 @@
if (!((n1_i | n_1i | ni) != 0)) {
if (!(n11i % 10 == 0)) {
if (n11i == f_vals->_sn11 + 1) {
- f_vals->_log_sprob = f_vals->_log_sprob + log(((f_vals->_sn1_ - f_vals->_sn11) / n11i) * ((f_vals->_sn_1 - f_vals->_sn11) / (n11i + f_vals->_sn - f_vals->_sn1_ - f_vals->_sn_1)));
+ f_vals->_log_sprob +=
+ // log ( ((_sn1_-_sn11)/float(n11i))*((_sn_1-_sn11)/float(n11i+_sn-_sn1_-_sn_1)) )
+ log(((f_vals->_sn1_ - f_vals->_sn11) / n11i) * ((f_vals->_sn_1 - f_vals->_sn11)
+ / (n11i + f_vals->_sn - f_vals->_sn1_ - f_vals->_sn_1)));
f_vals->_sn11 = n11i;
return f_vals->_log_sprob;
}
if (n11i == f_vals->_sn11 - 1) {
- f_vals->_log_sprob = f_vals->_log_sprob + log(((f_vals->_sn11) / (f_vals->_sn1_ - n11i)) * ((f_vals->_sn11 + f_vals->_sn - f_vals->_sn1_ - f_vals->_sn_1) / (f_vals->_sn_1 - n11i)));
+ f_vals->_log_sprob +=
+ // log ( ((_sn11)/float(_sn1_-n11i))*((_sn11+_sn-_sn1_-_sn_1)/float(_sn_1-n11i)) )
+ log(((f_vals->_sn11) / (f_vals->_sn1_ - n11i)) * ((f_vals->_sn11 + f_vals->_sn - f_vals->_sn1_ - f_vals->_sn_1)
+ / (f_vals->_sn_1 - n11i)));
f_vals->_sn11 = n11i;
return f_vals->_log_sprob;
}
@@ -193,13 +206,16 @@
* Returns Pr(green balls drawn >= b2)
*/
static double log_getFETprob(int a1, int a2, int b1, int b2) {
+
// initialize values
+ if (_mm_nats == 0) init_FET();
FISHER_VAL_T *fisher_values = mm_malloc(sizeof(FISHER_VAL_T));
fisher_values->_log_sprob = 0;
fisher_values->_sn = 0;
fisher_values->_sn11 = 0;
fisher_values->_sn1_ = 0;
fisher_values->_sn_1 = 0;
+
double log_sless = _log_zero;
double log_sright = _log_zero;
double log_sleft = _log_zero;
@@ -209,12 +225,15 @@
int col1 = a1 + b1;
int max = row1;
+ if (a1+a2 == 0 || b1+b2 == 0) return(0); // p-value == 1 if no positive/negative samples
+ if (a1+b1 == 0 || a2+b2 == 0) return(0); // p-value == 1 if no successes/failures samples
+
if (col1 < max) {
max = col1;
}
int min = row1 + col1 - n;
- if (min < 0)
- min = 0;
+ if (min < 0) min = 0;
+ if (min == max) return(_log_zero);
double log_prob_fisher = log_hyper0(a1, row1, col1, n, fisher_values);
log_sleft = _log_zero;
@@ -226,6 +245,7 @@
log_p = log_hyper(i, fisher_values);
i = i + 1;
}
+
i = i - 1;
if (log_p < _log1_00000001 + log_prob_fisher) {
log_sleft = log_sum(log_sleft, log_p);
@@ -243,6 +263,7 @@
log_p = log_hyper(j, fisher_values);
j = j - 1;
}
+
j = j + 1;
if (log_p < _log1_00000001 + log_prob_fisher) {
log_sright = log_sum(log_sright, log_p);
@@ -258,7 +279,7 @@
}
else {
log_sless = log(1.0 - exp(log_sright));
- log_sless = (log_sless, log_prob_fisher);
+ log_sless = log_sum(log_sless, log_prob_fisher);
log_slarg = log_sright;
}
free(fisher_values);
@@ -269,10 +290,9 @@
getLogFETPvalue
Return log of hypergeometric pvalue of # pos successes >= p.
- Don't forget to call init_FET() to initialize parameters before
- running this function
*/
-double getLogFETPvalue(double p, // positive successes
+double getLogFETPvalue(
+ double p, // positive successes
double P, // positives
double n, // negative successes
double N, // negatives
@@ -290,3 +310,43 @@
return (log_pvalue);
} // getLogFETPvalue
+
+
+#ifdef FE_MAIN
+#include "general.h"
+int main(
+ int argc,
+ char** argv
+) {
+ int i = 1;
+ int pos_succ = 0;
+ int pos = 0;
+ int neg_succ = 0;
+ int neg = 0;
+
+ DO_STANDARD_COMMAND_LINE(2,
+ USAGE( [options]);
+ NON_SWITCH(1,\r,
+ switch (i++) {
+ case 1: pos_succ = atoi(_OPTION_); break;
+ case 2: pos = atoi(_OPTION_); break;
+ case 3: neg_succ = atoi(_OPTION_); break;
+ case 4: neg = atoi(_OPTION_); break;
+ default: COMMAND_LINE_ERROR;
+ }
+ );
+ USAGE(\n\tCompute the Fisher Exact test p-value:);
+ USAGE(\n\t\tPr(#pos_succ > pos_succ));
+ );
+
+ double log_pvalue = log_getFETprob(neg - neg_succ, neg_succ, pos - pos_succ, pos_succ);
+ printf("log_p %.2e p %.2e\n", log_pvalue, exp(log_pvalue));
+
+ //fisher_exact_init(pos+neg);
+ //double p1, p2, p3, p4;
+ //fisher_exact(neg - neg_succ, neg_succ, pos - pos_succ, pos_succ, &p1, &p2, &p3, &p4);
+ //printf("p1 %.2e p2 %.2e p3 %.2e p4 %.2e\n", p1, p2, p3, p4);
+
+ return(0);
+}
+#endif
diff -r e77390759cae -r 61325860cd46 src/json-writer.c
--- a/src/json-writer.c Thu Nov 22 18:53:34 2012 +1100
+++ b/src/json-writer.c Tue Jan 22 18:14:22 2013 +1000
@@ -192,7 +192,11 @@
break;
default:
// check if a control character
- if (codepoint <= 0x1F || (codepoint >= 0x7F && codepoint <= 0x9F)) {
+ // or if line separator (U+2028) or if paragraph separator (U+2029)
+ // the latter two are valid JSON but not valid Javascript as Javascript
+ // can't have unescaped newline characters in a string.
+ if (codepoint <= 0x1F || (codepoint >= 0x7F && codepoint <= 0x9F) ||
+ codepoint == 0x2028 || codepoint == 0x2029) {
str_appendf(storage, "\\u%.04x", codepoint);
} else {
str_append(storage, c, bytes);
diff -r e77390759cae -r 61325860cd46 src/pssm.c
--- a/src/pssm.c Thu Nov 22 18:53:34 2012 +1100
+++ b/src/pssm.c Tue Jan 22 18:14:22 2013 +1000
@@ -616,7 +616,7 @@
MHMM_T* the_hmm // The HMM.
)
{
- int i;
+ int i, len;
int i_motif = the_hmm->states[i_state].i_motif; // Name of motif.
MHMM_STATE_T *start_state = the_hmm->states + i_state;// Starting state of motif.
ALPH_T alph = the_hmm->alph;
@@ -673,7 +673,12 @@
}
}
// Find minimum motif score whose p-value is < p_threshold.
- for (i=0; get_array_item((int) i, start_state->pssm->pv) > p_threshold; i++);
+ len = get_array_length(start_state->pssm->pv);
+ for (i=0; i < len && get_array_item((int) i, start_state->pssm->pv) > p_threshold; i++);
+ if (i == len) {
+ fprintf(stderr, "Warning: Motif %s has no scores with p-value < p-value threshold (%.2g)\n",
+ the_hmm->states[i_state].motif_id, p_threshold);
+ }
start_state->min_sig_score = i-1;
free_array(background);
}
diff -r e77390759cae -r 61325860cd46 src/seq.c
--- a/src/seq.c Thu Nov 22 18:53:34 2012 +1100
+++ b/src/seq.c Tue Jan 22 18:14:22 2013 +1000
@@ -417,7 +417,7 @@
// Make a smaller copy of the raw sequence.
assert(offset > 0);
assert(offset <= sequence->length);
- smaller_sequence = (char*)mm_malloc((offset + 1) * sizeof(char));
+ smaller_sequence = (char*)mm_malloc((sequence->length - offset + 1) * sizeof(char));
strcpy(smaller_sequence, &(sequence->sequence[offset]));
assert((int)strlen(smaller_sequence) == (sequence->length - offset));
diff -r e77390759cae -r 61325860cd46 tests/dreme/norc.xml
--- a/tests/dreme/norc.xml Thu Nov 22 18:53:34 2012 +1100
+++ b/tests/dreme/norc.xml Tue Jan 22 18:14:22 2013 +1000
@@ -65,10 +65,10 @@
]>
-
+
dreme -norc -oc results/dreme -v 1 -noxslt -p Klf1-200-100.s
-
+
@@ -76,33 +76,33 @@
100
0.01
1
- tlb-kamikaze-lt.imb.uq.edu.au
- Thu Jan 12 17:37:55 EST 2012
+ d-173-250-141-199.dhcp4.washington.edu
+ Wed Jan 09 16:41:51 PST 2013
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
+
diff -r e77390759cae -r 61325860cd46 tests/meme/meme.crp0.zoops
--- a/tests/meme/meme.crp0.zoops Thu Nov 22 18:53:34 2012 +1100
+++ b/tests/meme/meme.crp0.zoops Tue Jan 22 18:14:22 2013 +1000
@@ -1,7 +1,7 @@
********************************************************************************
MEME - Motif discovery tool
********************************************************************************
-MEME version 4.8.1 (Release date: Wed Feb 22 11:51:06 EST 2012)
+MEME version 4.9.0 (Release date: Wed Dec 12 10:31:40 EST 2012)
For further information on how to interpret these results or to get
a copy of the MEME software please access http://meme.nbcr.net.
@@ -226,13 +226,13 @@
--------------------------------------------------------------------------------
Motif 1 regular expression
--------------------------------------------------------------------------------
-[AT][AT][TAG]TGTGA[CGT][GA][TC][ATCGC]G[AT]TC[AG][CA]
+[AT][AT][TAG]TGTGA[CGT][GA][TC][ATCG]G[AT]TC[AG][CA]
--------------------------------------------------------------------------------
-Time 2.57 secs.
+Time 4.55 secs.
********************************************************************************
@@ -331,7 +331,7 @@
-Time 4.11 secs.
+Time 7.40 secs.
********************************************************************************
@@ -374,6 +374,6 @@
Stopped because nmotifs = 2 reached.
********************************************************************************
-CPU: tlb-squirrel
+CPU: unknown
********************************************************************************
diff -r e77390759cae -r 61325860cd46 website/cgi-bin/centrimo.pl
--- a/website/cgi-bin/centrimo.pl Thu Nov 22 18:53:34 2012 +1100
+++ b/website/cgi-bin/centrimo.pl Tue Jan 22 18:14:22 2013 +1000
@@ -131,10 +131,8 @@
my %d = ();
# get the local enrichment option
$d{LOCAL} = $utils->param_bool($q, 'local');
- # get the discriminative search option
- $d{DISCR} = $utils->param_bool($q, 'discr');
- # check if we should require the negative sequences
- $d{NEGS_ALWAYS} = $utils->param_bool($q, 'negs_always');
+ # get the search option
+ $d{COMPAR} = $utils->param_bool($q, 'compar');
# get the input sequences
my $seqsfh = $q->upload('sequences');
my $pasted = $utils->param_bool($q, 'use_pasted');
@@ -146,17 +144,17 @@
$d{SEQ_ORIG_NAME} = ($pasted ? $name : fileparse($q->param('sequences')));
$d{SEQ_NAME} = get_safe_name($d{SEQ_ORIG_NAME}, $name, 2);
# get the comparative sequences
- if ($d{DISCR} || $d{NEGS_ALWAYS}) {
- my $discr_seqs_fh = $q->upload('discr_sequences');
- my $discr_pasted = $utils->param_bool($q, 'use_discr_pasted');
+ if ($d{COMPAR}) {
+ my $compar_seqs_fh = $q->upload('compar_sequences');
+ my $compar_pasted = $utils->param_bool($q, 'use_compar_pasted');
( $d{D_SEQ_DATA}, undef, $d{D_SEQ_COUNT}, $d{D_SEQ_MIN}, $d{D_SEQ_MAX},
$d{D_SEQ_AVG}, $d{D_SEQ_TOTAL}
- ) = $utils->get_sequence_data(scalar $q->param('discr_pasted_sequences'),
- $discr_seqs_fh, PASTE => $discr_pasted);
+ ) = $utils->get_sequence_data(scalar $q->param('compar_pasted_sequences'),
+ $compar_seqs_fh, PASTE => $compar_pasted);
# get the input sequences name
- my $discr_name = 'neg_sequences';
- $d{D_SEQ_ORIG_NAME} = ($pasted ? $name : fileparse($q->param('discr_sequences')));
- $d{D_SEQ_NAME} = get_safe_name($d{SEQ_ORIG_NAME}, $discr_name, 2);
+ my $compar_name = 'neg_sequences';
+ $d{D_SEQ_ORIG_NAME} = ($pasted ? $name : fileparse($q->param('compar_sequences')));
+ $d{D_SEQ_NAME} = get_safe_name($d{SEQ_ORIG_NAME}, $compar_name, 2);
if ($d{D_SEQ_NAME} eq $d{SEQ_NAME}) {
$d{SEQ_NAME} .= '1';
$d{D_SEQ_NAME} .= '2';
@@ -206,7 +204,6 @@
my ($data) = @_;
my @args = ();
push(@args, '--local') if ($data->{LOCAL});
- push(@args, '--discr') if ($data->{DISCR});
push(@args, '--score', $data->{MIN_SCORE}) if (defined($data->{MIN_SCORE}));
push(@args, '--optsc') if ($data->{OPT_SCORE});
push(@args, '--ethresh', $data->{ETHRESH}) if (defined($data->{ETHRESH}));
@@ -233,7 +230,7 @@
#fill in parameters
$template->param(description => $data->{DESCRIPTION});
$template->param(local => $data->{LOCAL});
- $template->param(discr => $data->{DISCR});
+ $template->param(compar => $data->{COMPAR});
$template->param(norc => $data->{STRANDS} eq 'given');
$template->param(flip => $data->{STRANDS} eq 'both_flip');
$template->param(min_score => $data->{MIN_SCORE});
@@ -251,7 +248,7 @@
$template->param(seq_avg => $data->{SEQ_AVG});
$template->param(seq_total => $data->{SEQ_TOTAL});
- # discriminative sequence information:
+ # comparative sequence information:
if (defined($data->{D_SEQ_NAME})) {
$template->param(d_seq_orig_name => $data->{D_SEQ_ORIG_NAME});
$template->param(d_seq_name => $data->{D_SEQ_NAME});
@@ -298,7 +295,7 @@
my @infilelist = ();
# Sequences file
push(@infilelist, InputFileType->new($data->{SEQ_NAME}, $data->{SEQ_DATA}));
- # Discriminative Sequences file
+ # Comparative Sequences file
push(@infilelist, InputFileType->new($data->{D_SEQ_NAME}, $data->{D_SEQ_DATA})) if $data->{D_SEQ_NAME};
# Uploaded database
push(@infilelist, InputFileType->new($data->{UPMOT_NAME}, $data->{UPMOT_DATA})) if $data->{UPMOT_NAME};
diff -r e77390759cae -r 61325860cd46 website/cgi-bin/centrimo.tmpl
--- a/website/cgi-bin/centrimo.tmpl Thu Nov 22 18:53:34 2012 +1100
+++ b/website/cgi-bin/centrimo.tmpl Tue Jan 22 18:14:22 2013 +1000
@@ -24,12 +24,14 @@