/* Last changed Time-stamp: <2009-06-18 14:20:02 ivo> */ /* Access to alifold Routines c Ivo L Hofacker Vienna RNA package */ #include #include #include #include #include #include #include "fold.h" #include "part_func.h" #include "fold_vars.h" #include "PS_dot.h" #include "utils.h" #include "pair_mat.h" #include "alifold.h" #include "aln_util.h" #include "MEA.h" extern void read_parameter_file(const char fname[]); extern float energy_of_circ_struct(const char *seq, const char *structure); /*@unused@*/ static const char rcsid[] = "$Id: RNAalifold.c,v 1.23 2009/02/24 14:21:26 ivo Exp $"; #define PRIVATE static static const char scale[] = "....,....1....,....2....,....3....,....4" "....,....5....,....6....,....7....,....8"; PRIVATE void /*@exits@*/ usage(void); PRIVATE char **annote(const char *structure, const char *AS[]); PRIVATE void print_pi(const pair_info pi, FILE *file); PRIVATE void print_aliout(char **AS, plist *pl, int n_seq, char * mfe, FILE *aliout); PRIVATE void mark_endgaps(char *seq, char egap); PRIVATE cpair *make_color_pinfo(char **sequences, plist *pl, int n_seq, bondT *mfe); /*--------------------------------------------------------------------------*/ #define MAX_NUM_NAMES 500 int main(int argc, char *argv[]) { char *string; char *structure=NULL, *cstruc=NULL; char ffname[20], gfname[20], fname[13]=""; char *ParamFile=NULL; char *ns_bases=NULL, *c; int n_seq, i, length, sym, r; int endgaps=0, mis=0; double min_en, real_en, sfact=1.07; int pf=0, noPS=0, istty; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ FILE *clust_file = stdin; int circ=0; int doAlnPS=0; int doColor=0; int n_back=0; int eval_energy = 0; int doMEA=0; double MEAgamma = 1.; do_backtrack = 1; string=NULL; dangles=2; oldAliEn=0; for (i=1; i : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } if (fold_constrained) { if (istty) printf("%s\n", scale); cstruc = get_line(stdin); } if (istty && (clust_file == stdin)) { printf("\nInput aligned sequences in clustalw format\n"); if (!fold_constrained) printf("%s\n", scale); } n_seq = read_clustal(clust_file, AS, names); if (endgaps) for (i=0; i2000) fprintf(stderr, "scaling factor %f\n", pf_scale); fflush(stdout); /* init_alipf_fold(length); */ if (cstruc!=NULL) strncpy(structure, cstruc, length+1); energy = (circ) ? alipf_circ_fold(AS, structure, &pl) : alipf_fold(AS, structure, &pl); if (n_back>0) { /*stochastic sampling*/ for (i=0; i0 && (seq[i]=='-'); i--) { seq[i] = egap; } } PRIVATE void print_pi(const pair_info pi, FILE *file) { const char *pname[8] = {"","CG","GC","GU","UG","AU","UA", "--"}; int i; /* numbering starts with 1 in output */ fprintf(file, "%5d %5d %2d %5.1f%% %7.3f", pi.i, pi.j, pi.bp[0], 100.*pi.p, pi.ent); for (i=1; i<=7; i++) if (pi.bp[i]) fprintf(file, " %s:%-4d", pname[i], pi.bp[i]); if (!pi.comp) fprintf(file, " +"); fprintf(file, "\n"); } #define MIN2(A, B) ((A) < (B) ? (A) : (B)) /*-------------------------------------------------------------------------*/ PRIVATE char **annote(const char *structure, const char *AS[]) { /* produce annotation for colored drawings from PS_rna_plot_a() */ char *ps, *colorps, **A; int i, n, s, pairings, maxl; short *ptable; char * colorMatrix[6][3] = { {"0.0 1", "0.0 0.6", "0.0 0.2"}, /* red */ {"0.16 1","0.16 0.6", "0.16 0.2"}, /* ochre */ {"0.32 1","0.32 0.6", "0.32 0.2"}, /* turquoise */ {"0.48 1","0.48 0.6", "0.48 0.2"}, /* green */ {"0.65 1","0.65 0.6", "0.65 0.2"}, /* blue */ {"0.81 1","0.81 0.6", "0.81 0.2"} /* violet */ }; n = strlen(AS[0]); maxl = 1024; A = (char **) space(sizeof(char *)*2); ps = (char *) space(maxl); colorps = (char *) space(maxl); ptable = make_pair_table(structure); for (i=1; i<=n; i++) { char pps[64], ci='\0', cj='\0'; int j, type, pfreq[8] = {0,0,0,0,0,0,0,0}, vi=0, vj=0; if ((j=ptable[i])0) { snprintf(pps, 64, "%d %d %s colorpair\n", i,j, colorMatrix[pairings-1][pfreq[0]]); strcat(colorps, pps); } if (pfreq[0]>0) { snprintf(pps, 64, "%d %d %d gmark\n", i, j, pfreq[0]); strcat(ps, pps); } if (vi>1) { snprintf(pps, 64, "%d cmark\n", i); strcat(ps, pps); } if (vj>1) { snprintf(pps, 64, "%d cmark\n", j); strcat(ps, pps); } } free(ptable); A[0]=colorps; A[1]=ps; return A; } /*-------------------------------------------------------------------------*/ #define PMIN 0.0008 PRIVATE int compare_pair_info(const void *pi1, const void *pi2) { pair_info *p1, *p2; int i, nc1, nc2; p1 = (pair_info *)pi1; p2 = (pair_info *)pi2; for (nc1=nc2=0, i=1; i<=6; i++) { if (p1->bp[i]>0) nc1++; if (p2->bp[i]>0) nc2++; } /* sort mostly by probability, add epsilon * comp_mutations/(non-compatible+1) to break ties */ return (p1->p + 0.01*nc1/(p1->bp[0]+1.)) < (p2->p + 0.01*nc2/(p2->bp[0]+1.)) ? 1 : -1; } PRIVATE void print_aliout(char **AS, plist *pl, int n_seq, char * mfe, FILE *aliout) { int i, j, k, n, num_p=0, max_p = 64; pair_info *pi; double *duck, p; short *ptable; for (n=0; pl[n].i>0; n++); max_p = 64; pi = space(max_p*sizeof(pair_info)); duck = (double *) space((strlen(mfe)+1)*sizeof(double)); ptable = make_pair_table(mfe); for (k=0; k=max_p) { max_p *= 2; pi = xrealloc(pi, max_p * sizeof(pair_info)); } } free(duck); pi[num_p].i=0; qsort(pi, num_p, sizeof(pair_info), compare_pair_info ); /* print it */ fprintf(aliout, "%d sequence; length of alignment %d\n", n_seq, (int) strlen(AS[0])); fprintf(aliout, "alifold output\n"); for (k=0; pi[k].i>0; k++) { pi[k].comp = (ptable[pi[k].i] == pi[k].j) ? 1:0; print_pi(pi[k], aliout); } fprintf(aliout, "%s\n", mfe); free(ptable); free(pi); } PRIVATE cpair *make_color_pinfo(char **sequences, plist *pl, int n_seq, bondT *mfe) { /* produce info for PS_color_dot_plot */ cpair *cp; int i, n,s, a, b,z,t,j, c; int pfreq[7]; for (n=0; pl[n].i>0; n++); c=0; cp = (cpair *) space(sizeof(cpair)*(n+1)); for (i=0; iPMIN) { cp[c].i = pl[i].i; cp[c].j = pl[i].j; cp[c].p = pl[i].p; for (z=0; z<7; z++) pfreq[z]=0; for (s=0; s0) { ncomp++; }} cp[c].hue = (ncomp-1.0)/6.2; /* hue<6/6.9 (hue=1 == hue=0) */ cp[c].sat = 1 - MIN2( 1.0, (float) (pfreq[0]*2./*pi[i].bp[0]*//(n_seq))); c++; } } for (t=1; t<=mfe[0].i; t++) { int nofound=1; for (j=0; j