/* Last changed Time-stamp: <2007-12-05 13:55:42 ronny> */ /* Ineractive Access to folding Routines c Ivo L Hofacker Vienna RNA package */ /** \file *** \brief RNAfold program source code *** *** This code provides an interface for MFE and Partition function folding *** of single linear or circular RNA molecules. **/ #include #include #include #include #include #include #include "fold.h" #include "part_func.h" #include "fold_vars.h" #include "PS_dot.h" #include "utils.h" #include "read_epars.h" #include "MEA.h" #include "RNAfold_cmdl.h" /*@unused@*/ static char UNUSED rcsid[] = "$Id: RNAfold.c,v 1.25 2009/02/24 14:22:21 ivo Exp $"; /*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]){ struct RNAfold_args_info args_info; char *buf, *rec_sequence, *rec_id, **rec_rest, *structure=NULL, *cstruc=NULL; char fname[80], ffname[80], *ParamFile=NULL; char *ns_bases=NULL, *c; int i, length, l, cl, sym, r, istty, pf, noPS, noconv, fasta; unsigned int rec_type, read_opt; double energy, min_en, kT, sfact; int doMEA=0, circular; double MEAgamma = 1.; rec_type = read_opt = 0; rec_id = rec_sequence = NULL; rec_rest = NULL; do_backtrack = 1; pf = 0; sfact = 1.07; noPS = 0; noconv = 0; circular = 0; fasta = 0; cl = l = length = 0; dangles = 2; /* ############################################# # check the command line parameters ############################################# */ if(RNAfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.constraint_given) fold_constrained=1; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) tetra_loop=0; /* set dangle model */ if(args_info.dangles_given) dangles = args_info.dangles_arg; /* do not allow weak pairs */ if(args_info.noLP_given) noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) no_closingGU = 1; /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ if(args_info.noconv_given) noconv = 1; /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* set pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; /* assume RNA sequence to be circular */ if(args_info.circ_given) circular=1; /* do not produce postscript output */ if(args_info.noPS_given) noPS=1; /* partition function settings */ if(args_info.partfunc_given){ pf = 1; if(args_info.partfunc_arg != -1) do_backtrack = args_info.partfunc_arg; } /* MEA (maximum expected accuracy) settings */ if(args_info.MEA_given){ pf = doMEA = 1; if(args_info.MEA_arg != -1) MEAgamma = args_info.MEA_arg; } /* free allocated memory of command line data structure */ RNAfold_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if (ParamFile != NULL) read_parameter_file(ParamFile); if (circular && noLonelyPairs) warn_user("depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times"); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); /* print user help if we get input from tty */ if(istty){ if(fold_constrained){ print_tty_constraint_full(); print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint"); } else print_tty_input_seq(); } /* set options we wanna pass to read_record */ if(istty) read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES; if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST; /* ############################################# # main loop: continue until end of file ############################################# */ while( !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt)) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){ /* ######################################################## # init everything according to the data we've read ######################################################## */ if(rec_id){ if(!istty) printf("%s\n", rec_id); (void) sscanf(rec_id, ">%42s", fname); } else fname[0] = '\0'; length = (int)strlen(rec_sequence); structure = (char *)space(sizeof(char) *(length+1)); /* parse the rest of the current dataset to obtain a structure constraint */ if(fold_constrained){ cstruc = NULL; unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0; coptions |= VRNA_CONSTRAINT_ALL; getConstraint(&cstruc, (const char **)rec_rest, coptions); cl = (cstruc) ? (int)strlen(cstruc) : 0; if(cl == 0) warn_user("structure constraint is missing"); else if(cl < length) warn_user("structure constraint is shorter than sequence"); else if(cl > length) nrerror("structure constraint is too long"); if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1)); } if(noconv) str_RNA2RNA(rec_sequence); else str_DNA2RNA(rec_sequence); if(istty) printf("length = %d\n", length); /* ######################################################## # begin actual computations ######################################################## */ min_en = (circular) ? circfold(rec_sequence, structure) : fold(rec_sequence, structure); printf("%s\n%s", rec_sequence, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); if(fname[0] != '\0'){ strcpy(ffname, fname); strcat(ffname, "_ss.ps"); } else strcpy(ffname, "rna.ps"); if (!noPS) (void) PS_rna_plot(rec_sequence, structure, ffname); if (length>2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circular) ? energy_of_circ_structure(rec_sequence, structure, 0) : energy_of_structure(rec_sequence, structure, 0); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circular) ? pf_circ_fold(rec_sequence, pf_struc) : pf_fold(rec_sequence, pf_struc); if (do_backtrack) { printf("%s", pf_struc); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; FLT_OR_DBL *probs = export_bppm(); assign_plist_from_pr(&pl1, probs, length, 1e-5); assign_plist_from_db(&pl2, structure, 0.95*0.95); /* cent = centroid(length, &dist); <- NOT THREADSAFE */ cent = get_centroid_struct_pr(length, &dist, pr); cent_en = (circular) ? energy_of_circ_structure(rec_sequence, cent, 0) :energy_of_structure(rec_sequence, cent, 0); printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "dot.ps"); (void) PS_dot_plot_list(rec_sequence, ffname, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp2.ps"); } else strcpy(ffname, "dot2.ps"); PS_dot_plot_list(rec_sequence, ffname, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); if(doMEA){ float mea, mea_en; plist *pl; assign_plist_from_pr(&pl, pr, length, 1e-4/(1+MEAgamma)); mea = MEA(pl, structure, MEAgamma); mea_en = (circular) ? energy_of_circ_structure(rec_sequence, structure, 0) : energy_of_structure(rec_sequence, structure, 0); printf("%s {%6.2f MEA=%.2f}\n", structure, mea_en, mea); free(pl); } } printf(" frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) printf("ensemble diversity %-6.2f", mean_bp_distance(length)); printf("\n"); free_pf_arrays(); } (void) fflush(stdout); /* clean up */ if(cstruc) free(cstruc); if(rec_id) free(rec_id); free(rec_sequence); free(structure); /* free the rest of current dataset */ if(rec_rest){ for(i=0;rec_rest[i];i++) free(rec_rest[i]); free(rec_rest); } rec_id = rec_sequence = structure = cstruc = NULL; rec_rest = NULL; /* print user help for the next round if we get input from tty */ if(istty){ if(fold_constrained){ print_tty_constraint_full(); print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint"); } else print_tty_input_seq(); } } return EXIT_SUCCESS; }