/* Last changed Time-stamp: <2008-07-04 16:15:50 ulim> $Id: RNAup.c,v 1.5 2008/07/04 14:27:09 ivo Exp $ Ineractive Access to cofolding routines c Ivo L Hofacker Vienna RNA package */ /** *** \file RNAup.c **/ #include #include #include #include #include #include #include #include "fold.h" #include "fold_vars.h" #include "PS_dot.h" #include "utils.h" #include "part_func.h" #include "part_func_up.h" #include "duplex.h" #include "energy_const.h" #include "RNAup_cmdl.h" /*@unused@*/ static char rcsid[] = "$Id: RNAup.c,v 1.5 2008/07/04 14:27:09 ivo Exp $"; #define MIN(x,y) (((x)<(y)) ? (x) : (y)) #define MAX(x,y) (((x)>(y)) ? (x) : (y)) #define EQUAL(A,B) (fabs((A)-(B)) < 1000*DBL_EPSILON) #define ULENGTH_MAXIMUM 21 #define COMMANDLINE_PARAMETERS_INIT_LENGTH 1024 PRIVATE void tokenize(char *line, char **seq1, char **seq2); PRIVATE void seperate_bp(char **inter,int len1,char **intra_l,char **intra_s); PRIVATE void print_interaction(interact *Int, char *s1, char *s2, pu_contrib *p_c, pu_contrib *p_c2, int w, int incr3, int incr5); PRIVATE void print_unstru(pu_contrib *p_c, int w); PRIVATE int compare_unpaired_values(const void *p1, const void *p2); PRIVATE int move_useless_unpaired_values(const void *p1, const void *p2); PRIVATE void adjustUnpairedValues(int ***unpaired_values); /* this function sorts and cleans up the unpaired values given at command line */ PRIVATE void appendCmdlParameter(char **param_dest, const char *parameter, int *param_dest_length); /* defaults for -u and -w */ PRIVATE int default_u; /* -u options for plotting: plot pr_unpaired for 4 nucleotides */ PRIVATE double RT; /*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]){ struct RNAup_args_info args_info; unsigned int input_type, up_mode; char temp_name[512], my_contrib[10], up_out[250], name[512]; char fname1[80], fname2[80], fname_target[80]; char *ParamFile, *ns_bases, *c, *structure; char *head, *input_string, *s1, *s2, *s3, *s_target, *cstruc1, *cstruc2, *cstruc_target, *cstruc_combined; char cmdl_tmp[2048], *cmdl_parameters; int cmdl_parameters_length; int i, j, length1, length2, l, length_target, sym, r, istty, rotated; double energy, min_en; double sfact=1.07; int noconv=0; int max_u = 0; int **unpaired_values; /* very new array that contains the different ulength values */ int ulength_num = 0; /* number of ulength values given on commandline */ /* variables for output */ pu_contrib *unstr_out, *unstr_short, *unstr_target, *contrib1, *contrib2; interact *inter_out; /* pu_out *longer; */ /* commandline parameters */ int w = 25; /* length of region of interaction */ int incr3 = 0; /* add x unpaired bases after 3'end of short RNA*/ int incr5 = 0; /* add x unpaired bases after 5'end of short RNA*/ int unstr; /* length of unpaired region for output*/ int longerSeqFirst = 1; /* rotate input seq. to ensure that first sequence is the longer one (RNA_UP_MODE_2) */ int header = 1; /* print header in output file */ int output = 1; /* create output file */ /* more default settings for RNAup */ up_mode = RNA_UP_MODE_1; /* default RNAup mode, single sequence unpaired probabilities */ my_contrib[0] = 'S'; my_contrib[1] = '\0'; default_u = 4; unstr=default_u; /* early initializing */ do_backtrack = 1; rotated = 0; input_string = s1 = s2 = s3 = s_target = cstruc1 = cstruc2 = cstruc_target = cstruc_combined = NULL; length1 = length2 = length_target = 0; inter_out = NULL; unstr_out = unstr_short = unstr_target = contrib1 = contrib2 = NULL; structure = ParamFile = ns_bases = head = NULL; fname_target[0] = '\0'; /* allocate init length for commandline parameter string */ cmdl_parameters_length = COMMANDLINE_PARAMETERS_INIT_LENGTH; cmdl_parameters = (char *)space(sizeof(char) * cmdl_parameters_length); sprintf(cmdl_parameters, "RNAup "); /* ############################################# # check the command line prameters ############################################# */ if(RNAup_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* do not create header */ if(args_info.no_header_given) header = 0; /* temperature */ if(args_info.temp_given){ temperature = args_info.temp_arg; /* collect parameter if header is needed */ if(header){ sprintf(cmdl_tmp, "-T %f ", temperature); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* structure constraint */ if(args_info.constraint_given){ fold_constrained=1; /* collect parameter if header is needed */ if(header) appendCmdlParameter(&cmdl_parameters, "-C ", &cmdl_parameters_length); } /* do not take special tetra loop energies into account */ if(args_info.noTetra_given){ tetra_loop=0; if(header) appendCmdlParameter(&cmdl_parameters, "-4 ", &cmdl_parameters_length); } /* set dangle model */ if(args_info.dangles_given){ dangles = args_info.dangles_arg; if(header){ sprintf(cmdl_tmp, "-d %d ", dangles); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* do not allow weak pairs */ if(args_info.noLP_given){ noLonelyPairs = 1; if(header) appendCmdlParameter(&cmdl_parameters, "--noLP ", &cmdl_parameters_length); } /* do not allow wobble pairs (GU) */ if(args_info.noGU_given){ noGU = 1; if(header) appendCmdlParameter(&cmdl_parameters, "--noGU ", &cmdl_parameters_length); } /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given){ no_closingGU = 1; if(header) appendCmdlParameter(&cmdl_parameters, "--noClosingGU ", &cmdl_parameters_length); } /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ if(args_info.noconv_given){ noconv = 1; if(header) appendCmdlParameter(&cmdl_parameters, "--noconv ", &cmdl_parameters_length); } /* set energy model */ if(args_info.energyModel_given){ energy_set = args_info.energyModel_arg; if(header){ sprintf(cmdl_tmp, "-e %d ", energy_set); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* take another energy parameter set */ if(args_info.paramFile_given){ ParamFile = strdup(args_info.paramFile_arg); if(header){ sprintf(cmdl_tmp, "-P %s ", ParamFile); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given){ ns_bases = strdup(args_info.nsp_arg); if(header){ sprintf(cmdl_tmp, "--nsp %s ", ns_bases); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* set pf scaling factor */ if(args_info.pfScale_given){ sfact = args_info.pfScale_arg; if(header){ sprintf(cmdl_tmp, "-S %f ", sfact); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* set the maximal length of interaction region */ if(args_info.window_given){ w = args_info.window_arg; if(header){ sprintf(cmdl_tmp, "-w %d ", w); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* do not make an output file */ if(args_info.no_output_file_given){ output = 0; } /* set mode to unpaired regions in both RNAs */ if(args_info.include_both_given){ up_mode = RNA_UP_MODE_3; if(header) appendCmdlParameter(&cmdl_parameters, "--include_both ", &cmdl_parameters_length); } /* set interaction mode 1 (pairwise interaction) */ if(args_info.interaction_pairwise_given){ up_mode = RNA_UP_MODE_2; if(header) appendCmdlParameter(&cmdl_parameters, "--interaction_pairwise ", &cmdl_parameters_length); } /* set interaction mode 2 (first sequence interacts with all others) */ if(args_info.interaction_first_given){ up_mode = RNA_UP_MODE_3; if(header) appendCmdlParameter(&cmdl_parameters, "--interaction_first ", &cmdl_parameters_length); } /* extend unpaired region 5' */ if(args_info.extend5_given){ incr5 = args_info.extend5_arg; if(header){ sprintf(cmdl_tmp, "-5 %d ", incr5); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* extend unpaired region 3' */ if(args_info.extend3_given){ incr3 = args_info.extend3_arg; if(header){ sprintf(cmdl_tmp, "-3 %d ", incr3); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* set contribution output */ if(args_info.contributions_given){ strncpy(my_contrib, args_info.contributions_arg, 10); if(header){ sprintf(cmdl_tmp, "-c %s ", my_contrib); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } /* set length(s) of unpaired (unstructured) region(s) */ int min, max, tmp; i = (args_info.ulength_given == 0) ? 1 : args_info.ulength_given; /* here's the very new way of treating multiple ulength values/ranges */ unpaired_values = (int **)space(sizeof(int *) * (i + 1)); if(header && args_info.ulength_given) appendCmdlParameter(&cmdl_parameters, "-u ", &cmdl_parameters_length); for(i = 0; i < args_info.ulength_given; i++){ unpaired_values[++ulength_num] = (int *)space(2 * sizeof(int)); /* we got a ulength range... */ if(sscanf(args_info.ulength_arg[i], "%d-%d", &min, &max) == 2){ if(min > max){ tmp = min; min = max; max = tmp;} if(min == max){ unpaired_values[ulength_num][0] = min; unpaired_values[ulength_num][1] = -1; } else{ unpaired_values[ulength_num][0] = min; unpaired_values[ulength_num][1] = max; } max_u = MAX2(max_u, max); } else if(sscanf(args_info.ulength_arg[i], "%d", &max) == 1){ unpaired_values[ulength_num][0] = max; unpaired_values[ulength_num][1] = -1; max_u = MAX2(max_u, max); } if(header){ if(i < args_info.ulength_given - 1) sprintf(cmdl_tmp, "%s,", args_info.ulength_arg[i]); else sprintf(cmdl_tmp, "%s ", args_info.ulength_arg[i]); appendCmdlParameter(&cmdl_parameters, cmdl_tmp, &cmdl_parameters_length); } } if(i == 0){ /* use default settings */ unpaired_values[++ulength_num] = (int *)space(2 * sizeof(int)); unpaired_values[ulength_num][0] = 4; unpaired_values[ulength_num][1] = -1; max_u = 4; } /* store number of entries at position [0][0] */ unpaired_values[0] = (int *)space(2 * sizeof(int)); unpaired_values[0][0] = ulength_num; unpaired_values[0][1] = -1; /* adjust ranges and values such that everything is non-redundant WARNING: after this step ulength_num may not reflect actual number of ulength values anymore */ adjustUnpairedValues(&unpaired_values); /* free allocated memory of command line data structure */ RNAup_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if (dangles!=0) dangles = 2; /* only 0 or 2 allowed */ if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_RND_BRACK); printf("constraints for intramolecular folding only:\n"); print_tty_constraint(VRNA_CONSTRAINT_NO_HEADER | VRNA_CONSTRAINT_ANG_BRACK); printf("constraints for cofolding (intermolecular folding) only:\n"); print_tty_constraint(VRNA_CONSTRAINT_NO_HEADER | VRNA_CONSTRAINT_PIPE); } RT = ((temperature+K0)*GASCONST/1000.0); /* ############################################# # main loop: continue until end of file ############################################# */ do{ rotated = 0; cut_point = -1; fname1[0] = '\0'; fname2[0] = '\0'; /* ######################################################## # handle user input from 'stdin' ######################################################## */ if (istty){ switch(up_mode){ case RNA_UP_MODE_1: /* just calculate the probability of beeing unpaired for the given interval(s) */ print_tty_input_seq(); break; case RNA_UP_MODE_2: /* pairwise interaction mode, former -Xp mode */ print_tty_input_seq_str("Use either '&' to connect the 2 sequences or give each sequence on an extra line."); break; case RNA_UP_MODE_3: /* consecutive multi interaction mode ;) first sequence pairs with all following, former -Xf mode */ /* either we wait for the first two sequences */ if(s_target == NULL) print_tty_input_seq_str("Give each sequence on an extra line. " "The first seq. is stored, every other seq. is compared to the first one."); /* or we already have them and wait for the next sequence */ else print_tty_input_seq_str("Enter another sequence."); break; default: nrerror("This should never happen (again)"); break; } } /* extract filename from fasta header if available */ while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){ (void) sscanf(input_string, "%51s", fname1); printf(">%s\n", input_string); /* print fasta header if available */ free(input_string); } /* break on any error, EOF or quit request */ if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;} /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */ else tokenize(input_string, &s1, &s2); /* this also frees the input_string */ length1 = (int)strlen(s1); length2 = (s2) ? (int)strlen(s2) : 0; /* now we got the first and maybe a second sequence */ /* check if we have to change the mode we are operating in */ if((cut_point != -1) && (up_mode & RNA_UP_MODE_1)){ warn_user("Two concatenated sequences given, switching to pairwise interaction mode!"); up_mode = RNA_UP_MODE_2; } int read_again = 0; switch(up_mode){ case RNA_UP_MODE_2: if(cut_point == -1) read_again = 1; break; case RNA_UP_MODE_3: if(cut_point == -1){ if(s_target == NULL) read_again = 1; } else if(s_target != NULL) nrerror( "After the first sequence (pair): Input a single sequence (no &)!\n" "Each input seq. is compared to the very first seq. given.\n" ); break; default: break; } if(read_again){ /* we are in this block only if we just have 1 sequence yet but need a second, too */ /* extract filename from fasta header if available */ while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){ (void) sscanf(input_string, "%51s", fname2); printf(">%s\n", input_string); /* print fasta header if available */ free(input_string); } /* break on any error, EOF or quit request */ if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;} /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */ else tokenize(input_string, &s2, &s3); /* this also frees the input_string */ if(cut_point != -1) nrerror("Don't confuse me by mixing concatenated (&) with single sequences! Go, have some sleep and then check your input again..."); length2 = (int)strlen(s2); } if(noconv){ str_RNA2RNA(s1); str_RNA2RNA(s2); } else{ str_DNA2RNA(s1); str_DNA2RNA(s2); } /** read structure constraint(s) if necessary */ if (fold_constrained) { char *cstruc_tmp = NULL; int old_cut; input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ break;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ old_cut = cut_point; cut_point = -1; tokenize(input_string, &cstruc1, &cstruc2); } else nrerror("constraints missing"); /* now that we've got the constraining structure(s) check if the input was valid */ if(old_cut != cut_point){ nrerror("RNAup -C: mixed single/dual sequence or constraint strings or different cut points"); } read_again = 0; if(cut_point == -1){ if(up_mode & RNA_UP_MODE_2) read_again = 1; else if((up_mode & RNA_UP_MODE_3) && (s_target == NULL)) read_again = 1; } if(read_again){ input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ break;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ tokenize(input_string, &cstruc2, &cstruc_tmp); } else nrerror("constraints missing"); if(cut_point != -1) nrerror("Don't confuse me by mixing concatenated (&) with single sequences! Go, have some sleep and then check your input again..."); } /* check length(s) of input sequence(s) and constraint(s) */ if(strlen(cstruc1) != length1){ fprintf(stderr, "%s\n%s\n", s1, cstruc1); nrerror("RNAup -C: constraint string and structure have unequal length"); } if(s2 != NULL){ if(strlen(cstruc2) != length2){ fprintf(stderr, "%s\n%s\n", s2, cstruc2); nrerror("RNAup -C: constraint string and structure have unequal length"); } } } /* thats all for constraint folding */ /* rotate input sequences if upmode>=2 to ensure first sequence is the longer one */ if(up_mode & (RNA_UP_MODE_2 | RNA_UP_MODE_3)){ if(length1 < length2 ){ rotated = 1; /* rotate the sequences such that the longer is the first */ int l = length2; length2 = length1; length1 = l; char *s = s2; s2 = s1; s1 = s; /* also rotate the file names */ char f[80]; strncpy(f, fname2, 51); strncpy(fname2, fname1, 51); strncpy(fname1, f, 51); /* rotate constraint strings as well */ if(fold_constrained){ s = cstruc2; cstruc2 = cstruc1; cstruc1 = s; } } } /* check ulength values against sequences given */ if(max_u > length1) nrerror("maximum unpaired region exceeds sequence length"); if(up_mode & RNA_UP_MODE_3){ /* if we haven't seen the target yet, store it now */ if(s_target == NULL){ if(rotated){ s_target = s2; s2 = NULL; length_target = length2; strcpy(fname_target, fname2); if(fold_constrained){ cstruc_target = cstruc2; cstruc2 = NULL; } } else{ s_target = s1; length_target = length1; s1 = s2; s2 = NULL; length1 = length2; strcpy(fname_target, fname1); strcpy(fname1, fname2); if(fold_constrained){ cstruc_target = cstruc1; cstruc1 = cstruc2; cstruc2 = NULL; } } fname2[0] = '\0'; } } /* ######################################################## # done with 'stdin' handling ######################################################## */ /* compose file names */ /* first file name */ if (fname1[0] != '\0'){ strcpy(up_out, fname1); if(fname2[0] != '\0'){ strcat(up_out, "_"); strcat(up_out, fname2); } else if(fname_target != '\0'){ strcat(up_out, "_"); strcat(up_out, fname_target); } } else { strcpy(up_out, "RNA"); } /* now, up_out has a maximal length of 104, it should be safe to concatenate more chars */ if(!(up_mode & RNA_UP_MODE_1)){ sprintf(temp_name,"_w%d",w); strncat(up_out, temp_name, 10); } structure = (char *) space(sizeof(char) * (MAX2(length_target, MAX2(length1, length2)) + 1)); /* begin actual computations */ update_fold_params(); /* calc mfe of first sequence */ if (cstruc1 != NULL) strncpy(structure, cstruc1, length1+1); min_en = fold(s1, structure); (void) fflush(stdout); /* calc probability to be unstructured for 1st sequence (in upmode=3 this is not the target!) */ int wplus = w; if(!(up_mode & RNA_UP_MODE_3)){ wplus += incr3 + incr5; /* reset window size if maximum unstructured region is exceeds it */ if(max_u > wplus) wplus = max_u; } /* reset window size if sequence length is shorter */ if(length1 < wplus) wplus = length1; /* calc mfe for first sequence (2nd if upmode = 3) */ if(cstruc1 != NULL) strncpy(structure, cstruc1, length1+1); min_en = fold(s1, structure); pf_scale = exp(-(sfact*min_en)/RT/length1); if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); if (cstruc1 != NULL) strncpy(structure, cstruc1, length1+1); energy = pf_fold(s1, structure); unstr_out = pf_unstru(s1, wplus); free_pf_arrays(); if(fold_constrained && !(up_mode & RNA_UP_MODE_1)){ cstruc_combined = (char *)space(sizeof(char) * (length1 + length2 + 1)); strncpy(cstruc_combined, cstruc1, length1+1); strcat(cstruc_combined, cstruc2); } contrib1 = contrib2 = NULL; inter_out = NULL; switch(up_mode){ case RNA_UP_MODE_1: for (i = 1; i <= unpaired_values[0][0]; i++) { j = unpaired_values[i][0]; do print_unstru(unstr_out, j); while(++j <= unpaired_values[i][1]); } if(output && header){ head = (char *)space(sizeof(char)*(length1 + strlen(cmdl_parameters) + 512)); sprintf(head, "# %s\n# %d %s\n# %s", cmdl_parameters, length1, fname1, s1); } contrib1 = unstr_out; break; case RNA_UP_MODE_2: inter_out = pf_interact(s1, s2, unstr_out, NULL, w, cstruc_combined, incr3, incr5); print_interaction(inter_out, s1, s2, unstr_out, NULL, w, incr3, incr5); if(output && header){ head = (char *)space(sizeof(char)*(length1 + length2 + strlen(cmdl_parameters) + 1024)); sprintf(head, "# %s\n# %d %s\n# %s\n# %d %s\n# %s", cmdl_parameters, length1, fname1, s1, length2, fname2, s2); } contrib1 = unstr_out; break; case RNA_UP_MODE_3: /* calculate prob. unstruct. for target seq */ if(unstr_target == NULL){ wplus = w + incr3 + incr5; if(max_u > wplus) wplus = max_u; if(length_target < wplus) wplus = length_target; if (cstruc_target != NULL) strncpy(structure, cstruc_target, length_target + 1); min_en = fold(s_target, structure); pf_scale = exp(-(sfact*min_en)/RT/length_target); if (length_target>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); if (cstruc_target != NULL) strncpy(structure, cstruc_target, length_target + 1); energy = pf_fold(s_target, structure); unstr_target = pf_unstru(s_target, wplus); free_pf_arrays(); /* for arrays for pf_fold(...) */ } /* check if target sequence is actually longer than query, if not rotate both sequences */ if(length_target < length1){ inter_out = pf_interact(s1, s_target, unstr_out, unstr_target, w, cstruc_combined, incr3, incr5); print_interaction(inter_out, s1, s_target, unstr_out, unstr_target, w, incr3, incr5); contrib1 = unstr_out; contrib2 = unstr_target; } else{ inter_out = pf_interact(s_target, s1, unstr_target, unstr_out, w, cstruc_combined, incr3, incr5); print_interaction(inter_out, s_target, s1, unstr_target, unstr_out, w, incr3, incr5); contrib1 = unstr_target; contrib2 = unstr_out; } if(output && header){ head = (char *)space(sizeof(char)*(length_target + length1 + strlen(cmdl_parameters) + 1024)); sprintf(head, "# %s\n# %d %s\n# %s\n# %d %s\n# %s", cmdl_parameters, length_target, fname_target, s_target, length1, fname1, s1); } break; } /* create additional output */ if(output){ /* how to best compose a reasonable filename */ printf("RNAup output in file: "); strcpy(name, up_out); strcat(name, "_u"); /* since we do not limit the amount of ulength values anymore we just put the maximum length into the filename, the actual printed lengths should be somewhere in the output itself */ sprintf(temp_name, "%d.out", unpaired_values[0][0]); strcat(name, temp_name); printf("%s\n",name); Up_plot(contrib1, contrib2, inter_out, name, unpaired_values, my_contrib, head, up_mode); } /* ######################################################## # clean up ######################################################## */ /* we can save the pu contribution structure of the target sequence for the next run */ if(unstr_target != NULL){ if(length_target < length1) free_pu_contrib_struct(contrib1); else free_pu_contrib_struct(contrib2); } else{ if(contrib1 != NULL) free_pu_contrib_struct(contrib1); if(contrib2 != NULL) free_pu_contrib_struct(contrib2); } if(inter_out != NULL) free_interact(inter_out); /* free all unnecessary character arrays */ if(structure != NULL) free(structure); if(s1 != NULL) free(s1); if(s2 != NULL) free(s2); if(cstruc1 != NULL) free(cstruc1); if(cstruc2 != NULL) free(cstruc2); if(head != NULL) free(head); if(cstruc_combined != NULL) free(cstruc_combined); structure = s1 = s2 = cstruc1 = cstruc2 = head = cstruc_combined = NULL; free_arrays(); /* for arrays for fold(...) */ } while (1); free(cmdl_parameters); return 0; } PRIVATE int compare_unpaired_values(const void *p1, const void *p2){ if((*(int **)p1)[0] > (*(int **)p2)[0]) return 1; if((*(int **)p1)[0] < (*(int **)p2)[0]) return -1; return 0; } PRIVATE int move_useless_unpaired_values(const void *p1, const void *p2){ if((*(int **)p1)[1] < (*(int **)p2)[1]) return 1; if((*(int **)p1)[0] > (*(int **)p2)[0]) return -1; return 0; } PRIVATE void adjustUnpairedValues(int ***unpaired_values){ int i, last_max, real_count; if(*unpaired_values == NULL) return; if((*unpaired_values)[0][0] <= 0) return; /* sort the ranges array */ qsort(&((*unpaired_values)[1]), (*unpaired_values)[0][0], sizeof(int **), compare_unpaired_values); last_max = (*unpaired_values)[1][1] != -1 ? (*unpaired_values)[1][1] : (*unpaired_values)[1][0]; real_count = 1; for(i = 2; i <= (*unpaired_values)[0][0]; i++){ if((*unpaired_values)[i][1] == -1){ /* we just have a single value */ if((*unpaired_values)[i][0] <= last_max) (*unpaired_values)[i][1] = -2; /* mark this entry to be removed */ else{ last_max = (*unpaired_values)[i][0]; real_count++; } } else { /* we have a range of values */ if(((*unpaired_values)[i][0] <= last_max) && ((*unpaired_values)[i][1] <= last_max)) (*unpaired_values)[i][1] = -2; /* mark this entry to be removed as the whole range is already covered */ else if(((*unpaired_values)[i][0] <= last_max) && ((*unpaired_values)[i][1] > last_max)){ (*unpaired_values)[i][0] = last_max + 1; last_max = last_max + 1; if((*unpaired_values)[i][1] == last_max) (*unpaired_values)[i][1] = -1; /* range reduced to single value */ else last_max = (*unpaired_values)[i][1]; /* maximum of range */ real_count++; } else{ last_max = (*unpaired_values)[i][1]; real_count++; } } } /* sort entries again to get rid of useless ones */ qsort(&((*unpaired_values)[1]), (*unpaired_values)[0][0], sizeof(int **), move_useless_unpaired_values); /* free memory we dont need anymore */ for(i = real_count+1; i <= (*unpaired_values)[0][0]; i++) free((*unpaired_values)[i]); (*unpaired_values) = (int **)realloc((*unpaired_values), (real_count + 1) * sizeof(int **)); (*unpaired_values)[0][0] = real_count; /* sort the array again */ qsort(&((*unpaired_values)[1]), (*unpaired_values)[0][0], sizeof(int **), compare_unpaired_values); } /** *** Append a parameter char string to a larger char string savely *** *** \param param_dest A pointer to the char array where the new parameter will be concatenated to *** \param parameter The new parameter to be concatenated *** \param param_dest_length A pointer to the size of the already allocated space of param_dest **/ PRIVATE void appendCmdlParameter(char **param_dest, const char *parameter, int *param_dest_length){ int l = strlen(*param_dest) + strlen(parameter); if(l + 1 > *param_dest_length){ /* alloc more space */ *param_dest_length = (int)(1.2 * l); *param_dest = xrealloc(*param_dest, *param_dest_length * sizeof(char)); } strcat(*param_dest, parameter); } /* call: tokenize(line,&seq1,&seq2); the sequence string is split at the "&" and the first seq is written in seq1, the second into seq2 */ /* using sscanf instead of strcpy get's rid of trainling junk on the input line */ void tokenize(char *line, char **seq1, char **seq2) { char *pos; int cut = -1; int i; pos = strchr(line, '&'); if (pos) { cut = (int) (pos-line)+1; (*seq1) = (char *) space((cut+1)*sizeof(char)); (*seq2) = (char *) space(((strlen(line)-cut)+2)*sizeof(char)); if (strchr(pos+1, '&')) nrerror("more than one cut-point in input"); *pos = '\0'; (void) sscanf(line, "%s", *seq1); (void) sscanf(pos+1, "%s", *seq2); } else { (*seq1) = (char *) space((strlen(line)+1)*sizeof(char)); (*seq2) = NULL; sscanf(line, "%s", *seq1); } if (cut > -1) { if (cut_point==-1) cut_point = cut; else if (cut_point != cut) { fprintf(stderr,"cut_point = %d cut = %d\n", cut_point, cut); nrerror("Sequence and Structure have different cut points."); } } free(line); return; } /* divide the constraints string in intermolecular constrains (inter) and intramolecular constrains within both sequences */ /* len1 is the length of the LONGER input seq ! */ void seperate_bp(char **inter, int len1, char **intra_l, char **intra_s) { int i,j,len; short *pt=NULL; char *temp_inter, *pt_inter; len=strlen((*inter)); /* printf("inter\n%s\n",(*inter)); */ i = len+1; temp_inter=(char*)space(sizeof(char)*i); /* to make a pair_table convert <|> to (|) */ pt_inter=(char*)space(sizeof(char)*i); /* if shorter seq is first seq in constrained string, write the longer one as the first one */ temp_inter[strlen((*inter))] = '\0'; pt_inter[strlen((*inter))] = '\0'; if (cut_point < len1) { /* write the constrain for the longer seq first */ for (j=0,i=cut_point-1;i=cut_point){ temp_inter[i-1] = (*inter)[i-1]; (*intra_l)[i-1] = '.'; (*intra_s)[pt[i]-cut_point] = '.'; } else { /* intramolekular bp */ (*intra_l)[i-1] = (*inter)[i-1]; temp_inter[i-1] = '.'; } } else { /* i>=cut_point */ /* intermolekular bp */ if (pt[i] < cut_point){ temp_inter[i-1] = (*inter)[i-1]; /* (*intra_s)[i-1] = '.'; */ } else { /* intramolekular bp */ (*intra_s)[i-cut_point] = (*inter)[i-1]; temp_inter[i-1] = '.'; } } } } /* printf("%s -1\n%s -2\n%s -3\n%s -4\n",(*inter),temp_inter,(*intra_l),(*intra_s)); */ strcpy((*inter),temp_inter); free(temp_inter); free(pt_inter); free(pt); } PRIVATE void print_interaction(interact *Int, char *s1, char *s2, pu_contrib *p_c, pu_contrib *p_c2, int w, int incr3, int incr5) { char *i_long,*i_short; int i,len, l_l, l_s, len1, end5, end3, i_min, j_min, l1, add_a, add_b,nix_up; double p_c_S; double G_min,Gi_min,Gul, G_sum, Gus, diff; duplexT mfe; char *struc; G_min = Int->Gikjl; Gi_min = Int->Gikjl_wo; len1 = Int->length; len=strlen(s1)+strlen(s2); /* use duplexfold() to fold the interaction site */ l_l = (Int->i-Int->k+1); i_long = (char*)space(sizeof(char)*(l_l+1)); l_s = (Int->l-Int->j+1); i_short = (char*)space(sizeof(char)*(l_s+1)); strncpy(i_long,&s1[Int->k-1],l_l); i_long[l_l] = '\0'; strncpy(i_short,&s2[Int->j-1],l_s); i_short[l_s] = '\0'; mfe = duplexfold(i_long,i_short); i_min = mfe.i; j_min = mfe.j ; l1 = strchr(mfe.structure, '&')-mfe.structure; /* printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", mfe.structure, i_min+1-l1, i_min, j_min, j_min+strlen(mfe.structure)-l1-2, mfe.energy ); */ /* structure by duplexfold is shorter than structure by RNAup:*/ add_a = add_b = 0; /* length difference in longer / shorter sequence*/ nix_up = 0; if(((i_min+1-l1) - i_min) != ( Int->k - Int->i)) { add_a = Int->i - Int->k + 2; } if(((j_min+strlen(mfe.structure)-l1-2) - j_min) != (Int->l - Int->j)) { add_b = Int->l - Int->j +2; } /* printf("add_a %d add_b %d\n",add_a,add_b); */ if( add_a || add_b ) { nix_up = 1; if(add_a && add_b == 0) add_b = Int->l - Int->j + 2; if(add_a == 0 && add_b) add_a = Int->i - Int->k + 2; struc = (char*)space(sizeof(char)*(add_a+add_b+3)); for(i=0;i<(add_a+add_b-1);i++) { if(i != l_l) struc[i] = '.'; if(i == l_l) struc[i] = '&'; } struc[i] = '\0'; } else { l1=strlen(mfe.structure); struc = (char*)space(sizeof(char)*(l1+1)); strcpy(struc,mfe.structure); } end5 = MAX(1,Int->k-incr5); end3 = MIN(MIN(l_l-1+incr3,w+incr3+incr5),len1); p_c_S = p_c->H[end5][end3]+p_c->I[end5][end3]+p_c->M[end5][end3]+p_c->E[end5][end3]; Gul = -RT*log(p_c_S); if (p_c2 == NULL) { G_sum = Gi_min + Gul; /* printf("dG = dGint + dGu_l\n"); */ printf("%s %3d,%-3d : %3d,%-3d (%.2f = %.2f + %.2f)\n", struc, Int->k, Int->i, Int->j, Int->l, G_min, Gi_min, Gul); printf("%s&%s\n",i_long,i_short); } else { p_c_S = p_c2->H[Int->j][(Int->l)-(Int->j)]+ p_c2->I[Int->j][(Int->l)-(Int->j)]+ p_c2->M[Int->j][(Int->l)-(Int->j)]+ p_c2->E[Int->j][(Int->l)-(Int->j)]; Gus = -RT*log(p_c_S); G_sum = Gi_min + Gul +Gus; /* printf("dG = dGint + dGu_l + dGu_s\n"); */ printf("%s %3d,%-3d : %3d,%-3d (%.2f = %.2f + %.2f + %.2f)\n", struc, Int->k, Int->i, Int->j, Int->l, G_min, Gi_min, Gul, Gus); printf("%s&%s\n",i_long,i_short); } if (!EQUAL(G_min,G_sum)) { printf("ERROR\n"); diff = fabs((G_min)-(G_sum)); printf("diff %.18f\n",diff); } if(nix_up) fprintf(stderr,"RNAduplex structure doesn't match any structure of RNAup structure ensemble\n"); free(i_long); free(i_short); free(mfe.structure); free(struc); } /* print coordinates and free energy for the region of highest accessibility */ PRIVATE void print_unstru(pu_contrib *p_c, int w) { int i,j,len,min_i,min_j; double dG_u, min_gu; if (p_c != NULL) { min_gu = 1000.0; len = p_c->length; for (i=1; i<=len; i++) { for (j=i; j < MIN((i+w),len+1); j++) { double blubb; if ((j-i+1) == w && i+w-1 <= len) { blubb = p_c->H[i][j-i]+p_c->I[i][j-i]+p_c->M[i][j-i]+p_c->E[i][j-i]; dG_u = -RT*log(blubb); if (dG_u < min_gu ) { min_gu = dG_u; min_i=i; min_j=i+w-1; } } } } printf("%4d,%4d \t (%.3f) \t for u=%3d\n",min_i,min_j,min_gu,w); } else { nrerror("error with prob unpaired"); } }