#include #include #include #include #include #include #include #include #ifndef WIN32 #include "config.h" #endif #ifdef HAVE_LIBXMLPLUSPLUS #ifdef HAVE_LIBXML2 #include #endif #endif #include "debug.h" #include "misc.h" #include "rnafuncs.h" #include "utils.h" #include "rna_profile_alignment.h" #ifndef HAVE_LIBRNA #undef HAVE_LIBG2 #endif #include #ifdef HAVE_LIBG2 #include #include #include #ifdef HAVE_LIBGD #include #endif #endif #define COLOR_RED 0 #define COLOR_GREEN 1 #define COLOR_BLUE 2 #define COLOR_YELLOW 3 #define COLOR_MAGENTA 4 #define COLOR_TURKIS 5 #define NUM_COLORS 6 #define COLOR_DEF_RED 1,0,0 #define COLOR_DEF_GREEN 0,1,0 #define COLOR_DEF_BLUE 0,0,1 #define COLOR_DEF_YELLOW 1,1,0 #define COLOR_DEF_MAGENTA 0,1,1 #define COLOR_DEF_TURKIS 1,0,1 bool RNAFuncs::isRNAString(const string &str) { string::const_iterator it; for(it=str.begin(); it!=str.end(); it++) { switch(tolower(*it)) { case 'a': case 'c': case 'g': case 'u': case 't': case '.': case '-': break; default: return false; } } return true; } bool RNAFuncs::isViennaString(const string &str, Ulong &basePairCount, Ulong &maxDepth) { long brackets=0; maxDepth=0; basePairCount=0; string::const_iterator it; for(it=str.begin(); it!=str.end(); it++) { switch(*it) { case '.': break; case '(': brackets++; basePairCount++; if(brackets>0) maxDepth=max(static_cast(maxDepth),static_cast(brackets)); break; case ')': if(brackets) { brackets--; break; } default: return false; } } // equal nr of opening and closing brackets ? if(brackets) return false; return true; } #ifdef HAVE_LIBG2 // This features require the g2 library void RNAFuncs::drawRNAStructure(const string &seq, const string &structure, const string &filename_prefix, const string &structname, const list > ®ions, const SquigglePlotOptions &options) { const double base_fontsize=12; float *X, *Y,min_X=0,max_X=0,min_Y=0,max_Y=0; Uint i; short *pair_table; int id_PS,id_FIG=0,id; int color_black, *colors; double xpos,ypos; char buf[10]; string filename; double *points; int numPoints; #ifdef HAVE_LIBGD int id_PNG=0,id_JPG=0; #endif X = new float[structure.size()]; Y = new float[structure.size()]; points=new double[2*structure.size()]; colors=new int[NUM_COLORS]; assert(seq.size() == structure.size()); pair_table = make_pair_table(structure.c_str()); i = naview_xy_coordinates(pair_table, X, Y); if(i!=structure.size()) cerr << "strange things happening in squigglePlot ..." << endl; // scale image for(i=0;i(options.scale); Y[i]*=static_cast(options.scale); } // calculate image dimensions for(i=0;ii+1) { // pairs in both structures g2_line(id,X[i],Y[i],X[pair_table[i+1]-1],Y[pair_table[i+1]-1]); } } // draw regions g2_set_line_width(id,0.4); list >::const_iterator it; Uint regionNr=0; for(it=regions.begin();it!=regions.end();it++) { double center_x=0,center_y=0; // fill coordinate list for(i=0;i < it->second;i++) { points[2*i]=X[it->first+i]; points[2*i+1]=Y[it->first+i]; center_x+=X[it->first+i]; center_y+=Y[it->first+i]; } numPoints=it->second-1; center_x/=numPoints; // center of gravity center_y/=numPoints; g2_pen(id,colors[regionNr % NUM_COLORS]); g2_poly_line(id,numPoints,points); sprintf(buf,"%d",regionNr+1); g2_string(id,center_x,center_y,buf); // draw region number regionNr++; } // mark 5' end g2_pen(id,color_black); g2_string(id,X[0]-20,Y[0],"5'"); g2_set_font_size(id,base_fontsize*options.scale); g2_set_line_width(id,0.2); // draw sequence for(i=0;i(options.scale); Y[i]*=static_cast(options.scale); } // calculate image dimensions for(i=0;ii+1 && (unsigned short)alt_pair_table[i+1]>i+1) { // pairs in both structures g2_pen(id,ps_color_black); g2_line(id,X[i],Y[i],X[pair_table[i+1]-1],Y[pair_table[i+1]-1]); } else { if((unsigned short)pair_table[i+1]>i+1 && (unsigned short)alt_pair_table[i+1]<=i+1) { // pairs only in first structure if(atX) g2_pen(id,ps_color_blue); else g2_pen(id,ps_color_green); g2_line(id,X[i],Y[i],X[pair_table[i+1]-1],Y[pair_table[i+1]-1]); } else { if((unsigned short)pair_table[i+1]<=i+1 && (unsigned short)alt_pair_table[i+1]>i+1) { // pairs only in second structure if(atX) g2_pen(id,ps_color_green); else g2_pen(id,ps_color_blue); double dashes=2.0; g2_set_dash(id,1,&dashes); g2_line(id,X[i],Y[i],X[alt_pair_table[i+1]-1],Y[alt_pair_table[i+1]-1]); dashes=0; g2_set_dash(id,1,&dashes); } } } } // draw structure names // x-at-y or y-at-x if(atX) { g2_pen(id,ps_color_green); g2_string(id,min_X,max_Y-10,(char*)strname2.c_str()); g2_pen(id,ps_color_black); g2_string(id,min_X,max_Y-20,"at"); g2_pen(id,ps_color_blue); g2_string(id,min_X,max_Y-30,(char*)strname1.c_str()); } else { g2_pen(id,ps_color_blue); g2_string(id,min_X,max_Y-10,(char*)strname1.c_str()); g2_pen(id,ps_color_black); g2_string(id,min_X,max_Y-20,"at"); g2_pen(id,ps_color_green); g2_string(id,min_X,max_Y-30,(char*)strname2.c_str()); } g2_flush(id); g2_close(id); free(pair_table); free(alt_pair_table); DELETE(X); DELETE(Y); } #endif #ifdef HAVE_LIBRNA void RNAFuncs::generateRNAAlignmentXML(const string &structure, const string &altStructure, const string &seq1, const string &seq2, const string &strname1, const string &strname2, ostream &s) { string base; Uint i; float *X, *Y; short *pair_table,*alt_pair_table; Uint basenr_x=1, basenr_y=1; string filename; X = new float[structure.size()]; Y = new float[structure.size()]; assert(seq1.size() == seq2.size()); assert(seq1.size() == structure.size()); assert(seq1.size() == altStructure.size()); // calculate coordinates pair_table = make_pair_table(structure.c_str()); alt_pair_table = make_pair_table(altStructure.c_str()); i = naview_xy_coordinates(pair_table, X, Y); if(i!=structure.size()) cerr << "strange things happening in squigglePlot ..." << endl; // generate XML s << "" << endl; // seqalignment s << " " << endl; for(i=0;i" << endl; if(seq1[i]!='-') basenr_x++; if(seq2[i]!='-') basenr_y++; } s << " " << endl; // pairs s << " " << endl; for(i=0;ii+1 && (unsigned short)alt_pair_table[i+1]>i+1) { s << " " << endl; } else { if((unsigned short)pair_table[i+1]>i+1) { s << " " << endl; } else { if((unsigned short)alt_pair_table[i+1]>i+1) s << " " << endl; } } } s << " " << endl; // coordinates // x structure s << " " << endl; s << " " << endl; for(i=0;i" << endl; } s << " " << endl; s << " " << endl; DELETE(X); DELETE(Y); X = new float[altStructure.size()]; Y = new float[altStructure.size()]; // y structure i = naview_xy_coordinates(alt_pair_table, X, Y); if(i!=structure.size()) cerr << "strange things happening in squigglePlot ..." << endl; s << " " << endl; s << " " << endl; for(i=0;i" << endl; } s << " " << endl; s << " " << endl; s << "" << endl; free(pair_table); free(alt_pair_table); DELETE(X); DELETE(Y); } #endif #ifdef HAVE_LIBXMLPLUSPLUS #ifdef HAVE_LIBXML2 void RNAFuncs::printPAliXML(const string &id1, const string &id2, const string &seq1, const string &seq2, const string &str1, const string &str2, double &score, const RNAforesterOptions &options,AddXmlInfos &xmlInfos,const string &outputFile) { char arr_Score[20]; const char** args; int nrOptions; string command; bool xmlInput; string comment1; string comment2; string description1; string description2; xmlpp::Document* xmlDocOrig; xmlpp::DomParser domParser; xmlpp::Element* elemRootOrig; xmlpp::NodeSet nodeSetOrig; args = options.getArgs(); nrOptions = options.getNrOfOptions(); for(int i=0;iset_namespace_declaration(xsdurl, ""); nodeRoot->set_namespace_declaration("http://www.w3.org/2001/XMLSchema-instance","xsi"); nodeRoot->set_attribute("schemaLocation",xsdurl + " http://bibiserv.techfak.uni-bielefeld.de/xsd/net/sourceforge/hobit/20060515/rnastructAlignmentML.xsd","xsi"); xmlpp::Element* nodeRnaStructAlignment = nodeRoot->add_child("rnastructurealignment"); //convert double to char array to use in set_attribute sprintf(arr_Score,"%.0f",score); nodeRnaStructAlignment->set_attribute("score",arr_Score); nodeSequences[1] = nodeRnaStructAlignment->add_child("sequence"); nodeSequences[1]->set_attribute("seqID",xmlInfos.idmapping[atoi(id1.c_str())]); if(options.has(RNAforesterOptions::LocalSimilarity)){ stringstream ss; ss << xmlInfos.xbasepos; nodeSequences[1]->set_attribute("startingposition",ss.str()); } if(xmlInfos.names.count(atoi(id1.c_str())) > 0){ nodeSequences[1]->add_child("name")->set_child_text(xmlInfos.names[atoi(id1.c_str())]); } if(xmlInfos.synonyms.count(atoi(id1.c_str())) > 0){ nodeSequences[1]->add_child("synonyms")->set_child_text(xmlInfos.synonyms[atoi(id1.c_str())]); } if(xmlInfos.descriptions.count(atoi(id1.c_str())) > 0){ nodeSequences[1]->add_child("description")->set_child_text(xmlInfos.descriptions[atoi(id1.c_str())]); } nodeSequences[1]->add_child("alignedFreeSequence")->set_child_text(UpperCase(seq1)); nodeSequences[1]->add_child("structure")->set_child_text(str1); // id is of type string index of map is of size int => convert if(xmlInfos.comments.count(atoi(id1.c_str())) > 0){ nodeSequences[1]->add_child("comment")->set_child_text(xmlInfos.comments[atoi(id1.c_str())]); } nodeSequences[2] = nodeRnaStructAlignment->add_child("sequence"); nodeSequences[2]->set_attribute("seqID",xmlInfos.idmapping[atoi(id2.c_str())]); if(options.has(RNAforesterOptions::LocalSimilarity)){ stringstream ss; ss << xmlInfos.ybasepos; nodeSequences[2]->set_attribute("startingposition",ss.str()); } if(xmlInfos.names.count(atoi(id2.c_str())) > 0){ nodeSequences[2]->add_child("name")->set_child_text(xmlInfos.names[atoi(id2.c_str())]); } if(xmlInfos.synonyms.count(atoi(id2.c_str())) > 0){ nodeSequences[2]->add_child("synonyms")->set_child_text(xmlInfos.synonyms[atoi(id2.c_str())]); } if(xmlInfos.descriptions.count(atoi(id2.c_str())) > 0){ nodeSequences[2]->add_child("description")->set_child_text(xmlInfos.descriptions[atoi(id2.c_str())]); } nodeSequences[2]->add_child("alignedFreeSequence")->set_child_text(UpperCase(seq2)); nodeSequences[2]->add_child("structure")->set_child_text(str2); // id is of type string index of map is of size int => convert if(xmlInfos.comments.count(atoi(id2.c_str())) > 0){ nodeSequences[2]->add_child("comment")->set_child_text(xmlInfos.comments[atoi(id2.c_str())]); } nodeRnaStructAlignment->add_child("program")->set_attribute("command",command); document.write_to_file("./" + outputFile,"UTF-8"); // display the mapping from the internal id to the name of the sequence / structure qualified in the name element in the rnastructML if(xmlInfos.xmlInput){ printMapping(xmlInfos.idmapping); } } catch(const std::exception& ex) { cout << "xml exception: " << ex.what() << std::endl; } } void RNAFuncs::printMAliXML(deque > &resultList,const RNAforesterOptions &options, double &minPairProb,AddXmlInfos &xmlInfos,const string &outputFile) { string xsdurl = getXSDURL(); int j; // get RNAforester call const char** args; int nrOptions; string command; args = options.getArgs(); nrOptions = options.getNrOfOptions(); for(int i=0;i > seqAli; deque strAli; string consSeq, consStr; deque baseprobs; deque,double> > pairprobs; stringstream tmpBase; //create root node xmlpp::Element* nodeRoot = document.create_root_node("rnastructAlignmentML", "", ""); //set namespace declarations to root element nodeRoot->set_namespace_declaration(xsdurl, ""); nodeRoot->set_namespace_declaration("http://www.w3.org/2001/XMLSchema-instance","xsi"); nodeRoot->set_attribute("schemaLocation",xsdurl + " http://bibiserv.techfak.uni-bielefeld.de/xsd/net/sourceforge/hobit/20060515/rnastructAlignmentML.xsd","xsi"); // N VALues in the deque are n cluster deque >::const_iterator it; for(it=resultList.begin();it!=resultList.end();it++) { // create node rnastructalignment xmlpp::Element* nodeRnaStructAlignment = nodeRoot->add_child("rnastructurealignment"); //print aligned sequences and structures seqAli = it->second->getSeqAli(); strAli = it->second->getStrAli(); // j iterator to get structure alignment j=0; deque >::const_iterator it2; for(it2=seqAli.begin();it2!=seqAli.end();it2++) { xmlpp::Element* nodeSequence = nodeRnaStructAlignment->add_child("sequence"); nodeSequence->set_attribute("seqID",xmlInfos.idmapping[atoi(it2->first.c_str())]); if(xmlInfos.names.count(atoi(it2->first.c_str())) > 0){ nodeSequence->add_child("name")->set_child_text(xmlInfos.names[atoi(it2->first.c_str())]); } if(xmlInfos.synonyms.count(atoi(it2->first.c_str())) > 0){ nodeSequence->add_child("synonyms")->set_child_text(xmlInfos.synonyms[atoi(it2->first.c_str())]); } if(xmlInfos.descriptions.count(atoi(it2->first.c_str())) > 0){ nodeSequence->add_child("description")->set_child_text(xmlInfos.descriptions[atoi(it2->first.c_str())]); } xmlpp::Element* nodeAlignedFreeSequence = nodeSequence->add_child("alignedFreeSequence"); nodeAlignedFreeSequence->set_child_text(UpperCase(it2->second)); xmlpp::Element* nodeStructure = nodeSequence->add_child("structure"); nodeStructure->set_child_text(strAli[j]); if(xmlInfos.comments.count(atoi(it2->first.c_str())) > 0){ nodeSequence->add_child("comment")->set_child_text(xmlInfos.comments[atoi(it2->first.c_str())]); } j++; } // insert consensus element xmlpp::Element* nodeConsensus = nodeRnaStructAlignment->add_child("consensus"); // get and insert structure pairprobs it->second->getPairProb(minPairProb, pairprobs); xmlpp::Element* nodeStrProbs = nodeConsensus->add_child("structureprobabilities"); deque,double> >::iterator it3; for(it3=pairprobs.begin();it3!=pairprobs.end();it3++) { xmlpp::Element* nodePt = nodeStrProbs->add_child("pt"); tmpBase << it3->first.first; nodePt->set_attribute("a",tmpBase.str()); tmpBase.str(""); tmpBase << it3->first.second; nodePt->set_attribute("b",tmpBase.str()); tmpBase.str(""); tmpBase << it3->second; nodePt->set_attribute("probability",tmpBase.str()); tmpBase.str(""); } // get and insert consensus sequence consSeq = it->second->getConsSeq(); xmlpp::Element* nodeConsSeq = nodeConsensus->add_child("sequence"); nodeConsSeq->set_attribute("seqID","consensus"); xmlpp::Element* nodeAlignedFSCons = nodeConsSeq->add_child("alignedFreeSequence"); nodeAlignedFSCons->set_child_text(UpperCase(consSeq)); // get and insert consensus structure consStr = it->second->getConsStr(minPairProb); xmlpp::Element* nodeConsStr = nodeConsSeq->add_child("structure"); nodeConsStr->set_child_text(consStr); //xmlpp::Element* nodeConsStr = nodeConsensus->add_child("structure"); //nodeConsStr->set_child_text(consStr); // insert nodes program and attribute command xmlpp::Element* nodeProgram = nodeRnaStructAlignment->add_child("program"); nodeProgram->set_attribute("command",command); } // end for document.write_to_file("./" + outputFile,"UTF-8"); // display the mapping from the internal id to the name of the sequence / structure qualified in the name element in the rnastructML if(xmlInfos.xmlInput){ printMapping(xmlInfos.idmapping); } } catch(const std::exception& ex) { cout << "xml exception: " << ex.what() << std::endl; } } void RNAFuncs::printMapping(map &mapping){ map::iterator it; cout << "mapping: " << endl; for(it=mapping.begin();it!=mapping.end();it++) { cout << it->first <<": " << it->second << endl; } cout << "\n"; } #endif #endif void RNAFuncs::printAli(const string &name1, const string &name2, const string &seq1, const string &seq2, const string &str1, const string &str2) { Uint i; string info; for(i=0;i