#ifndef _RNA_ALGEBRA_H_ #define _RNA_ALGEBRA_H_ #include #include #include #include "algebra.h" #include "debug.h" #include "misc.h" #include "rna_alphabet.h" #include "rnaforester_options.h" /* ****************************************** */ /* Definitions and typedefs */ /* ****************************************** */ const double DBL_NEG = -100000000.0; // the values from limits.h caused problems.. const double DBL_POS = 100000000.0; typedef Algebra DoubleScoreProfileAlgebraType; typedef Algebra IntScore_AlgebraType; typedef RNA_Algebra IntScoreRNA_AlgebraType; typedef SZAlgebra IntScoreSZAlgebraType; /* ****************************************** */ /* Class Score */ /* ****************************************** */ /** Class Score reads scoring parameters from RNAforester command line. */ class Score { private: bool m_isDistance; bool m_isLocal; bool m_isRIBOSUM; public: int m_bp_rep_score; int m_bp_del_score; int m_b_match_score; int m_b_rep_score; int m_b_del_score; Score(RNAforesterOptions &options); Score(const Score &s); void print(); }; /* ****************************************** */ /* RNA Algebra Classes */ /* ****************************************** */ /** Similarity algebra for RNA forests */ class IntSimiRNA_Algebra : public IntScoreRNA_AlgebraType { private: Score m_s; public: int empty() const {return 0;}; int replacepair(RNA_Alphabet la, RNA_Alphabet lb, int down, RNA_Alphabet ra, RNA_Alphabet rb, int over) const { return m_s.m_bp_rep_score+down+over; }; int replace(RNA_Alphabet a,int down, RNA_Alphabet b, int over) const { if(a==ALPHA_BASEPAIR && b == ALPHA_BASEPAIR) return m_s.m_bp_rep_score+down+over; else { if(a==ALPHA_BASEPAIR || b==ALPHA_BASEPAIR) return INT_MIN/4; else { if(a==b) return m_s.m_b_match_score+down+over; else return m_s.m_b_rep_score+down+over; } } }; int del(RNA_Alphabet a,int down, int over) const { if(a==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int insert(int down,RNA_Alphabet b,int over) const { if(b==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int choice(int a, int b) const { return max(a,b); }; int worst_score() const { return INT_MIN; }; IntSimiRNA_Algebra(const Score &s) : m_s(s) {}; }; /** Distance algebra for RNA forests */ class IntDistRNA_Algebra : public IntScoreRNA_AlgebraType { private: Score m_s; public: int empty() const {return 0;}; int replacepair(RNA_Alphabet la, RNA_Alphabet lb, int down, RNA_Alphabet ra, RNA_Alphabet rb, int over) const { return m_s.m_bp_rep_score+down+over; }; int replace(RNA_Alphabet a,int down, RNA_Alphabet b, int over) const { if(a==ALPHA_BASEPAIR && b == ALPHA_BASEPAIR) return m_s.m_bp_rep_score+down+over; else { if(a==ALPHA_BASEPAIR || b==ALPHA_BASEPAIR) return INT_MAX/4; else { if(a==b) return m_s.m_b_match_score+down+over; else return m_s.m_b_rep_score+down+over; } } }; int del(RNA_Alphabet a,int down,int over) const { if(a==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int insert(int down,RNA_Alphabet b,int over) const { if(b==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int choice(int a, int b) const { return min(a,b); }; int worst_score() const { return INT_MAX; }; IntDistRNA_Algebra(const Score &s) : m_s(s) {}; }; /** RIBOSUM85-60 matrix published in RSEARCH: Finding homologs of single structured RNA sequences * R. Klein and S. Eddy, BMC Bioinformatics 2003 Vol.4 */ class RIBOSUM8560 : public IntScoreRNA_AlgebraType { private: Score m_s; int m_baseSubstMtrx[4][4]; int m_basepairSubstMtrx[4][4][4][4]; public: int empty() const {return 0;}; int replacepair(RNA_Alphabet la, RNA_Alphabet lb, int down, RNA_Alphabet ra, RNA_Alphabet rb, int over) const { int i,j,k,l; i=alpha2RNA_Alpha(la); j=alpha2RNA_Alpha(ra); k=alpha2RNA_Alpha(lb); l=alpha2RNA_Alpha(rb); return m_basepairSubstMtrx[i][j][k][l]+down+over; }; int replace(RNA_Alphabet a,int down, RNA_Alphabet b, int over) const { assert(!(a==ALPHA_BASEPAIR && b==ALPHA_BASEPAIR)); if(a==ALPHA_BASEPAIR || b==ALPHA_BASEPAIR) return INT_MIN/4; else { int i,j; i=alpha2RNA_Alpha(a); j=alpha2RNA_Alpha(b); return m_baseSubstMtrx[i][j]+down+over; } }; int del(RNA_Alphabet a,int down, int over) const { if(a==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int insert(int down,RNA_Alphabet b,int over) const { if(b==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int choice(int a, int b) const { return max(a,b); }; int worst_score() const { return INT_MIN; }; RIBOSUM8560(const Score &s) : m_s(s) { int i,j,k,l; // set substitution matrices // base replacement m_baseSubstMtrx[ALPHA_PRO_BASE_A][ALPHA_PRO_BASE_A]=222; m_baseSubstMtrx[ALPHA_PRO_BASE_A][ALPHA_PRO_BASE_C]=-186; m_baseSubstMtrx[ALPHA_PRO_BASE_A][ALPHA_PRO_BASE_G]=-146; m_baseSubstMtrx[ALPHA_PRO_BASE_A][ALPHA_PRO_BASE_U]=-139; m_baseSubstMtrx[ALPHA_PRO_BASE_C][ALPHA_PRO_BASE_C]=116; m_baseSubstMtrx[ALPHA_PRO_BASE_C][ALPHA_PRO_BASE_G]=-248; m_baseSubstMtrx[ALPHA_PRO_BASE_C][ALPHA_PRO_BASE_U]=-105; m_baseSubstMtrx[ALPHA_PRO_BASE_G][ALPHA_PRO_BASE_G]=103; m_baseSubstMtrx[ALPHA_PRO_BASE_G][ALPHA_PRO_BASE_U]=-174; m_baseSubstMtrx[ALPHA_PRO_BASE_U][ALPHA_PRO_BASE_U]=165; // copy triangle for(i=0;i<=ALPHA_PRO_BASE_U;i++) for(j=0;j0 && b.p[ALPHA_PRO_BASEPAIR]>0) { // pair replacement return a.p[ALPHA_PRO_BASEPAIR]*b.p[ALPHA_PRO_BASEPAIR]*m_s.m_bp_rep_score + down+over; } else { if(a.p[ALPHA_PRO_BASE]>0 && b.p[ALPHA_PRO_BASE]>0) { double s=0; // base replacement for(int i=ALPHA_PRO_BASE_A;i<=ALPHA_PRO_BASE_U;i++) for(int j=ALPHA_PRO_BASE_A;j<=ALPHA_PRO_BASE_U;j++) s+= i==j ? a.p[i]*b.p[j]*m_s.m_b_match_score : a.p[i]*b.p[j]*m_s.m_b_rep_score; if(s==0) // no sequence information s=a.p[ALPHA_PRO_BASE]*b.p[ALPHA_PRO_BASE]*m_s.m_b_rep_score; return s+down+over; } else { // undefined operation (replace base by basepair ??) return DBL_NEG/4; } } }; double del(RNA_Alphabet_Profile a,double down, double over) const { if(a.p[ALPHA_PRO_BASEPAIR]>0) return a.p[ALPHA_PRO_BASEPAIR]*m_s.m_bp_del_score+down+over; else return a.p[ALPHA_PRO_BASE]*m_s.m_b_del_score+down+over; }; double insert(double down,RNA_Alphabet_Profile b,double over) const { if(b.p[ALPHA_PRO_BASEPAIR]>0) return b.p[ALPHA_PRO_BASEPAIR]*m_s.m_bp_del_score+down+over; else return b.p[ALPHA_PRO_BASE]*m_s.m_b_del_score+down+over; }; double choice(double a, double b) const { return max(a,b); }; double worst_score() const { return DBL_NEG; }; DoubleSimiProfileAlgebra(const Score &s) : m_s(s) {}; }; /** Distance algebra for RNA profile forests */ class DoubleDistProfileAlgebra : public DoubleScoreProfileAlgebraType { private: Score m_s; public: double empty() const {return 0.0;}; double replace(RNA_Alphabet_Profile a,double down, RNA_Alphabet_Profile b, double over) const { TRACE(DBG_ALGEBRA,"rep","inside!!!"); if(a.p[ALPHA_PRO_BASEPAIR]>0 && b.p[ALPHA_PRO_BASEPAIR]>0) { // pair replacement return a.p[ALPHA_PRO_BASEPAIR]*b.p[ALPHA_PRO_BASEPAIR]*m_s.m_bp_rep_score + down+over; } else { if(a.p[ALPHA_PRO_BASE]>0 && b.p[ALPHA_PRO_BASE]>0) { double s=0; // base replacement for(int i=ALPHA_PRO_BASE_A;i<=ALPHA_PRO_BASE_U;i++) for(int j=ALPHA_PRO_BASE_A;j<=ALPHA_PRO_BASE_U;j++) s+= i==j ? a.p[i]*b.p[j]*m_s.m_b_match_score : a.p[i]*b.p[j]*m_s.m_b_rep_score; if(s==0) // no sequence information s=a.p[ALPHA_PRO_BASE]*b.p[ALPHA_PRO_BASE]*m_s.m_b_rep_score; return s+down+over; } else { // undefined operation (replace base by basepair ??) return DBL_POS/4; } } }; double del(RNA_Alphabet_Profile a,double down, double over) const { if(a.p[ALPHA_PRO_BASEPAIR]>0) return a.p[ALPHA_PRO_BASEPAIR]*m_s.m_bp_del_score+down+over; else return a.p[ALPHA_PRO_BASE]*m_s.m_b_del_score+down+over; }; double insert(double down,RNA_Alphabet_Profile b,double over) const { if(b.p[ALPHA_PRO_BASEPAIR]>0) return b.p[ALPHA_PRO_BASEPAIR]*m_s.m_bp_del_score+down+over; else return b.p[ALPHA_PRO_BASE]*m_s.m_b_del_score+down+over; }; double choice(double a, double b) const { return min(a,b); }; double worst_score() const { return DBL_POS; }; DoubleDistProfileAlgebra(const Score &s) : m_s(s) {}; }; /* ****************************************** */ /* SZAlgebra Classes */ /* ****************************************** */ class IntSimiSZAlgebra : public IntScoreSZAlgebraType { private: Score m_s; public: int empty() const {return 0;}; int replace(RNA_Alphabet a,int down, RNA_Alphabet b) const { if(a==ALPHA_BASEPAIR && b == ALPHA_BASEPAIR) return m_s.m_bp_rep_score+down; else { if(a==ALPHA_BASEPAIR || b==ALPHA_BASEPAIR) return INT_MIN/4; else { if(a==b) return m_s.m_b_match_score+down; else return m_s.m_b_rep_score+down; } } }; int del(RNA_Alphabet a,int down) const { if(a==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down; else return m_s.m_b_del_score+down; }; int insert(int down,RNA_Alphabet b) const { if(b==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down; else return m_s.m_b_del_score+down; }; int choice(int a, int b) const { return max(a,b); }; int worst_score() const { return INT_MIN; }; IntSimiSZAlgebra(const Score &s) : m_s(s) {}; }; class IntDistSZAlgebra : public IntScoreSZAlgebraType { private: Score m_s; public: int empty() const {return 0;}; int replace(RNA_Alphabet a,int down, RNA_Alphabet b) const { if(a==ALPHA_BASEPAIR && b == ALPHA_BASEPAIR) return m_s.m_bp_rep_score+down; else { if(a==ALPHA_BASEPAIR || b==ALPHA_BASEPAIR) return INT_MAX/4; else { if(a==b) return m_s.m_b_match_score+down; else return m_s.m_b_rep_score+down; } } }; int del(RNA_Alphabet a,int down) const { if(a==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down; else return m_s.m_b_del_score+down; }; int insert(int down,RNA_Alphabet b) const { if(b==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down; else return m_s.m_b_del_score+down; }; int choice(int a, int b) const { return min(a,b); }; int worst_score() const { return INT_MAX; }; IntDistSZAlgebra(const Score &s) : m_s(s) {}; }; /* ****************************************** */ /* General Algebra Classe */ /* ****************************************** */ /** Distance algebra for forests */ class IntDist_Algebra : public IntScore_AlgebraType { private: Score m_s; public: int empty() const {return 0;}; int replace(RNA_Alphabet a,int down, RNA_Alphabet b, int over) const { if(a==ALPHA_BASEPAIR && b == ALPHA_BASEPAIR) return m_s.m_bp_rep_score+down+over; else { if(a==ALPHA_BASEPAIR || b==ALPHA_BASEPAIR) return INT_MAX/4; else { if(a==b) return m_s.m_b_match_score+down+over; else return m_s.m_b_rep_score+down+over; } } }; int del(RNA_Alphabet a,int down,int over) const { if(a==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int insert(int down,RNA_Alphabet b,int over) const { if(b==ALPHA_BASEPAIR) return m_s.m_bp_del_score+down+over; else return m_s.m_b_del_score+down+over; }; int choice(int a, int b) const { return min(a,b); }; int worst_score() const { return INT_MAX; }; IntDist_Algebra(const Score &s) : m_s(s) {}; }; #endif