#include #include #include #include #include "utils.h" #include "StrEdit_CostMatrix.h" #define PUBLIC #define PRIVATE static #define MAXSEQS 1000 PUBLIC float **read_distance_matrix(char type[]); PUBLIC char **read_sequence_list(int *n_of_seqs, char *mask); PUBLIC float **Hamming_Distance_Matrix(char **seqs, int n_of_seqs); PUBLIC float **StrEdit_SimpleDistMatrix(char **seqs, int n_of_seqs); PUBLIC float **StrEdit_GotohDistMatrix(char **seqs, int n_of_seqs); PUBLIC void free_distance_matrix(float **x); PUBLIC void printf_distance_matrix(float **x); PUBLIC void printf_taxa_list(void); PUBLIC char *get_taxon_label(int whoami); PUBLIC float StrEdit_SimpleDist(char *str1, char *str2); PUBLIC float StrEdit_GotohDist(char *str1, char *str2); PUBLIC void Set_StrEdit_CostMatrix(char type); PUBLIC void Set_StrEdit_GapCosts(float per_digit, float per_gap); /* NOTE: x[0][0] = (float)size_of_matrix; */ PRIVATE void read_taxa_list(void); PRIVATE int string_consists_of(char line[],char *mask); PRIVATE float StrEditCost( int i, int j, char *T1, char *T2); PRIVATE int decode(char id); PRIVATE char Taxa_List[MAXSEQS][50]; PRIVATE int Taxa_Numbers[MAXSEQS]; PRIVATE int N_of_named_taxa=0; PRIVATE char *file_name; PRIVATE char N_of_infiles=0; PRIVATE float **StrEdit_CostMatrix; PRIVATE char *StrEdit_ValidAlphabet; PRIVATE float StrEdit_GapCost = 1.; PRIVATE float StrEdit_GotohAlpha = 1.; PRIVATE float StrEdit_GotohBeta = 1.; PUBLIC float **read_distance_matrix(char type[]) { char *line; float **D; float tmp; int i,j,size; while(1) { type[0]= '\0'; size = 0; D = NULL; if ((line = get_line(stdin))==NULL) return NULL; if (*line =='@') return NULL; if (*line =='*') { N_of_infiles++; if(file_name) free(file_name); if(strlen(line)>1) { file_name = (char *) space(sizeof(char)*strlen(line)); sscanf(line,"*%s",file_name); } else { file_name = (char *) space(10); sprintf(file_name,"%d",N_of_infiles); } read_taxa_list(); } else if (*line=='>') { int r; size = 0; r = sscanf(line,"> %1s%*[ ] %d", type, &size); fprintf(stderr, "%d ", r); if (r==EOF) return NULL; if((r==2)&&(size>1)) { D=(float **)space((size+1)*sizeof(float *)); for(i=0; i<=size; i++) D[i] = (float *)space((size+1)*sizeof(float)); D[0][0] = (float)size; D[1][1] = 0.0; for(i=2; i<= size; i++) { D[i][i] = 0.0; for(j=1; j1) { file_name = (char *) space(sizeof(char)*strlen(line)); sscanf(line,"*%s",file_name); } else { file_name = (char *) space(10); sprintf(file_name,"%d",N_of_infiles); } read_taxa_list(); free(line); continue; } len = strlen(line); if(string_consists_of(line,mask)){ if(mask[0]=='%') { for(i=0;i false */ return 1; /* loop left after last char -> true */ } /* -------------------------------------------------------------------------- */ PUBLIC void free_distance_matrix(float **x) { int i,n; n=(int) x[0][0]; for(i=0;i<=n;i++) free(x[i]); free(x); x=NULL; } /* -------------------------------------------------------------------------- */ PUBLIC void printf_distance_matrix(float **x) { int i,j,n; n=(int) x[0][0]; printf("> X %d\n",n); if(n>1){ for(i=2;i<=n;i++) { for(j=1;j0){ printf("* List of Taxa: %s\n", file_name); for(i=0;i0) i1 = decode(T1[i-1]); else i1 = 0; if(j>0) j1 = decode(T2[j-1]); else j1 = 0; if(StrEdit_CostMatrix==NULL) { if(i&&j) return (float)(i1!=j1); else return (float) StrEdit_GapCost; } else return (float) StrEdit_CostMatrix[i1][j1]; } /* -------------------------------------------------------------------------- */ PRIVATE int decode(char id) { int n,alen; if (!StrEdit_ValidAlphabet) return (int)id; alen = strlen(StrEdit_ValidAlphabet); if(!alen) return (int)id; for(n=0;n set to ~gap~\n"); return 0; } /* -------------------------------------------------------------------------- */ PUBLIC void Set_StrEdit_CostMatrix(char type) { int i,j; if(StrEdit_ValidAlphabet) { free(StrEdit_ValidAlphabet); StrEdit_ValidAlphabet = NULL; } if(StrEdit_CostMatrix) { free(StrEdit_CostMatrix); StrEdit_CostMatrix = NULL; } switch(type){ case 'D' : StrEdit_ValidAlphabet = (char*) space((20+2)*sizeof(char)); strcpy(StrEdit_ValidAlphabet,StrEdit_DayhoffA); StrEdit_CostMatrix = (float**) space((20+1)*sizeof(float*)); for(i=0;i<=20;i++) StrEdit_CostMatrix[i] = (float*)space((20+1)*sizeof(float)); for(i=1;i<=20;i++) { for(j=1;j<=20;j++) { StrEdit_CostMatrix[i][j] = MAX2(StrEdit_DayhoffM[i][i],StrEdit_DayhoffM[j][j])- StrEdit_DayhoffM[i][j]; } StrEdit_CostMatrix[i][0] = StrEdit_DayhoffM[i][i]; StrEdit_CostMatrix[0][i] = StrEdit_DayhoffM[i][i]; } StrEdit_CostMatrix[0][0] = StrEdit_DayhoffM[0][0]; break; case 'A' : StrEdit_ValidAlphabet = (char*) space((20+2)*sizeof(char)); strcpy(StrEdit_ValidAlphabet,StrEdit_GLHA); StrEdit_CostMatrix = (float**) space((20+1)*sizeof(float*)); for(i=0;i<=20;i++) StrEdit_CostMatrix[i] = (float*)space((20+1)*sizeof(float)); for(i=1;i<=20;i++) { for(j=1;j<=20;j++) StrEdit_CostMatrix[i][j] = StrEdit_GLHM[i][j]; StrEdit_CostMatrix[i][0] = StrEdit_CostMatrix[0][i] = StrEdit_GapCost; } StrEdit_CostMatrix[0][0] = StrEdit_GLHM[0][0]; break; case 'B' : StrEdit_ValidAlphabet = space((4+2)*sizeof(char)); strcpy(StrEdit_ValidAlphabet,StrEdit_BinCodeA); StrEdit_CostMatrix = (float**) space((4+1)*sizeof(float*)); for(i=0;i<=4;i++) StrEdit_CostMatrix[i] = (float*)space((4+1)*sizeof(float)); for(i=0;i<=4;i++) for(j=0;j<=4;j++) StrEdit_CostMatrix[i][j] = StrEdit_BinCodeM[i][j]; break; case 'H' : StrEdit_ValidAlphabet = space((4+2)*sizeof(char)); strcpy(StrEdit_ValidAlphabet,StrEdit_HogewegA); StrEdit_CostMatrix = (float**) space((4+1)*sizeof(float*)); for(i=0;i<=4;i++) StrEdit_CostMatrix[i] = (float*)space((4+1)*sizeof(float)); for(i=0;i<=4;i++) for(j=0;j<=4;j++) StrEdit_CostMatrix[i][j] = StrEdit_HogewegM[i][j]; StrEdit_GotohAlpha = 3.; StrEdit_GotohBeta = 0.; break; default: if(!StrEdit_GapCost) StrEdit_GapCost = 1.; /* This is the simple distance */ } } /* -------------------------------------------------------------------------- */ PUBLIC void Set_StrEdit_GapCosts(float per_digit, float per_gap) { if(per_gap==0.) per_gap = per_digit; if(per_digit<0) nrerror("Gap Costs invalid."); if(per_digit>per_gap) nrerror("Gap Costs invalid."); StrEdit_GapCost = per_digit; StrEdit_GotohAlpha = per_digit; /* Gotoh gap function g(k) = a + b(k-1) */ StrEdit_GotohBeta = per_gap; } /* -------------------------------------------------------------------------- */