]> git.donarmstrong.com Git - mothur.git/blobdiff - refchimeratest.cpp
fixed subsample name file name issue. added count parameter to cluster command....
[mothur.git] / refchimeratest.cpp
index e540d8a255363cc980318b2edf1c97bd4adcd722..2f397e8f71ec0b8a1d47338b1f12cfc552642ed2 100644 (file)
@@ -14,11 +14,10 @@ int MAXINT = numeric_limits<int>::max();
 
 //***************************************************************************************************************
 
-RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileName){
+RefChimeraTest::RefChimeraTest(vector<Sequence>& refs){
 
        m = MothurOut::getInstance();
 
-       m->openOutputFile(chimeraReportFileName, chimeraReportFile);
        numRefSeqs = refs.size();
 
        referenceSeqs.resize(numRefSeqs);
@@ -30,14 +29,22 @@ RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileN
        
        alignLength = referenceSeqs[0].length();
 
-       chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents";
-//     chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
 
 }
+//***************************************************************************************************************
 
+int RefChimeraTest::printHeader(ofstream& chimeraReportFile){
+       try {
+               chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
+               return 0; 
+       }catch(exception& e) {
+               m->errorOut(e, "RefChimeraTest", "execute");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 
-int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
+int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& chimeraReportFile){
        
        vector<vector<int> > left(numRefSeqs);
        vector<int> singleLeft, bestLeft;
@@ -60,12 +67,8 @@ int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
        int nMera = 0;
        string chimeraRefSeq = "";
        
-       if(bestSequenceMismatch - minMismatchToChimera <= 3){
-               nMera = 1;
-               chimeraRefSeq = referenceSeqs[bestMatch];
-       }
-       else {
-
+       if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
+       
                nMera = 2;
                chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
                
@@ -81,6 +84,12 @@ int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
 //             }
                
        }
+       else{
+               nMera = 1;
+               chimeraRefSeq = referenceSeqs[bestMatch];
+       }
+       
+       
        double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
        
 //     double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);         
@@ -110,8 +119,11 @@ int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left,
        for(int i=0;i<numRefSeqs;i++){
                
                int lDiffs = 0;
+               
                for(int l=0;l<alignLength;l++){
-                       if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+//                     if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+                                               
+                       if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
                                lDiffs++;
                        }
                        left[i][l] = lDiffs;
@@ -120,11 +132,13 @@ int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left,
                int rDiffs = 0;
                int index = 0;
                for(int l=alignLength-1;l>=0;l--){
-                       if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+//                     if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+                       if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
                                rDiffs++;
                        }                       
                        right[i][index++] = rDiffs;
                }
+               
                if(lDiffs < bestSequenceMismatch){
                        bestSequenceMismatch = lDiffs;
                        bestRefSeq = i;
@@ -254,8 +268,9 @@ double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq
        int mismatch = 0;
        
        for(int i=0;i<alignLength;i++){
-               if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
-                       if(querySeq[i] == '-' && chimeraRefSeq[i] == '-'){      /*      do nothing      */      }
+//             if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
+               if(chimeraRefSeq[i] != '.' && querySeq[i] != '.'){
+                       if(querySeq[i] == '-' && chimeraRefSeq[i] == '-' && chimeraRefSeq[i] != 'N'){   /*      do nothing      */      }
                        else if(querySeq[i] == chimeraRefSeq[i]){
                                match++;
                        }