]> git.donarmstrong.com Git - mothur.git/blobdiff - ccode.cpp
fixes while testing 1.33.0
[mothur.git] / ccode.cpp
index bfcd44d8239a30f8ad18db73f0c06268dc0e7474..00cd3f1948e009a86456e32bbcde2a40c6f889ac 100644 (file)
--- a/ccode.cpp
+++ b/ccode.cpp
 #include "ignoregaps.h"
 #include "eachgapdist.h"
 
-
 //***************************************************************************************************************
-Ccode::Ccode(string filename, string o) {  
-       fastafile = filename;  outputDir = o; 
+Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() {  
+ try { 
+       
+       fastafile = filename;  
+       outputDir = o; 
+       templateFileName = temp;  templateSeqs = readSeqs(temp); 
+       setMask(mask);
+       filter = f;
+       window = win;
+       numWanted = numW;
+       
        distCalc = new eachGapDist();
        decalc = new DeCalculator();
        
-       mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
-       ofstream out2;
-       openOutputFile(mapInfo, out2);
+       mapInfo = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "mapinfo";
+       
+       #ifdef USE_MPI
+               
+               //char* inFileName = new char[mapInfo.length()];
+               //memcpy(inFileName, mapInfo.c_str(), mapInfo.length());
+               
+               char inFileName[1024];
+               strcpy(inFileName, mapInfo.c_str());
+               
+               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+
+               MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap);  //comm, filename, mode, info, filepointer
+               
+               //delete inFileName;
+
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               if (pid == 0) {
+                       string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n";
+                       
+                       MPI_Status status;
+                       int length = outString.length();
+                       char* buf2 = new char[length];
+                       memcpy(buf2, outString.c_str(), length);
+                               
+                       MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status);
+                       delete buf2;
+               }
+       #else
+
+               ofstream out2;
+               m->openOutputFile(mapInfo, out2);
                
-       out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
-       out2.close();
+               out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+               out2.close();
+       #endif
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Ccode", "Ccode");
+               exit(1);
+       }
 }
 //***************************************************************************************************************
 Ccode::~Ccode() {
        delete distCalc;
        delete decalc;
+       
+       #ifdef USE_MPI
+               MPI_File_close(&outMap);
+       #endif
 }      
 //***************************************************************************************************************
-void Ccode::printHeader(ostream& out) {
-       out << "For full window mapping info refer to " << mapInfo << endl << endl;
-}
-//***************************************************************************************************************
-void Ccode::print(ostream& out) {
+Sequence Ccode::print(ostream& out, ostream& outAcc) {
        try {
                
-               mothurOutEndLine();
-               
                ofstream out2;
-               openOutputFileAppend(mapInfo, out2);
+               m->openOutputFileAppend(mapInfo, out2);
                
                out2 << querySeq->getName() << endl;
                for (it = spotMap.begin(); it!= spotMap.end(); it++) {
                        out2 << it->first << '\t' << it->second << endl;
                }
                out2.close();
-               
-               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-               
                out << querySeq->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
                        
                for (int j = 0; j < closest.size(); j++) {
-                       out << setprecision(3) << closest[j].seq->getName() << '\t' << closest[j].dist << endl;
+                       out << closest[j].seq->getName() << '\t' << closest[j].dist << endl;
                }
                out << endl << endl;
                
@@ -72,14 +112,12 @@ void Ccode::print(ostream& out) {
                        
                out << endl << "Window\tStartPos\tEndPos" << endl;
                it = trim.begin();
-                       
                for (int k = 0; k < windows.size()-1; k++) {
                        out << k+1 << '\t' << spotMap[windows[k]-it->first] << '\t' << spotMap[windows[k]-it->first+windowSizes] << endl;
                }
                        
                out << windows.size() << '\t' << spotMap[windows[windows.size()-1]-it->first] << '\t' << spotMap[it->second-it->first-1] << endl;
                out << endl;
-                       
                out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
                for (int k = 0; k < windows.size(); k++) {
                        float ds = averageQuery[k] / averageRef[k]; 
@@ -94,7 +132,7 @@ void Ccode::print(ostream& out) {
                //float fs = varQuery[query] / varRef[query];   /* F-Snedecor, test for differences of variances */
                        
                bool results = false;   
-                                               
+                                       
                //confidence limit, t - Student, anova
                out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
                        
@@ -116,16 +154,149 @@ void Ccode::print(ostream& out) {
                out << endl;    
                        
                if (results) {
-                       mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
+                       m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine();
+                       outAcc << querySeq->getName() << endl;
                }
+
+               //free memory
+               for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
+
+               return *querySeq;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Ccode", "print");
+               exit(1);
+       }
+}
+#ifdef USE_MPI
+//***************************************************************************************************************
+Sequence Ccode::print(MPI_File& out, MPI_File& outAcc) {
+       try {
+               
+               string outMapString = "";
+               
+               outMapString += querySeq->getName() + "\n";
+               for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+                       outMapString += toString(it->first)  + "\t" + toString(it->second)  + "\n";
+               }
+               printMapping(outMapString);
+               outMapString = "";
                
+               string outString = "";
+               string outAccString = "";
+               
+               outString +=  querySeq->getName() + "\n\nReference sequences used and distance to query:\n";
+                       
+               for (int j = 0; j < closest.size(); j++) {
+                       outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n";
+               }
+               outString += "\n\nMapping information: ";
+               
+               //for each window
+               //window mapping info.
+               //you mask and did not filter
+               if ((seqMask != "") && (!filter)) { outString += "mask and trim."; }
+                               
+               //you filtered and did not mask
+               if ((seqMask == "") && (filter)) { outString += "filter and trim."; }
+                               
+               //you masked and filtered
+               if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; }
+                       
+               outString += "\nWindow\tStartPos\tEndPos\n";
+               it = trim.begin();
+               for (int k = 0; k < windows.size()-1; k++) {
+                       outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n";
+               }
+                       
+               outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n";
+               
+               outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n";
+               for (int k = 0; k < windows.size(); k++) {
+                       float ds = averageQuery[k] / averageRef[k]; 
+                       outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n";
+               }
+                       
+               //varRef
+               //varQuery
+               /* F test for differences among variances.
+               * varQuery is expected to be higher or similar than varRef */
+               //float fs = varQuery[query] / varRef[query];   /* F-Snedecor, test for differences of variances */
+                       
+               bool results = false;   
+                                       
+               //confidence limit, t - Student, anova
+               outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n";
+                       
+               for (int k = 0; k < windows.size(); k++) {
+                       string temp = "";
+                       if (isChimericConfidence[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
+                               
+                       if (isChimericTStudent[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
+                               
+                       if (isChimericANOVA[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
+                       
+                       outString += toString(k+1) + "\t" + temp + "\n";
+                               
+                       if (temp == "*\t*\t*\t") {  results = true;  }
+               }
+               outString += "\n";      
+               
+               MPI_Status status;
+               int length = outString.length();
+               char* buf2 = new char[length];
+               memcpy(buf2, outString.c_str(), length);
+                               
+               MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
+               delete buf2;
+
+               if (results) {
+                       m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine();
+                       outAccString += querySeq->getName() + "\n";
+                       
+                       MPI_Status statusAcc;
+                       length = outAccString.length();
+                       char* buf = new char[length];
+                       memcpy(buf, outAccString.c_str(), length);
+                               
+                       MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
+                       delete buf;
+               }
+
+               //free memory
+               for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
+
+               return *querySeq;
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "print");
+               m->errorOut(e, "Ccode", "print");
                exit(1);
        }
 }
 //***************************************************************************************************************
+int Ccode::printMapping(string& output) {
+       try {
+                       MPI_Status status;
+                       int length = output.length();
+                       char* buf = new char[length];
+                       memcpy(buf, output.c_str(), length);
+                               
+                       MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status);
+                       delete buf;
+                       
+                       return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Ccode", "printMapping");
+               exit(1);
+       }
+}
+#endif
+//***************************************************************************************************************
 int Ccode::getChimeras(Sequence* query) {
        try {
        
@@ -156,6 +327,8 @@ int Ccode::getChimeras(Sequence* query) {
                //find closest matches to query
                closest = findClosest(query, numWanted);
                
+               if (m->control_pressed) {  return 0;  }
+               
                //initialize spotMap
                for (int i = 0; i < query->getAligned().length(); i++) {        spotMap[i] = i;         }
        
@@ -176,9 +349,12 @@ int Ccode::getChimeras(Sequence* query) {
                        for (int i = 0; i < closest.size(); i++) { temp.push_back(closest[i].seq);  }
                        temp.push_back(query);  
                        
-                       createFilter(temp);
+                       createFilter(temp, 0.5);
                
-                       for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]);  }
+                       for (int i = 0; i < temp.size(); i++) { 
+                               if (m->control_pressed) {  return 0;  }
+                               runFilter(temp[i]);  
+                       }
                        
                        //update spotMap
                        map<int, int> newMap;
@@ -196,38 +372,37 @@ int Ccode::getChimeras(Sequence* query) {
 
                //trim sequences - this follows ccodes remove_extra_gaps 
                trimSequences(query);
-               
+               if (m->control_pressed) {  return 0;  }
                
                //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length().  
                //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations
                windows = findWindows();  
-               
+               if (m->control_pressed) {  return 0;  }
 
                //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later 
                removeBadReferenceSeqs(closest);
-               
+               if (m->control_pressed) {  return 0;  }
                
                //find the averages for each querys references
                getAverageRef(closest);  //fills sumRef, averageRef, sumSquaredRef and refCombo.
                getAverageQuery(closest, query);  //fills sumQuery, averageQuery, sumSquaredQuery.
-                                       
+               if (m->control_pressed) {  return 0;  }                 
                
                //find the averages for each querys references 
                findVarianceRef();  //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
-                       
+               if (m->control_pressed) {  return 0;  } 
                        
                //find the averages for the query 
                findVarianceQuery();  //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0.
+               if (m->control_pressed) {  return 0;  }
                                        
                determineChimeras();  //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. 
-               
-               //free memory
-               for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
+               if (m->control_pressed) {  return 0;  }
                
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "getChimeras");
+               m->errorOut(e, "Ccode", "getChimeras");
                exit(1);
        }
 }
@@ -309,7 +484,7 @@ void Ccode::trimSequences(Sequence* query) {
 
                
                //check to make sure that is not whole seq
-               if ((rearPos - frontPos - 1) <= 0) {  mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1);  }
+               if ((rearPos - frontPos - 1) <= 0) {  m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); m->mothurOutEndLine(); exit(1);  }
                
                map<int, int> tempTrim;
                tempTrim[frontPos] = rearPos;
@@ -329,7 +504,7 @@ void Ccode::trimSequences(Sequence* query) {
                spotMap = newMap;
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "trimSequences");
+               m->errorOut(e, "Ccode", "trimSequences");
                exit(1);
        }
 }
@@ -344,13 +519,13 @@ vector<int> Ccode::findWindows() {
        
                //default is wanted = 10% of total length
                if (windowSizes > length) { 
-                       mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
+                       m->mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
                        windowSizes = length / 10;
                }else if (windowSizes == 0) { windowSizes = length / 10;  }
                else if (windowSizes > (length * 0.20)) {
-                       mothurOut("You have selected a window that is larger than 20% of your sequence length.  This is not recommended, but I will continue anyway."); mothurOutEndLine();
+                       m->mothurOut("You have selected a window that is larger than 20% of your sequence length.  This is not recommended, but I will continue anyway."); m->mothurOutEndLine();
                }else if (windowSizes < (length * 0.05)) {
-                       mothurOut("You have selected a window that is smaller than 5% of your sequence length.  This is not recommended, but I will continue anyway."); mothurOutEndLine();
+                       m->mothurOut("You have selected a window that is smaller than 5% of your sequence length.  This is not recommended, but I will continue anyway."); m->mothurOutEndLine();
                }
                
                //save starting points of each window
@@ -364,7 +539,7 @@ vector<int> Ccode::findWindows() {
                return win;
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "findWindows");
+               m->errorOut(e, "Ccode", "findWindows");
                exit(1);
        }
 }
@@ -419,7 +594,7 @@ int Ccode::getDiff(string seqA, string seqB) {
        
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "getDiff");
+               m->errorOut(e, "Ccode", "getDiff");
                exit(1);
        }
 }
@@ -489,12 +664,12 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs) {
                        seqs = goodSeqs;
                        
                }else { //warn, but dont remove any
-                       mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity.  I will continue, but please check."); mothurOutEndLine();  
+                       m->mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity.  I will continue, but please check."); m->mothurOutEndLine();  
                }
 
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "removeBadReferenceSeqs");
+               m->errorOut(e, "Ccode", "removeBadReferenceSeqs");
                exit(1);
        }
 }
@@ -525,12 +700,16 @@ vector<SeqDist>  Ccode::findClosest(Sequence* q, int numWanted) {
                }
                        
                sort(topMatches.begin(), topMatches.end(), compareSeqDist);
+               
+               for (int i = numWanted; i < topMatches.size(); i++) {  delete topMatches[i].seq;  }
+               
+               topMatches.resize(numWanted);
                        
                return topMatches;
 
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "findClosestSides");
+               m->errorOut(e, "Ccode", "findClosestSides");
                exit(1);
        }
 }
@@ -617,7 +796,7 @@ void Ccode::getAverageRef(vector<SeqDist> ref) {
                
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "getAverageRef");
+               m->errorOut(e, "Ccode", "getAverageRef");
                exit(1);
        }
 }
@@ -686,7 +865,7 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, Sequence* query) {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "getAverageQuery");
+               m->errorOut(e, "Ccode", "getAverageQuery");
                exit(1);
        }
 }
@@ -712,7 +891,7 @@ void Ccode::findVarianceRef() {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "findVarianceRef");
+               m->errorOut(e, "Ccode", "findVarianceRef");
                exit(1);
        }
 }
@@ -737,7 +916,7 @@ void Ccode::findVarianceQuery() {
 
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "findVarianceQuery");
+               m->errorOut(e, "Ccode", "findVarianceQuery");
                exit(1);
        }
 }
@@ -799,7 +978,7 @@ void Ccode::determineChimeras() {
                
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "determineChimeras");
+               m->errorOut(e, "Ccode", "determineChimeras");
                exit(1);
        }
 }
@@ -844,13 +1023,13 @@ float Ccode::getT(int numseq) {
         else if (numseq > 2) tvalue = 2.353;
         else if (numseq > 1) tvalue = 2.920;
                else if (numseq <= 1) {
-                       mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
+                       m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine();
                }
                
                return tvalue;
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "getT");
+               m->errorOut(e, "Ccode", "getT");
                exit(1);
        }
 }
@@ -896,13 +1075,13 @@ float Ccode::getF(int numseq) {
         else if (numseq > 1) fvalue = 18.5;
         else if (numseq > 0) fvalue = 161;
                else if (numseq <= 0) {
-                       mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
+                       m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine();
         }
                
                return fvalue;
        }
        catch(exception& e) {
-               errorOut(e, "Ccode", "getF");
+               m->errorOut(e, "Ccode", "getF");
                exit(1);
        }
 }