]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeracheckrdp.cpp
created mothurOut class to handle logfiles
[mothur.git] / chimeracheckrdp.cpp
index 12785137232677e3a056a43fd31b705f9b618734..3e6dacdee85dbb75fb9b6535f259eead8a4f2509 100644 (file)
 #include "chimeracheckrdp.h"
                
 //***************************************************************************************************************
-ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
+ChimeraCheckRDP::ChimeraCheckRDP(string filename,  string o) {  fastafile = filename;  outputDir = o;  }
 //***************************************************************************************************************
 
 ChimeraCheckRDP::~ChimeraCheckRDP() {
        try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
                delete templateDB;
                delete kmer;
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "~AlignSim");
+               m->errorOut(e, "ChimeraCheckRDP", "~AlignSim");
                exit(1);
        }
 }      
 //***************************************************************************************************************
-void ChimeraCheckRDP::print(ostream& out) {
+void ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
        try {
                
-               mothurOutEndLine();
+               m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
                
-               //vector<bool> isChimeric;  isChimeric.resize(querySeqs.size(), false);
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
+               out << querySeq->getName() << endl;
+               out << "IS scores: " << '\t';
                        
-                               out << querySeqs[i]->getName() << endl;
-                               out << "IS scores: " << '\t';
-                               
-                               //int lastChimericWindowFound = 0;
-                               
-                               for (int k = 0; k < IS[i].size(); k++) {
-                                       out << IS[i][k].score << '\t'; 
-                                       //if (IS[i][k].score > chimeraCutoff) {  isChimeric[i] = true;   lastChimericWindowFound = k;           }                       
-                               }
-                               out << endl;
-                               //if (isChimeric[i]) { 
-                                       //mothurOut(querySeqs[i]->getName() + "\tIS: " + toString(IS[i][lastChimericWindowFound].score) + "\tbreakpoint: " + toString(IS[i][lastChimericWindowFound].midpoint) + "\tleft parent: " + IS[i][lastChimericWindowFound].leftParent + "\tright parent: " + IS[i][lastChimericWindowFound].rightParent); mothurOutEndLine();
-                                       //out << endl << "chimera: YES" << endl;
-                               //}else{
-                                       //out << endl << "chimera: NO" << endl;
-                               //}
+               for (int k = 0; k < IS.size(); k++) {
+                       out << IS[k].score << '\t'; 
+               }
+               out << endl;
+               
+               if (svg) {
+                       if (name != "") { //if user has specific names
+                               map<string, string>::iterator it = names.find(querySeq->getName());
                                
-                               if (svg) {
-                                       
-                                       if (name != "") { //if user has specific names
-                                               map<string, string>::iterator it = names.find(querySeqs[i]->getName());
-                                       
-                                               if (it != names.end()) { //user wants pic of this
-                                                       makeSVGpic(IS[i], i);  //zeros out negative results
-                                               }
-                                       }else{//output them all
-                                               makeSVGpic(IS[i], i);  //zeros out negative results
-                                       }
+                               if (it != names.end()) { //user wants pic of this
+                                       makeSVGpic(IS);  //zeros out negative results
                                }
+                       }else{//output them all
+                               makeSVGpic(IS);  //zeros out negative results
+                       }
                }
-               
-               mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine();
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "print");
+               m->errorOut(e, "ChimeraCheckRDP", "print");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
-int ChimeraCheckRDP::getChimeras() {
+void ChimeraCheckRDP::doPrep() {
        try {
-               
-               //read in query sequences and subject sequences
-               mothurOutEndLine();
-               mothurOut("Reading query sequences... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               mothurOut("Done."); 
-               //templateSeqs = readSeqs(templateFile);
-               templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
-               mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               IS.resize(numSeqs);
-               closest.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
+               templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
+               m->mothurOutEndLine();
                
                kmer = new Kmer(kmerSize);
                
                if (name != "") { 
                        readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
                }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraCheckRDP", "doPrep");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ChimeraCheckRDP::getChimeras(Sequence* query) {
+       try {
                
-               //find closest seq to each querySeq
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       closest[i] = templateDB->findClosestSequence(querySeqs[i]);  
-               }
-
-               //for each query find IS value  
-               if (processors == 1) {
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               IS[i] = findIS(i); 
-                       }
-               }else {         createProcessesIS();    }
-               
+               IS.clear();
+                               
+               querySeq = query;
+                       
+               closest = templateDB->findClosestSequence(query);  
+       
+               IS = findIS(); 
+                                       
                //determine chimera report cutoff - window score above 95%
                //getCutoff();  - not very acurate predictor
                
-               //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
-       
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "getChimeras");
+               m->errorOut(e, "ChimeraCheckRDP", "getChimeras");
                exit(1);
        }
 }
 //***************************************************************************************************************
-vector<sim> ChimeraCheckRDP::findIS(int query) {
+vector<sim> ChimeraCheckRDP::findIS() {
        try {
                
                
@@ -154,13 +105,11 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                vector< map<int, int> > subjectKmerInfo;
                
                vector<sim>  isValues;
-               string queryName = querySeqs[query]->getName();
-               string seq = querySeqs[query]->getUnaligned();
-               
-               mothurOut("Finding IS values for sequence " + toString(query+1)); mothurOutEndLine();
+               string queryName = querySeq->getName();
+               string seq = querySeq->getUnaligned();
                
                queryKmerInfo = kmer->getKmerCounts(seq);
-               subjectKmerInfo = kmer->getKmerCounts(closest[query].getUnaligned());
+               subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
                
                //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
                int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
@@ -169,14 +118,14 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                int start = seq.length() / 10;
                        
                //for each window
-               for (int m = start; m < (seq.length() - start); m+=increment) {
+               for (int f = start; f < (seq.length() - start); f+=increment) {
                        
-                       if ((m - kmerSize) < 0)  { mothurOut("Your sequence is too short for your kmerSize."); mothurOutEndLine(); exit(1); }
+                       if ((f - kmerSize) < 0)  { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
                        
                        sim temp;
                        
-                       string fragLeft = seq.substr(0, m);  //left side of breakpoint
-                       string fragRight = seq.substr(m);  //right side of breakpoint
+                       string fragLeft = seq.substr(0, f);  //left side of breakpoint
+                       string fragRight = seq.substr(f);  //right side of breakpoint
                        
                        //make a sequence of the left side and right side
                        Sequence* left = new Sequence(queryName, fragLeft);
@@ -196,7 +145,7 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                        //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
                        //iterate through left sides map to subtract the number of times you saw things before you got the the right side
                        map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
-                       for (map<int, int>::iterator itleft = queryKmerInfo[m-kmerSize].begin(); itleft != queryKmerInfo[m-kmerSize].end(); itleft++) {
+                       for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
                                int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first];   //times that kmer was seen in total
 
                                //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
@@ -209,7 +158,7 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                        }
                        
                        map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
-                       for (map<int, int>::iterator itright = closeRightKmerInfo[m-kmerSize].begin(); itright != closeRightKmerInfo[m-kmerSize].end(); itright++) {
+                       for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
                                int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first];   //times that kmer was seen in total
 
                                //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
@@ -222,7 +171,7 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                        }
 
                        
-                       int nLeft = calcKmers(closeLeftKmerInfo[m-kmerSize], queryKmerInfo[m-kmerSize]);
+                       int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);
 
                        int nRight = calcKmers(closerightside, rightside);
 
@@ -232,19 +181,19 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                        temp.leftParent = closestLeft.getName();
                        temp.rightParent = closestRight.getName();
                        temp.score = is;
-                       temp.midpoint = m;
+                       temp.midpoint = f;
                        
                        isValues.push_back(temp);
                        
                        delete left;
                        delete right;
-               }       
+               }
                
                return isValues;
        
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "findIS");
+               m->errorOut(e, "ChimeraCheckRDP", "findIS");
                exit(1);
        }
 }
@@ -266,7 +215,7 @@ void ChimeraCheckRDP::readName(string namefile) {
        
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "readName");
+               m->errorOut(e, "ChimeraCheckRDP", "readName");
                exit(1);
        }
 }
@@ -303,42 +252,16 @@ int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
                
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "calcKmers");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void ChimeraCheckRDP::getCutoff() {
-       try{
-               
-               vector<float> temp;
-               
-               //store all is scores for all windows
-               for (int i = 0; i < IS.size(); i++) {
-                       for (int j = 0; j < IS[i].size(); j++) {
-                               temp.push_back(IS[i][j].score);
-                       }
-               }
-               
-               //sort them
-               sort(temp.begin(), temp.end());
-               
-               //get 95%
-               chimeraCutoff = temp[int(temp.size() * 0.95)];
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "getCutoff");
+               m->errorOut(e, "ChimeraCheckRDP", "calcKmers");
                exit(1);
        }
 }
 
 //***************************************************************************************************************
-void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
+void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
        try{
                
-               string file = querySeqs[query]->getName() + ".chimeracheck.svg";
+               string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
                ofstream outsvg;
                openOutputFile(file, outsvg);
                
@@ -346,7 +269,7 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
                
                outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
                outsvg << "<g>\n";
-               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeqs[query]->getName() + "</text>\n";
+               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
                
                outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
                outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
@@ -387,101 +310,10 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
 
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
+               m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
                exit(1);
        }
 }
 //***************************************************************************************************************
-void ChimeraCheckRDP::createProcessesIS() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       IS[i] = findIS(i);  
-                               }                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << IS[i].size() << endl;
-                                       for (int j = 0; j < IS[i].size(); j++) {
-                                               out << IS[i][j].leftParent << '\t'<< IS[i][j].rightParent << '\t' << IS[i][j].midpoint << '\t' << IS[i][j].score << endl;
-                                       }
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                       
-                               int size;
-                               in >> size; gobble(in);
-                               
-                               string left, right;
-                               int mid;
-                               float score;
-                               
-                               IS[k].clear();
-                               
-                               for (int j = 0; j < size; j++) {
-                                       in >> left >> right >> mid >> score;  gobble(in);
-                                       
-                                       sim temp;
-                                       temp.leftParent = left;
-                                       temp.rightParent = right;
-                                       temp.midpoint = mid;
-                                       temp.score = score;
-                                       
-                                       IS[k].push_back(temp);
-                               }
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               IS[i] = findIS(i);
-                       }
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "createProcessesIS");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************