X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeracheckrdp.cpp;h=3e6dacdee85dbb75fb9b6535f259eead8a4f2509;hb=74844a60d80c6dd06e3fb02ee9b928424f9019b0;hp=12785137232677e3a056a43fd31b705f9b618734;hpb=a5afca18544555fba2d9c3670ad1f8574916b0a0;p=mothur.git diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp index 1278513..3e6dacd 100644 --- a/chimeracheckrdp.cpp +++ b/chimeracheckrdp.cpp @@ -10,140 +10,91 @@ #include "chimeracheckrdp.h" //*************************************************************************************************************** -ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp) { fastafile = filename; templateFile = temp; } +ChimeraCheckRDP::ChimeraCheckRDP(string filename, string o) { fastafile = filename; outputDir = o; } //*************************************************************************************************************** ChimeraCheckRDP::~ChimeraCheckRDP() { try { - for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; } delete templateDB; delete kmer; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "~AlignSim"); + m->errorOut(e, "ChimeraCheckRDP", "~AlignSim"); exit(1); } } //*************************************************************************************************************** -void ChimeraCheckRDP::print(ostream& out) { +void ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { try { - mothurOutEndLine(); + m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine(); - //vector isChimeric; isChimeric.resize(querySeqs.size(), false); - - for (int i = 0; i < querySeqs.size(); i++) { + out << querySeq->getName() << endl; + out << "IS scores: " << '\t'; - out << querySeqs[i]->getName() << endl; - out << "IS scores: " << '\t'; - - //int lastChimericWindowFound = 0; - - for (int k = 0; k < IS[i].size(); k++) { - out << IS[i][k].score << '\t'; - //if (IS[i][k].score > chimeraCutoff) { isChimeric[i] = true; lastChimericWindowFound = k; } - } - out << endl; - //if (isChimeric[i]) { - //mothurOut(querySeqs[i]->getName() + "\tIS: " + toString(IS[i][lastChimericWindowFound].score) + "\tbreakpoint: " + toString(IS[i][lastChimericWindowFound].midpoint) + "\tleft parent: " + IS[i][lastChimericWindowFound].leftParent + "\tright parent: " + IS[i][lastChimericWindowFound].rightParent); mothurOutEndLine(); - //out << endl << "chimera: YES" << endl; - //}else{ - //out << endl << "chimera: NO" << endl; - //} + for (int k = 0; k < IS.size(); k++) { + out << IS[k].score << '\t'; + } + out << endl; + + if (svg) { + if (name != "") { //if user has specific names + map::iterator it = names.find(querySeq->getName()); - if (svg) { - - if (name != "") { //if user has specific names - map::iterator it = names.find(querySeqs[i]->getName()); - - if (it != names.end()) { //user wants pic of this - makeSVGpic(IS[i], i); //zeros out negative results - } - }else{//output them all - makeSVGpic(IS[i], i); //zeros out negative results - } + if (it != names.end()) { //user wants pic of this + makeSVGpic(IS); //zeros out negative results } + }else{//output them all + makeSVGpic(IS); //zeros out negative results + } } - - mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "print"); + m->errorOut(e, "ChimeraCheckRDP", "print"); exit(1); } } - //*************************************************************************************************************** -int ChimeraCheckRDP::getChimeras() { +void ChimeraCheckRDP::doPrep() { try { - - //read in query sequences and subject sequences - mothurOutEndLine(); - mothurOut("Reading query sequences... "); cout.flush(); - querySeqs = readSeqs(fastafile); - mothurOut("Done."); - //templateSeqs = readSeqs(templateFile); - templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0); - mothurOutEndLine(); - - int numSeqs = querySeqs.size(); - - IS.resize(numSeqs); - closest.resize(numSeqs); - - //break up file if needed - int linesPerProcess = numSeqs / processors ; - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - //find breakup of sequences for all times we will Parallelize - if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); } - else { - //fill line pairs - for (int i = 0; i < (processors-1); i++) { - lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess))); - } - //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end - int i = processors - 1; - lines.push_back(new linePair((i*linesPerProcess), numSeqs)); - } - - #else - lines.push_back(new linePair(0, numSeqs)); - #endif + templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0); + m->mothurOutEndLine(); kmer = new Kmer(kmerSize); if (name != "") { readName(name); //fills name map with names of seqs the user wants to have .svg for. } + } + catch(exception& e) { + m->errorOut(e, "ChimeraCheckRDP", "doPrep"); + exit(1); + } +} +//*************************************************************************************************************** +int ChimeraCheckRDP::getChimeras(Sequence* query) { + try { - //find closest seq to each querySeq - for (int i = 0; i < querySeqs.size(); i++) { - closest[i] = templateDB->findClosestSequence(querySeqs[i]); - } - - //for each query find IS value - if (processors == 1) { - for (int i = 0; i < querySeqs.size(); i++) { - IS[i] = findIS(i); - } - }else { createProcessesIS(); } - + IS.clear(); + + querySeq = query; + + closest = templateDB->findClosestSequence(query); + + IS = findIS(); + //determine chimera report cutoff - window score above 95% //getCutoff(); - not very acurate predictor - //free memory - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } - return 0; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "getChimeras"); + m->errorOut(e, "ChimeraCheckRDP", "getChimeras"); exit(1); } } //*************************************************************************************************************** -vector ChimeraCheckRDP::findIS(int query) { +vector ChimeraCheckRDP::findIS() { try { @@ -154,13 +105,11 @@ vector ChimeraCheckRDP::findIS(int query) { vector< map > subjectKmerInfo; vector isValues; - string queryName = querySeqs[query]->getName(); - string seq = querySeqs[query]->getUnaligned(); - - mothurOut("Finding IS values for sequence " + toString(query+1)); mothurOutEndLine(); + string queryName = querySeq->getName(); + string seq = querySeq->getUnaligned(); queryKmerInfo = kmer->getKmerCounts(seq); - subjectKmerInfo = kmer->getKmerCounts(closest[query].getUnaligned()); + subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned()); //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]); @@ -169,14 +118,14 @@ vector ChimeraCheckRDP::findIS(int query) { int start = seq.length() / 10; //for each window - for (int m = start; m < (seq.length() - start); m+=increment) { + for (int f = start; f < (seq.length() - start); f+=increment) { - if ((m - kmerSize) < 0) { mothurOut("Your sequence is too short for your kmerSize."); mothurOutEndLine(); exit(1); } + if ((f - kmerSize) < 0) { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); } sim temp; - string fragLeft = seq.substr(0, m); //left side of breakpoint - string fragRight = seq.substr(m); //right side of breakpoint + string fragLeft = seq.substr(0, f); //left side of breakpoint + string fragRight = seq.substr(f); //right side of breakpoint //make a sequence of the left side and right side Sequence* left = new Sequence(queryName, fragLeft); @@ -196,7 +145,7 @@ vector ChimeraCheckRDP::findIS(int query) { //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side //iterate through left sides map to subtract the number of times you saw things before you got the the right side map rightside = queryKmerInfo[queryKmerInfo.size()-1]; - for (map::iterator itleft = queryKmerInfo[m-kmerSize].begin(); itleft != queryKmerInfo[m-kmerSize].end(); itleft++) { + for (map::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) { int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first]; //times that kmer was seen in total //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side @@ -209,7 +158,7 @@ vector ChimeraCheckRDP::findIS(int query) { } map closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1]; - for (map::iterator itright = closeRightKmerInfo[m-kmerSize].begin(); itright != closeRightKmerInfo[m-kmerSize].end(); itright++) { + for (map::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) { int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first]; //times that kmer was seen in total //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side @@ -222,7 +171,7 @@ vector ChimeraCheckRDP::findIS(int query) { } - int nLeft = calcKmers(closeLeftKmerInfo[m-kmerSize], queryKmerInfo[m-kmerSize]); + int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]); int nRight = calcKmers(closerightside, rightside); @@ -232,19 +181,19 @@ vector ChimeraCheckRDP::findIS(int query) { temp.leftParent = closestLeft.getName(); temp.rightParent = closestRight.getName(); temp.score = is; - temp.midpoint = m; + temp.midpoint = f; isValues.push_back(temp); delete left; delete right; - } + } return isValues; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "findIS"); + m->errorOut(e, "ChimeraCheckRDP", "findIS"); exit(1); } } @@ -266,7 +215,7 @@ void ChimeraCheckRDP::readName(string namefile) { } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "readName"); + m->errorOut(e, "ChimeraCheckRDP", "readName"); exit(1); } } @@ -303,42 +252,16 @@ int ChimeraCheckRDP::calcKmers(map query, map subject) { } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "calcKmers"); - exit(1); - } -} - -//*************************************************************************************************************** -void ChimeraCheckRDP::getCutoff() { - try{ - - vector temp; - - //store all is scores for all windows - for (int i = 0; i < IS.size(); i++) { - for (int j = 0; j < IS[i].size(); j++) { - temp.push_back(IS[i][j].score); - } - } - - //sort them - sort(temp.begin(), temp.end()); - - //get 95% - chimeraCutoff = temp[int(temp.size() * 0.95)]; - - } - catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "getCutoff"); + m->errorOut(e, "ChimeraCheckRDP", "calcKmers"); exit(1); } } //*************************************************************************************************************** -void ChimeraCheckRDP::makeSVGpic(vector info, int query) { +void ChimeraCheckRDP::makeSVGpic(vector info) { try{ - string file = querySeqs[query]->getName() + ".chimeracheck.svg"; + string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; ofstream outsvg; openOutputFile(file, outsvg); @@ -346,7 +269,7 @@ void ChimeraCheckRDP::makeSVGpic(vector info, int query) { outsvg << "\n"; outsvg << "\n"; - outsvg << "Plotted IS values for " + querySeqs[query]->getName() + "\n"; + outsvg << "Plotted IS values for " + querySeq->getName() + "\n"; outsvg << "\n"; outsvg << "\n"; @@ -387,101 +310,10 @@ void ChimeraCheckRDP::makeSVGpic(vector info, int query) { } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); + m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); exit(1); } } //*************************************************************************************************************** -void ChimeraCheckRDP::createProcessesIS() { - try { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; - vector processIDS; - - //loop through and create all the processes you want - while (process != processors) { - int pid = fork(); - - if (pid > 0) { - processIDS.push_back(pid); - process++; - }else if (pid == 0){ - - for (int i = lines[process]->start; i < lines[process]->end; i++) { - IS[i] = findIS(i); - } - - //write out data to file so parent can read it - ofstream out; - string s = toString(getpid()) + ".temp"; - openOutputFile(s, out); - - //output pairs - for (int i = lines[process]->start; i < lines[process]->end; i++) { - out << IS[i].size() << endl; - for (int j = 0; j < IS[i].size(); j++) { - out << IS[i][j].leftParent << '\t'<< IS[i][j].rightParent << '\t' << IS[i][j].midpoint << '\t' << IS[i][j].score << endl; - } - } - - out.close(); - - exit(0); - }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); } - } - - //force parent to wait until all the processes are done - for (int i=0;istart; k < lines[i]->end; k++) { - - int size; - in >> size; gobble(in); - - string left, right; - int mid; - float score; - - IS[k].clear(); - - for (int j = 0; j < size; j++) { - in >> left >> right >> mid >> score; gobble(in); - - sim temp; - temp.leftParent = left; - temp.rightParent = right; - temp.midpoint = mid; - temp.score = score; - - IS[k].push_back(temp); - } - } - - in.close(); - remove(s.c_str()); - } -#else - for (int i = 0; i < querySeqs.size(); i++) { - IS[i] = findIS(i); - } -#endif - } - catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "createProcessesIS"); - exit(1); - } -} - -//***************************************************************************************************************