X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=chimeracheckrdp.cpp;h=42b531282c5f020664277a9e907699a3bf227658;hp=f9b2319a0dfc489c41d1634c2e4519e3db73bdc2;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=ef3f6d42fe720cd6d91419e5e32f8c04d8765010 diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp index f9b2319..42b5312 100644 --- a/chimeracheckrdp.cpp +++ b/chimeracheckrdp.cpp @@ -10,139 +10,143 @@ #include "chimeracheckrdp.h" //*************************************************************************************************************** -ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp) { fastafile = filename; templateFile = temp; } +ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() { + try { + fastafile = filename; + templateFileName = temp; + name = n; + svg = s; + increment = inc; + kmerSize = k; + outputDir = o; + + templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0, rand()); + m->mothurOutEndLine(); + + kmer = new Kmer(kmerSize); + + if (name != "") { + readName(name); //fills name map with names of seqs the user wants to have .svg for. + } + } + catch(exception& e) { + m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP"); + exit(1); + } +} //*************************************************************************************************************** ChimeraCheckRDP::~ChimeraCheckRDP() { try { - for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; } delete templateDB; delete kmer; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "~AlignSim"); + m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP"); exit(1); } } //*************************************************************************************************************** -void ChimeraCheckRDP::print(ostream& out) { +Sequence ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { try { - mothurOutEndLine(); - - //vector isChimeric; isChimeric.resize(querySeqs.size(), false); + m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine(); - for (int i = 0; i < querySeqs.size(); i++) { + out << querySeq->getName() << endl; + out << "IS scores: " << '\t'; - out << querySeqs[i]->getName() << endl; - out << "IS scores: " << '\t'; - - //int lastChimericWindowFound = 0; - - for (int k = 0; k < IS[i].size(); k++) { - out << IS[i][k].score << '\t'; - //if (IS[i][k].score > chimeraCutoff) { isChimeric[i] = true; lastChimericWindowFound = k; } - } - - //if (isChimeric[i]) { - //mothurOut(querySeqs[i]->getName() + "\tIS: " + toString(IS[i][lastChimericWindowFound].score) + "\tbreakpoint: " + toString(IS[i][lastChimericWindowFound].midpoint) + "\tleft parent: " + IS[i][lastChimericWindowFound].leftParent + "\tright parent: " + IS[i][lastChimericWindowFound].rightParent); mothurOutEndLine(); - //out << endl << "chimera: YES" << endl; - //}else{ - //out << endl << "chimera: NO" << endl; - //} + for (int k = 0; k < IS.size(); k++) { + out << IS[k].score << '\t'; + } + out << endl; + + if (svg) { + if (name != "") { //if user has specific names + map::iterator it = names.find(querySeq->getName()); - if (svg) { - - if (name != "") { //if user has specific names - map::iterator it = names.find(querySeqs[i]->getName()); - - if (it != names.end()) { //user wants pic of this - makeSVGpic(IS[i], i); //zeros out negative results - } - }else{//output them all - makeSVGpic(IS[i], i); //zeros out negative results - } + if (it != names.end()) { //user wants pic of this + makeSVGpic(IS); //zeros out negative results } + }else{//output them all + makeSVGpic(IS); //zeros out negative results + } } + return *querySeq; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "print"); + m->errorOut(e, "ChimeraCheckRDP", "print"); exit(1); } } - +#ifdef USE_MPI //*************************************************************************************************************** -void ChimeraCheckRDP::getChimeras() { +Sequence ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) { try { - //read in query sequences and subject sequences - mothurOutEndLine(); - mothurOut("Reading query sequences... "); cout.flush(); - querySeqs = readSeqs(fastafile); - mothurOut("Done."); - //templateSeqs = readSeqs(templateFile); - templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0); - mothurOutEndLine(); - - int numSeqs = querySeqs.size(); - - IS.resize(numSeqs); - closest.resize(numSeqs); - - //break up file if needed - int linesPerProcess = numSeqs / processors ; - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - //find breakup of sequences for all times we will Parallelize - if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); } - else { - //fill line pairs - for (int i = 0; i < (processors-1); i++) { - lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess))); - } - //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end - int i = processors - 1; - lines.push_back(new linePair((i*linesPerProcess), numSeqs)); - } - - #else - lines.push_back(new linePair(0, numSeqs)); - #endif + cout << "Processing: " << querySeq->getName() << endl; - kmer = new Kmer(kmerSize); + string outString = ""; - if (name != "") { - readName(name); //fills name map with names of seqs the user wants to have .svg for. + outString += querySeq->getName() + "\nIS scores: \t"; + + for (int k = 0; k < IS.size(); k++) { + outString += toString(IS[k].score) + "\t"; } + outString += "\n"; - //find closest seq to each querySeq - for (int i = 0; i < querySeqs.size(); i++) { - closest[i] = templateDB->findClosestSequence(querySeqs[i]); - } + MPI_Status status; + int length = outString.length(); + char* buf = new char[length]; + memcpy(buf, outString.c_str(), length); + + MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); + delete buf; - //for each query find IS value - if (processors == 1) { - for (int i = 0; i < querySeqs.size(); i++) { - IS[i] = findIS(i); + if (svg) { + if (name != "") { //if user has specific names + map::iterator it = names.find(querySeq->getName()); + + if (it != names.end()) { //user wants pic of this + makeSVGpic(IS); //zeros out negative results + } + }else{//output them all + makeSVGpic(IS); //zeros out negative results } - }else { createProcessesIS(); } + } + + return *querySeq; + } + catch(exception& e) { + m->errorOut(e, "ChimeraCheckRDP", "print"); + exit(1); + } +} +#endif +//*************************************************************************************************************** +int ChimeraCheckRDP::getChimeras(Sequence* query) { + try { + IS.clear(); + + querySeq = query; + + closest = templateDB->findClosestSequence(query); + + IS = findIS(); + //determine chimera report cutoff - window score above 95% //getCutoff(); - not very acurate predictor - //free memory - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } - - + return 0; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "getChimeras"); + m->errorOut(e, "ChimeraCheckRDP", "getChimeras"); exit(1); } } //*************************************************************************************************************** -vector ChimeraCheckRDP::findIS(int query) { +vector ChimeraCheckRDP::findIS() { try { @@ -153,13 +157,11 @@ vector ChimeraCheckRDP::findIS(int query) { vector< map > subjectKmerInfo; vector isValues; - string queryName = querySeqs[query]->getName(); - string seq = querySeqs[query]->getUnaligned(); - - mothurOut("Finding IS values for sequence " + toString(query+1)); mothurOutEndLine(); + string queryName = querySeq->getName(); + string seq = querySeq->getUnaligned(); queryKmerInfo = kmer->getKmerCounts(seq); - subjectKmerInfo = kmer->getKmerCounts(closest[query].getUnaligned()); + subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned()); //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]); @@ -168,14 +170,16 @@ vector ChimeraCheckRDP::findIS(int query) { int start = seq.length() / 10; //for each window - for (int m = start; m < (seq.length() - start); m+=increment) { + for (int f = start; f < (seq.length() - start); f+=increment) { + + if (m->control_pressed) { return isValues; } - if ((m - kmerSize) < 0) { mothurOut("Your sequence is too short for your kmerSize."); mothurOutEndLine(); exit(1); } + if ((f - kmerSize) < 0) { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); } sim temp; - string fragLeft = seq.substr(0, m); //left side of breakpoint - string fragRight = seq.substr(m); //right side of breakpoint + string fragLeft = seq.substr(0, f); //left side of breakpoint + string fragRight = seq.substr(f); //right side of breakpoint //make a sequence of the left side and right side Sequence* left = new Sequence(queryName, fragLeft); @@ -195,7 +199,7 @@ vector ChimeraCheckRDP::findIS(int query) { //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side //iterate through left sides map to subtract the number of times you saw things before you got the the right side map rightside = queryKmerInfo[queryKmerInfo.size()-1]; - for (map::iterator itleft = queryKmerInfo[m-kmerSize].begin(); itleft != queryKmerInfo[m-kmerSize].end(); itleft++) { + for (map::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) { int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first]; //times that kmer was seen in total //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side @@ -208,7 +212,7 @@ vector ChimeraCheckRDP::findIS(int query) { } map closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1]; - for (map::iterator itright = closeRightKmerInfo[m-kmerSize].begin(); itright != closeRightKmerInfo[m-kmerSize].end(); itright++) { + for (map::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) { int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first]; //times that kmer was seen in total //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side @@ -221,7 +225,7 @@ vector ChimeraCheckRDP::findIS(int query) { } - int nLeft = calcKmers(closeLeftKmerInfo[m-kmerSize], queryKmerInfo[m-kmerSize]); + int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]); int nRight = calcKmers(closerightside, rightside); @@ -231,41 +235,76 @@ vector ChimeraCheckRDP::findIS(int query) { temp.leftParent = closestLeft.getName(); temp.rightParent = closestRight.getName(); temp.score = is; - temp.midpoint = m; + temp.midpoint = f; isValues.push_back(temp); delete left; delete right; - } + } return isValues; } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "findIS"); + m->errorOut(e, "ChimeraCheckRDP", "findIS"); exit(1); } } //*************************************************************************************************************** void ChimeraCheckRDP::readName(string namefile) { try{ - ifstream in; - openInputFile(namefile, in); + string name; + + #ifdef USE_MPI + + MPI_File inMPI; + MPI_Offset size; + MPI_Status status; + + //char* inFileName = new char[namefile.length()]; + //memcpy(inFileName, namefile.c_str(), namefile.length()); + char inFileName[1024]; + strcpy(inFileName, namefile.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); + MPI_File_get_size(inMPI, &size); + + //delete inFileName; + + char* buffer = new char[size]; + MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); + + string tempBuf = buffer; + if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } + istringstream iss (tempBuf,istringstream::in); + delete buffer; + + while(!iss.eof()) { + iss >> name; m->gobble(iss); + names[name] = name; + } + + MPI_File_close(&inMPI); + + #else + + ifstream in; + m->openInputFile(namefile, in); + while (!in.eof()) { - - in >> name; - + in >> name; m->gobble(in); names[name] = name; - - gobble(in); } + in.close(); + + #endif } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "readName"); + m->errorOut(e, "ChimeraCheckRDP", "readName"); exit(1); } } @@ -276,25 +315,26 @@ int ChimeraCheckRDP::calcKmers(map query, map subject) { try{ int common = 0; - map::iterator small; - map::iterator large; + map::iterator smallone; + map::iterator largeone; + if (query.size() < subject.size()) { - for (small = query.begin(); small != query.end(); small++) { - large = subject.find(small->first); + for (smallone = query.begin(); smallone != query.end(); smallone++) { + largeone = subject.find(smallone->first); //if you found it they have that kmer in common - if (large != subject.end()) { common++; } + if (largeone != subject.end()) { common++; } } }else { - for (small = subject.begin(); small != subject.end(); small++) { - large = query.find(small->first); + for (smallone = subject.begin(); smallone != subject.end(); smallone++) { + largeone = query.find(smallone->first); //if you found it they have that kmer in common - if (large != query.end()) { common++; } + if (largeone != query.end()) { common++; } } } @@ -302,50 +342,103 @@ int ChimeraCheckRDP::calcKmers(map query, map subject) { } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "calcKmers"); + m->errorOut(e, "ChimeraCheckRDP", "calcKmers"); exit(1); } } - +#ifdef USE_MPI //*************************************************************************************************************** -void ChimeraCheckRDP::getCutoff() { +void ChimeraCheckRDP::makeSVGpic(vector info) { try{ - vector temp; + string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; + + MPI_File outSVG; + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + + //char* FileName = new char[file.length()]; + //memcpy(FileName, file.c_str(), file.length()); + + char FileName[1024]; + strcpy(FileName, file.c_str()); + + MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG); //comm, filename, mode, info, filepointer + + //delete FileName; + + int width = (info.size()*5) + 150; + + string outString = ""; + + outString += "\n"; + outString += "\n"; + outString += "Plotted IS values for " + querySeq->getName() + "\n"; + + outString += "\n"; + outString += "\n"; + + outString += "" + toString(info[0].midpoint) + "\n"; + outString += "" + toString(info[info.size()-1].midpoint) + "\n"; + outString += "Base Positions\n"; + + outString += "0\n"; - //store all is scores for all windows - for (int i = 0; i < IS.size(); i++) { - for (int j = 0; j < IS[i].size(); j++) { - temp.push_back(IS[i][j].score); + outString += "IS\n"; + + + //find max is score + float biggest = 0.0; + for (int i = 0; i < info.size(); i++) { + if (info[i].score > biggest) { + biggest = info[i].score; } } - //sort them - sort(temp.begin(), temp.end()); + outString += "" + toString(biggest) + "\n"; + + int scaler2 = 500 / biggest; + + + outString += " "; + for (int i = 0; i < info.size(); i++) { + if(info[i].score < 0) { info[i].score = 0; } + outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " "; + } + + outString += "\"/> "; + outString += "\n\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status); + delete buf2; - //get 95% - chimeraCutoff = temp[int(temp.size() * 0.95)]; + MPI_File_close(&outSVG); } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "getCutoff"); + m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); exit(1); } } - +#else //*************************************************************************************************************** -void ChimeraCheckRDP::makeSVGpic(vector info, int query) { +void ChimeraCheckRDP::makeSVGpic(vector info) { try{ - string file = querySeqs[query]->getName() + ".chimeracheck.svg"; + string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; ofstream outsvg; - openOutputFile(file, outsvg); + m->openOutputFile(file, outsvg); int width = (info.size()*5) + 150; outsvg << "\n"; outsvg << "\n"; - outsvg << "Plotted IS values for " + querySeqs[query]->getName() + "\n"; + outsvg << "Plotted IS values for " + querySeq->getName() + "\n"; outsvg << "\n"; outsvg << "\n"; @@ -386,101 +479,11 @@ void ChimeraCheckRDP::makeSVGpic(vector info, int query) { } catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); + m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); exit(1); } } -//*************************************************************************************************************** -void ChimeraCheckRDP::createProcessesIS() { - try { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; - vector processIDS; - - //loop through and create all the processes you want - while (process != processors) { - int pid = fork(); - - if (pid > 0) { - processIDS.push_back(pid); - process++; - }else if (pid == 0){ - - for (int i = lines[process]->start; i < lines[process]->end; i++) { - IS[i] = findIS(i); - } - - //write out data to file so parent can read it - ofstream out; - string s = toString(getpid()) + ".temp"; - openOutputFile(s, out); - - //output pairs - for (int i = lines[process]->start; i < lines[process]->end; i++) { - out << IS[i].size() << endl; - for (int j = 0; j < IS[i].size(); j++) { - out << IS[i][j].leftParent << '\t'<< IS[i][j].rightParent << '\t' << IS[i][j].midpoint << '\t' << IS[i][j].score << endl; - } - } - - out.close(); - - exit(0); - }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); } - } - - //force parent to wait until all the processes are done - for (int i=0;istart; k < lines[i]->end; k++) { - - int size; - in >> size; gobble(in); - - string left, right; - int mid; - float score; - - IS[k].clear(); - - for (int j = 0; j < size; j++) { - in >> left >> right >> mid >> score; gobble(in); - - sim temp; - temp.leftParent = left; - temp.rightParent = right; - temp.midpoint = mid; - temp.score = score; - - IS[k].push_back(temp); - } - } - - in.close(); - remove(s.c_str()); - } -#else - for (int i = 0; i < querySeqs.size(); i++) { - IS[i] = findIS(i); - } -#endif - } - catch(exception& e) { - errorOut(e, "ChimeraCheckRDP", "createProcessesIS"); - exit(1); - } -} - -//*************************************************************************************************************** +#endif +//***************************************************************************************************************/