X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=ccode.cpp;h=00cd3f1948e009a86456e32bbcde2a40c6f889ac;hp=00a30e5b6c0db1f58cb90b3c564703b7d3d77f6d;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=74844a60d80c6dd06e3fb02ee9b928424f9019b0 diff --git a/ccode.cpp b/ccode.cpp index 00a30e5..00cd3f1 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -11,35 +11,80 @@ #include "ignoregaps.h" #include "eachgapdist.h" - //*************************************************************************************************************** -Ccode::Ccode(string filename, string o) { - fastafile = filename; outputDir = o; +Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() { + try { + + fastafile = filename; + outputDir = o; + templateFileName = temp; templateSeqs = readSeqs(temp); + setMask(mask); + filter = f; + window = win; + numWanted = numW; + distCalc = new eachGapDist(); decalc = new DeCalculator(); - mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo"; - ofstream out2; - openOutputFile(mapInfo, out2); + mapInfo = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "mapinfo"; + + #ifdef USE_MPI + + //char* inFileName = new char[mapInfo.length()]; + //memcpy(inFileName, mapInfo.c_str(), mapInfo.length()); + + char inFileName[1024]; + strcpy(inFileName, mapInfo.c_str()); + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + + MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap); //comm, filename, mode, info, filepointer + + //delete inFileName; + + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status); + delete buf2; + } + #else + + ofstream out2; + m->openOutputFile(mapInfo, out2); - out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; - out2.close(); + out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; + out2.close(); + #endif + } + catch(exception& e) { + m->errorOut(e, "Ccode", "Ccode"); + exit(1); + } } //*************************************************************************************************************** Ccode::~Ccode() { delete distCalc; delete decalc; + + #ifdef USE_MPI + MPI_File_close(&outMap); + #endif } //*************************************************************************************************************** -void Ccode::printHeader(ostream& out) { - out << "For full window mapping info refer to " << mapInfo << endl << endl; -} -//*************************************************************************************************************** -void Ccode::print(ostream& out, ostream& outAcc) { +Sequence Ccode::print(ostream& out, ostream& outAcc) { try { ofstream out2; - openOutputFileAppend(mapInfo, out2); + m->openOutputFileAppend(mapInfo, out2); out2 << querySeq->getName() << endl; for (it = spotMap.begin(); it!= spotMap.end(); it++) { @@ -116,7 +161,115 @@ void Ccode::print(ostream& out, ostream& outAcc) { //free memory for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + return *querySeq; + } + catch(exception& e) { + m->errorOut(e, "Ccode", "print"); + exit(1); + } +} +#ifdef USE_MPI +//*************************************************************************************************************** +Sequence Ccode::print(MPI_File& out, MPI_File& outAcc) { + try { + + string outMapString = ""; + + outMapString += querySeq->getName() + "\n"; + for (it = spotMap.begin(); it!= spotMap.end(); it++) { + outMapString += toString(it->first) + "\t" + toString(it->second) + "\n"; + } + printMapping(outMapString); + outMapString = ""; + + string outString = ""; + string outAccString = ""; + + outString += querySeq->getName() + "\n\nReference sequences used and distance to query:\n"; + + for (int j = 0; j < closest.size(); j++) { + outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n"; + } + outString += "\n\nMapping information: "; + + //for each window + //window mapping info. + //you mask and did not filter + if ((seqMask != "") && (!filter)) { outString += "mask and trim."; } + + //you filtered and did not mask + if ((seqMask == "") && (filter)) { outString += "filter and trim."; } + + //you masked and filtered + if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; } + + outString += "\nWindow\tStartPos\tEndPos\n"; + it = trim.begin(); + for (int k = 0; k < windows.size()-1; k++) { + outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n"; + } + + outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n"; + outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n"; + for (int k = 0; k < windows.size(); k++) { + float ds = averageQuery[k] / averageRef[k]; + outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n"; + } + + //varRef + //varQuery + /* F test for differences among variances. + * varQuery is expected to be higher or similar than varRef */ + //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */ + + bool results = false; + + //confidence limit, t - Student, anova + outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n"; + + for (int k = 0; k < windows.size(); k++) { + string temp = ""; + if (isChimericConfidence[k]) { temp += "*\t"; } + else { temp += "\t"; } + + if (isChimericTStudent[k]) { temp += "*\t"; } + else { temp += "\t"; } + + if (isChimericANOVA[k]) { temp += "*\t"; } + else { temp += "\t"; } + + outString += toString(k+1) + "\t" + temp + "\n"; + + if (temp == "*\t*\t*\t") { results = true; } + } + outString += "\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); + delete buf2; + + if (results) { + m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine(); + outAccString += querySeq->getName() + "\n"; + + MPI_Status statusAcc; + length = outAccString.length(); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); + delete buf; + } + + //free memory + for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + + return *querySeq; } catch(exception& e) { m->errorOut(e, "Ccode", "print"); @@ -124,6 +277,26 @@ void Ccode::print(ostream& out, ostream& outAcc) { } } //*************************************************************************************************************** +int Ccode::printMapping(string& output) { + try { + MPI_Status status; + int length = output.length(); + char* buf = new char[length]; + memcpy(buf, output.c_str(), length); + + MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status); + delete buf; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "Ccode", "printMapping"); + exit(1); + } +} +#endif +//*************************************************************************************************************** int Ccode::getChimeras(Sequence* query) { try { @@ -154,6 +327,8 @@ int Ccode::getChimeras(Sequence* query) { //find closest matches to query closest = findClosest(query, numWanted); + if (m->control_pressed) { return 0; } + //initialize spotMap for (int i = 0; i < query->getAligned().length(); i++) { spotMap[i] = i; } @@ -176,7 +351,10 @@ int Ccode::getChimeras(Sequence* query) { createFilter(temp, 0.5); - for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]); } + for (int i = 0; i < temp.size(); i++) { + if (m->control_pressed) { return 0; } + runFilter(temp[i]); + } //update spotMap map newMap; @@ -194,30 +372,32 @@ int Ccode::getChimeras(Sequence* query) { //trim sequences - this follows ccodes remove_extra_gaps trimSequences(query); - + if (m->control_pressed) { return 0; } //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length(). //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations windows = findWindows(); - + if (m->control_pressed) { return 0; } //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later removeBadReferenceSeqs(closest); - + if (m->control_pressed) { return 0; } //find the averages for each querys references getAverageRef(closest); //fills sumRef, averageRef, sumSquaredRef and refCombo. getAverageQuery(closest, query); //fills sumQuery, averageQuery, sumSquaredQuery. - + if (m->control_pressed) { return 0; } //find the averages for each querys references findVarianceRef(); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. - + if (m->control_pressed) { return 0; } //find the averages for the query findVarianceQuery(); //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0. + if (m->control_pressed) { return 0; } determineChimeras(); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. + if (m->control_pressed) { return 0; } return 0; }