X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=ccode.cpp;h=00cd3f1948e009a86456e32bbcde2a40c6f889ac;hp=bfcd44d8239a30f8ad18db73f0c06268dc0e7474;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=5a1e62397b91f57d0d3aff635891df04b8999a88 diff --git a/ccode.cpp b/ccode.cpp index bfcd44d..00cd3f1 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -11,50 +11,90 @@ #include "ignoregaps.h" #include "eachgapdist.h" - //*************************************************************************************************************** -Ccode::Ccode(string filename, string o) { - fastafile = filename; outputDir = o; +Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() { + try { + + fastafile = filename; + outputDir = o; + templateFileName = temp; templateSeqs = readSeqs(temp); + setMask(mask); + filter = f; + window = win; + numWanted = numW; + distCalc = new eachGapDist(); decalc = new DeCalculator(); - mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo"; - ofstream out2; - openOutputFile(mapInfo, out2); + mapInfo = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "mapinfo"; + + #ifdef USE_MPI + + //char* inFileName = new char[mapInfo.length()]; + //memcpy(inFileName, mapInfo.c_str(), mapInfo.length()); + + char inFileName[1024]; + strcpy(inFileName, mapInfo.c_str()); + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + + MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap); //comm, filename, mode, info, filepointer + + //delete inFileName; + + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status); + delete buf2; + } + #else + + ofstream out2; + m->openOutputFile(mapInfo, out2); - out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; - out2.close(); + out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; + out2.close(); + #endif + } + catch(exception& e) { + m->errorOut(e, "Ccode", "Ccode"); + exit(1); + } } //*************************************************************************************************************** Ccode::~Ccode() { delete distCalc; delete decalc; + + #ifdef USE_MPI + MPI_File_close(&outMap); + #endif } //*************************************************************************************************************** -void Ccode::printHeader(ostream& out) { - out << "For full window mapping info refer to " << mapInfo << endl << endl; -} -//*************************************************************************************************************** -void Ccode::print(ostream& out) { +Sequence Ccode::print(ostream& out, ostream& outAcc) { try { - mothurOutEndLine(); - ofstream out2; - openOutputFileAppend(mapInfo, out2); + m->openOutputFileAppend(mapInfo, out2); out2 << querySeq->getName() << endl; for (it = spotMap.begin(); it!= spotMap.end(); it++) { out2 << it->first << '\t' << it->second << endl; } out2.close(); - - out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - out << querySeq->getName() << endl << endl << "Reference sequences used and distance to query:" << endl; for (int j = 0; j < closest.size(); j++) { - out << setprecision(3) << closest[j].seq->getName() << '\t' << closest[j].dist << endl; + out << closest[j].seq->getName() << '\t' << closest[j].dist << endl; } out << endl << endl; @@ -72,14 +112,12 @@ void Ccode::print(ostream& out) { out << endl << "Window\tStartPos\tEndPos" << endl; it = trim.begin(); - for (int k = 0; k < windows.size()-1; k++) { out << k+1 << '\t' << spotMap[windows[k]-it->first] << '\t' << spotMap[windows[k]-it->first+windowSizes] << endl; } out << windows.size() << '\t' << spotMap[windows[windows.size()-1]-it->first] << '\t' << spotMap[it->second-it->first-1] << endl; out << endl; - out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl; for (int k = 0; k < windows.size(); k++) { float ds = averageQuery[k] / averageRef[k]; @@ -94,7 +132,7 @@ void Ccode::print(ostream& out) { //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */ bool results = false; - + //confidence limit, t - Student, anova out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl; @@ -116,16 +154,149 @@ void Ccode::print(ostream& out) { out << endl; if (results) { - mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine(); + m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine(); + outAcc << querySeq->getName() << endl; } + + //free memory + for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + + return *querySeq; + } + catch(exception& e) { + m->errorOut(e, "Ccode", "print"); + exit(1); + } +} +#ifdef USE_MPI +//*************************************************************************************************************** +Sequence Ccode::print(MPI_File& out, MPI_File& outAcc) { + try { + + string outMapString = ""; + + outMapString += querySeq->getName() + "\n"; + for (it = spotMap.begin(); it!= spotMap.end(); it++) { + outMapString += toString(it->first) + "\t" + toString(it->second) + "\n"; + } + printMapping(outMapString); + outMapString = ""; + string outString = ""; + string outAccString = ""; + + outString += querySeq->getName() + "\n\nReference sequences used and distance to query:\n"; + + for (int j = 0; j < closest.size(); j++) { + outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n"; + } + outString += "\n\nMapping information: "; + + //for each window + //window mapping info. + //you mask and did not filter + if ((seqMask != "") && (!filter)) { outString += "mask and trim."; } + + //you filtered and did not mask + if ((seqMask == "") && (filter)) { outString += "filter and trim."; } + + //you masked and filtered + if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; } + + outString += "\nWindow\tStartPos\tEndPos\n"; + it = trim.begin(); + for (int k = 0; k < windows.size()-1; k++) { + outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n"; + } + + outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n"; + + outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n"; + for (int k = 0; k < windows.size(); k++) { + float ds = averageQuery[k] / averageRef[k]; + outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n"; + } + + //varRef + //varQuery + /* F test for differences among variances. + * varQuery is expected to be higher or similar than varRef */ + //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */ + + bool results = false; + + //confidence limit, t - Student, anova + outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n"; + + for (int k = 0; k < windows.size(); k++) { + string temp = ""; + if (isChimericConfidence[k]) { temp += "*\t"; } + else { temp += "\t"; } + + if (isChimericTStudent[k]) { temp += "*\t"; } + else { temp += "\t"; } + + if (isChimericANOVA[k]) { temp += "*\t"; } + else { temp += "\t"; } + + outString += toString(k+1) + "\t" + temp + "\n"; + + if (temp == "*\t*\t*\t") { results = true; } + } + outString += "\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); + delete buf2; + + if (results) { + m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine(); + outAccString += querySeq->getName() + "\n"; + + MPI_Status statusAcc; + length = outAccString.length(); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); + delete buf; + } + + //free memory + for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + + return *querySeq; } catch(exception& e) { - errorOut(e, "Ccode", "print"); + m->errorOut(e, "Ccode", "print"); exit(1); } } //*************************************************************************************************************** +int Ccode::printMapping(string& output) { + try { + MPI_Status status; + int length = output.length(); + char* buf = new char[length]; + memcpy(buf, output.c_str(), length); + + MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status); + delete buf; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "Ccode", "printMapping"); + exit(1); + } +} +#endif +//*************************************************************************************************************** int Ccode::getChimeras(Sequence* query) { try { @@ -156,6 +327,8 @@ int Ccode::getChimeras(Sequence* query) { //find closest matches to query closest = findClosest(query, numWanted); + if (m->control_pressed) { return 0; } + //initialize spotMap for (int i = 0; i < query->getAligned().length(); i++) { spotMap[i] = i; } @@ -176,9 +349,12 @@ int Ccode::getChimeras(Sequence* query) { for (int i = 0; i < closest.size(); i++) { temp.push_back(closest[i].seq); } temp.push_back(query); - createFilter(temp); + createFilter(temp, 0.5); - for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]); } + for (int i = 0; i < temp.size(); i++) { + if (m->control_pressed) { return 0; } + runFilter(temp[i]); + } //update spotMap map newMap; @@ -196,38 +372,37 @@ int Ccode::getChimeras(Sequence* query) { //trim sequences - this follows ccodes remove_extra_gaps trimSequences(query); - + if (m->control_pressed) { return 0; } //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length(). //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations windows = findWindows(); - + if (m->control_pressed) { return 0; } //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later removeBadReferenceSeqs(closest); - + if (m->control_pressed) { return 0; } //find the averages for each querys references getAverageRef(closest); //fills sumRef, averageRef, sumSquaredRef and refCombo. getAverageQuery(closest, query); //fills sumQuery, averageQuery, sumSquaredQuery. - + if (m->control_pressed) { return 0; } //find the averages for each querys references findVarianceRef(); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. - + if (m->control_pressed) { return 0; } //find the averages for the query findVarianceQuery(); //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0. + if (m->control_pressed) { return 0; } determineChimeras(); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. - - //free memory - for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + if (m->control_pressed) { return 0; } return 0; } catch(exception& e) { - errorOut(e, "Ccode", "getChimeras"); + m->errorOut(e, "Ccode", "getChimeras"); exit(1); } } @@ -309,7 +484,7 @@ void Ccode::trimSequences(Sequence* query) { //check to make sure that is not whole seq - if ((rearPos - frontPos - 1) <= 0) { mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1); } + if ((rearPos - frontPos - 1) <= 0) { m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); m->mothurOutEndLine(); exit(1); } map tempTrim; tempTrim[frontPos] = rearPos; @@ -329,7 +504,7 @@ void Ccode::trimSequences(Sequence* query) { spotMap = newMap; } catch(exception& e) { - errorOut(e, "Ccode", "trimSequences"); + m->errorOut(e, "Ccode", "trimSequences"); exit(1); } } @@ -344,13 +519,13 @@ vector Ccode::findWindows() { //default is wanted = 10% of total length if (windowSizes > length) { - mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length."); + m->mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length."); windowSizes = length / 10; }else if (windowSizes == 0) { windowSizes = length / 10; } else if (windowSizes > (length * 0.20)) { - mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine(); + m->mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); m->mothurOutEndLine(); }else if (windowSizes < (length * 0.05)) { - mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine(); + m->mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); m->mothurOutEndLine(); } //save starting points of each window @@ -364,7 +539,7 @@ vector Ccode::findWindows() { return win; } catch(exception& e) { - errorOut(e, "Ccode", "findWindows"); + m->errorOut(e, "Ccode", "findWindows"); exit(1); } } @@ -419,7 +594,7 @@ int Ccode::getDiff(string seqA, string seqB) { } catch(exception& e) { - errorOut(e, "Ccode", "getDiff"); + m->errorOut(e, "Ccode", "getDiff"); exit(1); } } @@ -489,12 +664,12 @@ void Ccode::removeBadReferenceSeqs(vector& seqs) { seqs = goodSeqs; }else { //warn, but dont remove any - mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); mothurOutEndLine(); + m->mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); m->mothurOutEndLine(); } } catch(exception& e) { - errorOut(e, "Ccode", "removeBadReferenceSeqs"); + m->errorOut(e, "Ccode", "removeBadReferenceSeqs"); exit(1); } } @@ -525,12 +700,16 @@ vector Ccode::findClosest(Sequence* q, int numWanted) { } sort(topMatches.begin(), topMatches.end(), compareSeqDist); + + for (int i = numWanted; i < topMatches.size(); i++) { delete topMatches[i].seq; } + + topMatches.resize(numWanted); return topMatches; } catch(exception& e) { - errorOut(e, "Ccode", "findClosestSides"); + m->errorOut(e, "Ccode", "findClosestSides"); exit(1); } } @@ -617,7 +796,7 @@ void Ccode::getAverageRef(vector ref) { } catch(exception& e) { - errorOut(e, "Ccode", "getAverageRef"); + m->errorOut(e, "Ccode", "getAverageRef"); exit(1); } } @@ -686,7 +865,7 @@ void Ccode::getAverageQuery (vector ref, Sequence* query) { } } catch(exception& e) { - errorOut(e, "Ccode", "getAverageQuery"); + m->errorOut(e, "Ccode", "getAverageQuery"); exit(1); } } @@ -712,7 +891,7 @@ void Ccode::findVarianceRef() { } } catch(exception& e) { - errorOut(e, "Ccode", "findVarianceRef"); + m->errorOut(e, "Ccode", "findVarianceRef"); exit(1); } } @@ -737,7 +916,7 @@ void Ccode::findVarianceQuery() { } catch(exception& e) { - errorOut(e, "Ccode", "findVarianceQuery"); + m->errorOut(e, "Ccode", "findVarianceQuery"); exit(1); } } @@ -799,7 +978,7 @@ void Ccode::determineChimeras() { } catch(exception& e) { - errorOut(e, "Ccode", "determineChimeras"); + m->errorOut(e, "Ccode", "determineChimeras"); exit(1); } } @@ -844,13 +1023,13 @@ float Ccode::getT(int numseq) { else if (numseq > 2) tvalue = 2.353; else if (numseq > 1) tvalue = 2.920; else if (numseq <= 1) { - mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine(); + m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine(); } return tvalue; } catch(exception& e) { - errorOut(e, "Ccode", "getT"); + m->errorOut(e, "Ccode", "getT"); exit(1); } } @@ -896,13 +1075,13 @@ float Ccode::getF(int numseq) { else if (numseq > 1) fvalue = 18.5; else if (numseq > 0) fvalue = 161; else if (numseq <= 0) { - mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine(); + m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine(); } return fvalue; } catch(exception& e) { - errorOut(e, "Ccode", "getF"); + m->errorOut(e, "Ccode", "getF"); exit(1); } }