X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=ccode.cpp;h=00cd3f1948e009a86456e32bbcde2a40c6f889ac;hp=326af7cec61a88eb6ab859f203098c0ded3a9cfb;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=81276c241b984898f8d30ad123c00592ee6db7b8 diff --git a/ccode.cpp b/ccode.cpp index 326af7c..00cd3f1 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -11,37 +11,80 @@ #include "ignoregaps.h" #include "eachgapdist.h" - //*************************************************************************************************************** -Ccode::Ccode(string filename, string o) { - fastafile = filename; outputDir = o; +Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() { + try { + + fastafile = filename; + outputDir = o; + templateFileName = temp; templateSeqs = readSeqs(temp); + setMask(mask); + filter = f; + window = win; + numWanted = numW; + distCalc = new eachGapDist(); decalc = new DeCalculator(); - mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo"; - ofstream out2; - openOutputFile(mapInfo, out2); + mapInfo = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "mapinfo"; + + #ifdef USE_MPI + + //char* inFileName = new char[mapInfo.length()]; + //memcpy(inFileName, mapInfo.c_str(), mapInfo.length()); + + char inFileName[1024]; + strcpy(inFileName, mapInfo.c_str()); + + int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; + + MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap); //comm, filename, mode, info, filepointer + + //delete inFileName; + + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status); + delete buf2; + } + #else + + ofstream out2; + m->openOutputFile(mapInfo, out2); - out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; - out2.close(); + out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; + out2.close(); + #endif + } + catch(exception& e) { + m->errorOut(e, "Ccode", "Ccode"); + exit(1); + } } //*************************************************************************************************************** Ccode::~Ccode() { delete distCalc; delete decalc; + + #ifdef USE_MPI + MPI_File_close(&outMap); + #endif } //*************************************************************************************************************** -void Ccode::printHeader(ostream& out) { - out << "For full window mapping info refer to " << mapInfo << endl << endl; -} -//*************************************************************************************************************** -void Ccode::print(ostream& out) { +Sequence Ccode::print(ostream& out, ostream& outAcc) { try { - mothurOutEndLine(); - ofstream out2; - openOutputFileAppend(mapInfo, out2); + m->openOutputFileAppend(mapInfo, out2); out2 << querySeq->getName() << endl; for (it = spotMap.begin(); it!= spotMap.end(); it++) { @@ -111,20 +154,149 @@ void Ccode::print(ostream& out) { out << endl; if (results) { - mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine(); + m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine(); + outAcc << querySeq->getName() << endl; } //free memory for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + return *querySeq; + } + catch(exception& e) { + m->errorOut(e, "Ccode", "print"); + exit(1); + } +} +#ifdef USE_MPI +//*************************************************************************************************************** +Sequence Ccode::print(MPI_File& out, MPI_File& outAcc) { + try { + + string outMapString = ""; + + outMapString += querySeq->getName() + "\n"; + for (it = spotMap.begin(); it!= spotMap.end(); it++) { + outMapString += toString(it->first) + "\t" + toString(it->second) + "\n"; + } + printMapping(outMapString); + outMapString = ""; + + string outString = ""; + string outAccString = ""; + + outString += querySeq->getName() + "\n\nReference sequences used and distance to query:\n"; + + for (int j = 0; j < closest.size(); j++) { + outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n"; + } + outString += "\n\nMapping information: "; + + //for each window + //window mapping info. + //you mask and did not filter + if ((seqMask != "") && (!filter)) { outString += "mask and trim."; } + + //you filtered and did not mask + if ((seqMask == "") && (filter)) { outString += "filter and trim."; } + + //you masked and filtered + if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; } + + outString += "\nWindow\tStartPos\tEndPos\n"; + it = trim.begin(); + for (int k = 0; k < windows.size()-1; k++) { + outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n"; + } + + outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n"; + + outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n"; + for (int k = 0; k < windows.size(); k++) { + float ds = averageQuery[k] / averageRef[k]; + outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n"; + } + + //varRef + //varQuery + /* F test for differences among variances. + * varQuery is expected to be higher or similar than varRef */ + //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */ + + bool results = false; + + //confidence limit, t - Student, anova + outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n"; + + for (int k = 0; k < windows.size(); k++) { + string temp = ""; + if (isChimericConfidence[k]) { temp += "*\t"; } + else { temp += "\t"; } + + if (isChimericTStudent[k]) { temp += "*\t"; } + else { temp += "\t"; } + + if (isChimericANOVA[k]) { temp += "*\t"; } + else { temp += "\t"; } + + outString += toString(k+1) + "\t" + temp + "\n"; + + if (temp == "*\t*\t*\t") { results = true; } + } + outString += "\n"; + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); + delete buf2; + + if (results) { + m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine(); + outAccString += querySeq->getName() + "\n"; + + MPI_Status statusAcc; + length = outAccString.length(); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); + delete buf; + } + + //free memory + for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } + + return *querySeq; } catch(exception& e) { - errorOut(e, "Ccode", "print"); + m->errorOut(e, "Ccode", "print"); exit(1); } } //*************************************************************************************************************** +int Ccode::printMapping(string& output) { + try { + MPI_Status status; + int length = output.length(); + char* buf = new char[length]; + memcpy(buf, output.c_str(), length); + + MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status); + delete buf; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "Ccode", "printMapping"); + exit(1); + } +} +#endif +//*************************************************************************************************************** int Ccode::getChimeras(Sequence* query) { try { @@ -155,6 +327,8 @@ int Ccode::getChimeras(Sequence* query) { //find closest matches to query closest = findClosest(query, numWanted); + if (m->control_pressed) { return 0; } + //initialize spotMap for (int i = 0; i < query->getAligned().length(); i++) { spotMap[i] = i; } @@ -177,7 +351,10 @@ int Ccode::getChimeras(Sequence* query) { createFilter(temp, 0.5); - for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]); } + for (int i = 0; i < temp.size(); i++) { + if (m->control_pressed) { return 0; } + runFilter(temp[i]); + } //update spotMap map newMap; @@ -195,35 +372,37 @@ int Ccode::getChimeras(Sequence* query) { //trim sequences - this follows ccodes remove_extra_gaps trimSequences(query); - + if (m->control_pressed) { return 0; } //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length(). //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations windows = findWindows(); - + if (m->control_pressed) { return 0; } //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later removeBadReferenceSeqs(closest); - + if (m->control_pressed) { return 0; } //find the averages for each querys references getAverageRef(closest); //fills sumRef, averageRef, sumSquaredRef and refCombo. getAverageQuery(closest, query); //fills sumQuery, averageQuery, sumSquaredQuery. - + if (m->control_pressed) { return 0; } //find the averages for each querys references findVarianceRef(); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. - + if (m->control_pressed) { return 0; } //find the averages for the query findVarianceQuery(); //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0. + if (m->control_pressed) { return 0; } determineChimeras(); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. + if (m->control_pressed) { return 0; } return 0; } catch(exception& e) { - errorOut(e, "Ccode", "getChimeras"); + m->errorOut(e, "Ccode", "getChimeras"); exit(1); } } @@ -305,7 +484,7 @@ void Ccode::trimSequences(Sequence* query) { //check to make sure that is not whole seq - if ((rearPos - frontPos - 1) <= 0) { mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1); } + if ((rearPos - frontPos - 1) <= 0) { m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); m->mothurOutEndLine(); exit(1); } map tempTrim; tempTrim[frontPos] = rearPos; @@ -325,7 +504,7 @@ void Ccode::trimSequences(Sequence* query) { spotMap = newMap; } catch(exception& e) { - errorOut(e, "Ccode", "trimSequences"); + m->errorOut(e, "Ccode", "trimSequences"); exit(1); } } @@ -340,13 +519,13 @@ vector Ccode::findWindows() { //default is wanted = 10% of total length if (windowSizes > length) { - mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length."); + m->mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length."); windowSizes = length / 10; }else if (windowSizes == 0) { windowSizes = length / 10; } else if (windowSizes > (length * 0.20)) { - mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine(); + m->mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); m->mothurOutEndLine(); }else if (windowSizes < (length * 0.05)) { - mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine(); + m->mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); m->mothurOutEndLine(); } //save starting points of each window @@ -360,7 +539,7 @@ vector Ccode::findWindows() { return win; } catch(exception& e) { - errorOut(e, "Ccode", "findWindows"); + m->errorOut(e, "Ccode", "findWindows"); exit(1); } } @@ -415,7 +594,7 @@ int Ccode::getDiff(string seqA, string seqB) { } catch(exception& e) { - errorOut(e, "Ccode", "getDiff"); + m->errorOut(e, "Ccode", "getDiff"); exit(1); } } @@ -485,12 +664,12 @@ void Ccode::removeBadReferenceSeqs(vector& seqs) { seqs = goodSeqs; }else { //warn, but dont remove any - mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); mothurOutEndLine(); + m->mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); m->mothurOutEndLine(); } } catch(exception& e) { - errorOut(e, "Ccode", "removeBadReferenceSeqs"); + m->errorOut(e, "Ccode", "removeBadReferenceSeqs"); exit(1); } } @@ -530,7 +709,7 @@ vector Ccode::findClosest(Sequence* q, int numWanted) { } catch(exception& e) { - errorOut(e, "Ccode", "findClosestSides"); + m->errorOut(e, "Ccode", "findClosestSides"); exit(1); } } @@ -617,7 +796,7 @@ void Ccode::getAverageRef(vector ref) { } catch(exception& e) { - errorOut(e, "Ccode", "getAverageRef"); + m->errorOut(e, "Ccode", "getAverageRef"); exit(1); } } @@ -686,7 +865,7 @@ void Ccode::getAverageQuery (vector ref, Sequence* query) { } } catch(exception& e) { - errorOut(e, "Ccode", "getAverageQuery"); + m->errorOut(e, "Ccode", "getAverageQuery"); exit(1); } } @@ -712,7 +891,7 @@ void Ccode::findVarianceRef() { } } catch(exception& e) { - errorOut(e, "Ccode", "findVarianceRef"); + m->errorOut(e, "Ccode", "findVarianceRef"); exit(1); } } @@ -737,7 +916,7 @@ void Ccode::findVarianceQuery() { } catch(exception& e) { - errorOut(e, "Ccode", "findVarianceQuery"); + m->errorOut(e, "Ccode", "findVarianceQuery"); exit(1); } } @@ -799,7 +978,7 @@ void Ccode::determineChimeras() { } catch(exception& e) { - errorOut(e, "Ccode", "determineChimeras"); + m->errorOut(e, "Ccode", "determineChimeras"); exit(1); } } @@ -844,13 +1023,13 @@ float Ccode::getT(int numseq) { else if (numseq > 2) tvalue = 2.353; else if (numseq > 1) tvalue = 2.920; else if (numseq <= 1) { - mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine(); + m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine(); } return tvalue; } catch(exception& e) { - errorOut(e, "Ccode", "getT"); + m->errorOut(e, "Ccode", "getT"); exit(1); } } @@ -896,13 +1075,13 @@ float Ccode::getF(int numseq) { else if (numseq > 1) fvalue = 18.5; else if (numseq > 0) fvalue = 161; else if (numseq <= 0) { - mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine(); + m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine(); } return fvalue; } catch(exception& e) { - errorOut(e, "Ccode", "getF"); + m->errorOut(e, "Ccode", "getF"); exit(1); } }