//***************************************************************************************************************
-Ccode::Ccode(string filename, string temp) { fastafile = filename; templateFile = temp; }
-//***************************************************************************************************************
+Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() {
+ fastafile = filename;
+ outputDir = o;
+ templateFileName = temp; templateSeqs = readSeqs(temp);
+ setMask(mask);
+ filter = f;
+ window = win;
+ numWanted = numW;
+
+ distCalc = new eachGapDist();
+ decalc = new DeCalculator();
+
+ mapInfo = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "mapinfo";
+
+ #ifdef USE_MPI
+
+ //char* inFileName = new char[mapInfo.length()];
+ //memcpy(inFileName, mapInfo.c_str(), mapInfo.length());
+
+ char inFileName[1024];
+ strcpy(inFileName, mapInfo.c_str());
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap); //comm, filename, mode, info, filepointer
+
+ //delete inFileName;
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n";
+
+ MPI_Status status;
+ int length = outString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outString.c_str(), length);
+
+ MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+ }
+ #else
+
+ ofstream out2;
+ m->openOutputFile(mapInfo, out2);
+
+ out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+ out2.close();
+ #endif
+}
+//***************************************************************************************************************
Ccode::~Ccode() {
- try {
- for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
- for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
- }
- catch(exception& e) {
- errorOut(e, "Ccode", "~Ccode");
- exit(1);
- }
+ delete distCalc;
+ delete decalc;
+
+ #ifdef USE_MPI
+ MPI_File_close(&outMap);
+ #endif
}
//***************************************************************************************************************
-void Ccode::print(ostream& out) {
+Sequence* Ccode::print(ostream& out, ostream& outAcc) {
try {
- mothurOutEndLine();
-
- string mapInfo = getRootName(fastafile) + "mapinfo";
ofstream out2;
- openOutputFile(mapInfo, out2);
+ m->openOutputFileAppend(mapInfo, out2);
- out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
-
- for (int j = 0; j < querySeqs.size(); j++) {
- out2 << querySeqs[j]->getName() << endl;
- for (it = spotMap[j].begin(); it!= spotMap[j].end(); it++) {
- out2 << it->first << '\t' << it->second << endl;
- }
+ out2 << querySeq->getName() << endl;
+ for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+ out2 << it->first << '\t' << it->second << endl;
}
out2.close();
+ out << querySeq->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
+
+ for (int j = 0; j < closest.size(); j++) {
+ out << closest[j].seq->getName() << '\t' << closest[j].dist << endl;
+ }
+ out << endl << endl;
+
+ //for each window
+ //window mapping info.
+ out << "Mapping information: ";
+ //you mask and did not filter
+ if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
+
+ //you filtered and did not mask
+ if ((seqMask == "") && (filter)) { out << "filter and trim."; }
+
+ //you masked and filtered
+ if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
+
+ out << endl << "Window\tStartPos\tEndPos" << endl;
+ it = trim.begin();
+ for (int k = 0; k < windows.size()-1; k++) {
+ out << k+1 << '\t' << spotMap[windows[k]-it->first] << '\t' << spotMap[windows[k]-it->first+windowSizes] << endl;
+ }
+
+ out << windows.size() << '\t' << spotMap[windows[windows.size()-1]-it->first] << '\t' << spotMap[it->second-it->first-1] << endl;
+ out << endl;
+ out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
+ for (int k = 0; k < windows.size(); k++) {
+ float ds = averageQuery[k] / averageRef[k];
+ out << k+1 << '\t' << averageQuery[k] << '\t' << sdQuery[k] << '\t' << averageRef[k] << '\t'<< sdRef[k] << '\t' << ds << '\t' << anova[k] << endl;
+ }
+ out << endl;
+
+ //varRef
+ //varQuery
+ /* F test for differences among variances.
+ * varQuery is expected to be higher or similar than varRef */
+ //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */
+
+ bool results = false;
+
+ //confidence limit, t - Student, anova
+ out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
+
+ for (int k = 0; k < windows.size(); k++) {
+ string temp = "";
+ if (isChimericConfidence[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
+
+ if (isChimericTStudent[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
+
+ if (isChimericANOVA[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
+
+ out << k+1 << '\t' << temp << endl;
+
+ if (temp == "*\t*\t*\t") { results = true; }
+ }
+ out << endl;
+
+ if (results) {
+ m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine();
+ outAcc << querySeq->getName() << endl;
+ }
+
+ //free memory
+ for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; }
+
+ return NULL;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Ccode", "print");
+ exit(1);
+ }
+}
+#ifdef USE_MPI
+//***************************************************************************************************************
+Sequence* Ccode::print(MPI_File& out, MPI_File& outAcc) {
+ try {
- out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+ string outMapString = "";
- out << "For full window mapping info refer to " << mapInfo << endl << endl;
+ outMapString += querySeq->getName() + "\n";
+ for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+ outMapString += toString(it->first) + "\t" + toString(it->second) + "\n";
+ }
+ printMapping(outMapString);
+ outMapString = "";
- for (int i = 0; i < querySeqs.size(); i++) {
+ string outString = "";
+ string outAccString = "";
- out << querySeqs[i]->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
+ outString += querySeq->getName() + "\n\nReference sequences used and distance to query:\n";
- for (int j = 0; j < closest[i].size(); j++) {
- out << setprecision(3) << closest[i][j].seq->getName() << '\t' << closest[i][j].dist << endl;
- }
- out << endl << endl;
+ for (int j = 0; j < closest.size(); j++) {
+ outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n";
+ }
+ outString += "\n\nMapping information: ";
- //for each window
- //window mapping info.
- out << "Mapping information: ";
- //you mask and did not filter
- if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
+ //for each window
+ //window mapping info.
+ //you mask and did not filter
+ if ((seqMask != "") && (!filter)) { outString += "mask and trim."; }
- //you filtered and did not mask
- if ((seqMask == "") && (filter)) { out << "filter and trim."; }
+ //you filtered and did not mask
+ if ((seqMask == "") && (filter)) { outString += "filter and trim."; }
- //you masked and filtered
- if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
+ //you masked and filtered
+ if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; }
- out << endl << "Window\tStartPos\tEndPos" << endl;
- it = trim[i].begin();
-
- for (int k = 0; k < windows[i].size()-1; k++) {
- out << k+1 << '\t' << spotMap[i][windows[i][k]-it->first] << '\t' << spotMap[i][windows[i][k]-it->first+windowSizes[i]] << endl;
- }
-
- out << windows[i].size() << '\t' << spotMap[i][windows[i][windows[i].size()-1]-it->first] << '\t' << spotMap[i][it->second-it->first-1] << endl;
- out << endl;
+ outString += "\nWindow\tStartPos\tEndPos\n";
+ it = trim.begin();
+ for (int k = 0; k < windows.size()-1; k++) {
+ outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n";
+ }
- out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
- for (int k = 0; k < windows[i].size(); k++) {
- float ds = averageQuery[i][k] / averageRef[i][k];
- out << k+1 << '\t' << averageQuery[i][k] << '\t' << sdQuery[i][k] << '\t' << averageRef[i][k] << '\t'<< sdRef[i][k] << '\t' << ds << '\t' << anova[i][k] << endl;
- }
- out << endl;
+ outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n";
+
+ outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n";
+ for (int k = 0; k < windows.size(); k++) {
+ float ds = averageQuery[k] / averageRef[k];
+ outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n";
+ }
- //varRef
- //varQuery
- /* F test for differences among variances.
- * varQuery is expected to be higher or similar than varRef */
- //float fs = varQuery[query][i] / varRef[query][i]; /* F-Snedecor, test for differences of variances */
+ //varRef
+ //varQuery
+ /* F test for differences among variances.
+ * varQuery is expected to be higher or similar than varRef */
+ //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */
- bool results = false;
-
- //confidence limit, t - Student, anova
- out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
+ bool results = false;
+
+ //confidence limit, t - Student, anova
+ outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n";
- for (int k = 0; k < windows[i].size(); k++) {
- string temp = "";
- if (isChimericConfidence[i][k]) { temp += "*\t"; }
- else { temp += "\t"; }
+ for (int k = 0; k < windows.size(); k++) {
+ string temp = "";
+ if (isChimericConfidence[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
- if (isChimericTStudent[i][k]) { temp += "*\t"; }
- else { temp += "\t"; }
+ if (isChimericTStudent[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
- if (isChimericANOVA[i][k]) { temp += "*\t"; }
- else { temp += "\t"; }
+ if (isChimericANOVA[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
- out << k+1 << '\t' << temp << endl;
+ outString += toString(k+1) + "\t" + temp + "\n";
- if (temp == "*\t*\t*\t") { results = true; }
- }
- out << endl;
-
- if (results) {
- mothurOut(querySeqs[i]->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
- }
+ if (temp == "*\t*\t*\t") { results = true; }
+ }
+ outString += "\n";
+
+ MPI_Status status;
+ int length = outString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outString.c_str(), length);
+
+ MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+
+ if (results) {
+ m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine();
+ outAccString += querySeq->getName() + "\n";
+
+ MPI_Status statusAcc;
+ length = outAccString.length();
+ char* buf = new char[length];
+ memcpy(buf, outAccString.c_str(), length);
+
+ MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
+ delete buf;
}
+
+ //free memory
+ for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; }
+
+ return NULL;
}
catch(exception& e) {
- errorOut(e, "Ccode", "print");
+ m->errorOut(e, "Ccode", "print");
exit(1);
}
}
-
//***************************************************************************************************************
-void Ccode::getChimeras() {
+int Ccode::printMapping(string& output) {
try {
-
- //read in query sequences and subject sequences
- mothurOut("Reading sequences and template file... "); cout.flush();
- querySeqs = readSeqs(fastafile);
- templateSeqs = readSeqs(templateFile);
- mothurOut("Done."); mothurOutEndLine();
-
- int numSeqs = querySeqs.size();
-
- closest.resize(numSeqs);
-
- refCombo.resize(numSeqs, 0);
- sumRef.resize(numSeqs);
- varRef.resize(numSeqs);
- varQuery.resize(numSeqs);
- sdRef.resize(numSeqs);
- sdQuery.resize(numSeqs);
- sumQuery.resize(numSeqs);
- sumSquaredRef.resize(numSeqs);
- sumSquaredQuery.resize(numSeqs);
- averageRef.resize(numSeqs);
- averageQuery.resize(numSeqs);
- anova.resize(numSeqs);
- isChimericConfidence.resize(numSeqs);
- isChimericTStudent.resize(numSeqs);
- isChimericANOVA.resize(numSeqs);
- trim.resize(numSeqs);
- spotMap.resize(numSeqs);
- windowSizes.resize(numSeqs, window);
- windows.resize(numSeqs);
-
- //break up file if needed
- int linesPerProcess = numSeqs / processors ;
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- //find breakup of sequences for all times we will Parallelize
- if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
- else {
- //fill line pairs
- for (int i = 0; i < (processors-1); i++) {
- lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
- }
- //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
- int i = processors - 1;
- lines.push_back(new linePair((i*linesPerProcess), numSeqs));
- }
+ MPI_Status status;
+ int length = output.length();
+ char* buf = new char[length];
+ memcpy(buf, output.c_str(), length);
+
+ MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status);
+ delete buf;
- //find breakup of templatefile for quantiles
- if (processors == 1) { templateLines.push_back(new linePair(0, templateSeqs.size())); }
- else {
- for (int i = 0; i < processors; i++) {
- templateLines.push_back(new linePair());
- templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
- templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
- }
- }
- #else
- lines.push_back(new linePair(0, numSeqs));
- templateLines.push_back(new linePair(0, templateSeqs.size()));
- #endif
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Ccode", "printMapping");
+ exit(1);
+ }
+}
+#endif
+//***************************************************************************************************************
+int Ccode::getChimeras(Sequence* query) {
+ try {
- distCalc = new eachGapDist();
- decalc = new DeCalculator();
-
- //find closest
- if (processors == 1) {
- mothurOut("Finding top matches for sequences... "); cout.flush();
- closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
- mothurOut("Done."); mothurOutEndLine();
- }else { createProcessesClosest(); }
+ closest.clear();
+ refCombo = 0;
+ sumRef.clear();
+ varRef.clear();
+ varQuery.clear();
+ sdRef.clear();
+ sdQuery.clear();
+ sumQuery.clear();
+ sumSquaredRef.clear();
+ sumSquaredQuery.clear();
+ averageRef.clear();
+ averageQuery.clear();
+ anova.clear();
+ isChimericConfidence.clear();
+ isChimericTStudent.clear();
+ isChimericANOVA.clear();
+ trim.clear();
+ spotMap.clear();
+ windowSizes = window;
+ windows.clear();
- //initialize spotMap
- for (int j = 0; j < numSeqs; j++) {
- for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
- spotMap[j][i] = i;
- }
- }
+
+ querySeq = query;
+ //find closest matches to query
+ closest = findClosest(query, numWanted);
+
+ if (m->control_pressed) { return 0; }
+
+ //initialize spotMap
+ for (int i = 0; i < query->getAligned().length(); i++) { spotMap[i] = i; }
+
//mask sequences if the user wants to
if (seqMask != "") {
decalc->setMask(seqMask);
- //mask querys
- for (int i = 0; i < querySeqs.size(); i++) {
- decalc->runMask(querySeqs[i]);
- }
-
- //mask templates
- for (int i = 0; i < templateSeqs.size(); i++) {
- decalc->runMask(templateSeqs[i]);
- }
+ decalc->runMask(query);
- for (int i = 0; i < numSeqs; i++) {
- spotMap[i] = decalc->getMaskMap();
- }
+ //mask closest
+ for (int i = 0; i < closest.size(); i++) { decalc->runMask(closest[i].seq); }
+
+ spotMap = decalc->getMaskMap();
}
if (filter) {
- vector<Sequence*> temp = templateSeqs;
- for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]); }
+ vector<Sequence*> temp;
+ for (int i = 0; i < closest.size(); i++) { temp.push_back(closest[i].seq); }
+ temp.push_back(query);
- createFilter(temp);
+ createFilter(temp, 0.5);
- runFilter(querySeqs);
- runFilter(templateSeqs);
+ for (int i = 0; i < temp.size(); i++) {
+ if (m->control_pressed) { return 0; }
+ runFilter(temp[i]);
+ }
//update spotMap
map<int, int> newMap;
int spot = 0;
- int j = 0;
for (int i = 0; i < filterString.length(); i++) {
if (filterString[i] == '1') {
//add to newMap
- newMap[spot] = spotMap[j][i];
+ newMap[spot] = spotMap[i];
spot++;
}
}
-
- for (int i = 0; i < numSeqs; i++) {
- spotMap[i] = newMap;
- }
+ spotMap = newMap;
}
//trim sequences - this follows ccodes remove_extra_gaps
- for (int i = 0; i < querySeqs.size(); i++) {
- trimSequences(i);
- }
+ trimSequences(query);
+ if (m->control_pressed) { return 0; }
//windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length().
//Our default will be 10% and we will warn if user tries to use a window above or below these recommendations
- for (int i = 0; i < querySeqs.size(); i++) {
- windows[i] = findWindows(i);
- }
+ windows = findWindows();
+ if (m->control_pressed) { return 0; }
//remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later
- if (processors == 1) {
- for (int i = 0; i < closest.size(); i++) {
- removeBadReferenceSeqs(closest[i], i);
- }
- }else { createProcessesRemoveBad(); }
-
+ removeBadReferenceSeqs(closest);
+ if (m->control_pressed) { return 0; }
- if (processors == 1) {
- //find the averages for each querys references
- for (int i = 0; i < numSeqs; i++) {
- getAverageRef(closest[i], i); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
- }
-
- //find the averages for the query
- for (int i = 0; i < numSeqs; i++) {
- getAverageQuery(closest[i], i); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
- }
- }else { createProcessesAverages(); }
+ //find the averages for each querys references
+ getAverageRef(closest); //fills sumRef, averageRef, sumSquaredRef and refCombo.
+ getAverageQuery(closest, query); //fills sumQuery, averageQuery, sumSquaredQuery.
+ if (m->control_pressed) { return 0; }
- if (processors == 1) {
- //find the averages for each querys references
- for (int i = 0; i < numSeqs; i++) {
- findVarianceRef(i); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
- }
+ //find the averages for each querys references
+ findVarianceRef(); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
+ if (m->control_pressed) { return 0; }
- //find the averages for the query
- for (int i = 0; i < numSeqs; i++) {
- findVarianceQuery(i); //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
- }
- }else { createProcessesVariances(); }
-
- if (processors == 1) {
- for (int i = 0; i < numSeqs; i++) {
- determineChimeras(i); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
- }
- }else { createProcessesDetermine(); }
+ //find the averages for the query
+ findVarianceQuery(); //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0.
+ if (m->control_pressed) { return 0; }
+
+ determineChimeras(); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
+ if (m->control_pressed) { return 0; }
- //free memory
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
- for (int i = 0; i < templateLines.size(); i++) { delete templateLines[i]; }
- delete distCalc;
- delete decalc;
-
+ return 0;
}
catch(exception& e) {
- errorOut(e, "Ccode", "getChimeras");
+ m->errorOut(e, "Ccode", "getChimeras");
exit(1);
}
}
/***************************************************************************************************************/
//ccode algo says it does this to "Removes the initial and final gaps to avoid biases due to incomplete sequences."
-void Ccode::trimSequences(int query) {
+void Ccode::trimSequences(Sequence* query) {
try {
int frontPos = 0; //should contain first position in all seqs that is not a gap character
- int rearPos = querySeqs[query]->getAligned().length();
+ int rearPos = query->getAligned().length();
//********find first position in closest seqs that is a non gap character***********//
//find first position all query seqs that is a non gap character
- for (int i = 0; i < closest[query].size(); i++) {
+ for (int i = 0; i < closest.size(); i++) {
- string aligned = closest[query][i].seq->getAligned();
+ string aligned = closest[i].seq->getAligned();
int pos = 0;
//find first spot in this seq
}
//find first position all querySeq[query] that is a non gap character
- string aligned = querySeqs[query]->getAligned();
+ string aligned = query->getAligned();
int pos = 0;
//find first spot in this seq
//********find last position in closest seqs that is a non gap character***********//
- for (int i = 0; i < closest[query].size(); i++) {
+ for (int i = 0; i < closest.size(); i++) {
- string aligned = closest[query][i].seq->getAligned();
+ string aligned = closest[i].seq->getAligned();
int pos = aligned.length();
//find first spot in this seq
}
//find last position all querySeqs[query] that is a non gap character
- aligned = querySeqs[query]->getAligned();
+ aligned = query->getAligned();
pos = aligned.length();
//find first spot in this seq
//check to make sure that is not whole seq
- if ((rearPos - frontPos - 1) <= 0) { mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1); }
+ if ((rearPos - frontPos - 1) <= 0) { m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); m->mothurOutEndLine(); exit(1); }
map<int, int> tempTrim;
tempTrim[frontPos] = rearPos;
//save trimmed locations
- trim[query] = tempTrim;
+ trim = tempTrim;
//update spotMask
map<int, int> newMap;
for (int i = frontPos; i < rearPos; i++) {
//add to newMap
-//cout << query << '\t' << i << '\t' << spotMap[query][i] << endl;
- newMap[spot] = spotMap[query][i];
+ newMap[spot] = spotMap[i];
spot++;
}
-
- //for (it = newMap.begin(); it!=newMap.end(); it++) {
- //cout << query << '\t' << it->first << '\t' << it->second << endl;
- //}
-
- spotMap[query] = newMap;
-
-
+ spotMap = newMap;
}
catch(exception& e) {
- errorOut(e, "Ccode", "trimSequences");
+ m->errorOut(e, "Ccode", "trimSequences");
exit(1);
}
-
}
/***************************************************************************************************************/
-vector<int> Ccode::findWindows(int query) {
+vector<int> Ccode::findWindows() {
try {
vector<int> win;
- it = trim[query].begin();
+ it = trim.begin();
int length = it->second - it->first;
-
+
//default is wanted = 10% of total length
- if (windowSizes[query] > length) {
- mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
- windowSizes[query] = length / 10;
- }else if (windowSizes[query] == 0) { windowSizes[query] = length / 10; }
- else if (windowSizes[query] > (length / 20)) {
- mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine();
- }else if (windowSizes[query] < (length / 5)) {
- mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine();
+ if (windowSizes > length) {
+ m->mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
+ windowSizes = length / 10;
+ }else if (windowSizes == 0) { windowSizes = length / 10; }
+ else if (windowSizes > (length * 0.20)) {
+ m->mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); m->mothurOutEndLine();
+ }else if (windowSizes < (length * 0.05)) {
+ m->mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); m->mothurOutEndLine();
}
//save starting points of each window
- for (int m = it->first; m < (it->second-windowSizes[query]); m+=windowSizes[query]) { win.push_back(m); }
+ for (int m = it->first; m < (it->second-windowSizes); m+=windowSizes) { win.push_back(m); }
//save last window
if (win[win.size()-1] < (it->first+length)) {
- win.push_back(win[win.size()-1]+windowSizes[query]); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
+ win.push_back(win[win.size()-1]+windowSizes); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
} //with this you would get 1,25,50,75,100
-
return win;
-
}
catch(exception& e) {
- errorOut(e, "Ccode", "findWindows");
+ m->errorOut(e, "Ccode", "findWindows");
exit(1);
}
}
}
catch(exception& e) {
- errorOut(e, "Ccode", "getDiff");
+ m->errorOut(e, "Ccode", "getDiff");
exit(1);
}
}
//***************************************************************************************************************
//tried to make this look most like ccode original implementation
-void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
+void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs) {
try {
vector< vector<int> > numDiffBases;
//initialize to 0
for (int i = 0; i < numDiffBases.size(); i++) { numDiffBases[i].resize(seqs.size(),0); }
- it = trim[query].begin();
+ it = trim.begin();
int length = it->second - it->first;
//calc differences from each sequence to everyother seq in the set
seqs = goodSeqs;
}else { //warn, but dont remove any
- mothurOut(querySeqs[query]->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); mothurOutEndLine();
+ m->mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); m->mothurOutEndLine();
}
-
}
catch(exception& e) {
- errorOut(e, "Ccode", "removeBadReferenceSeqs");
+ m->errorOut(e, "Ccode", "removeBadReferenceSeqs");
exit(1);
}
}
//***************************************************************************************************************
-vector< vector<SeqDist> > Ccode::findClosest(int start, int end, int numWanted) {
+//makes copy of templateseq for filter
+vector<SeqDist> Ccode::findClosest(Sequence* q, int numWanted) {
try{
- vector< vector<SeqDist> > topMatches; topMatches.resize(querySeqs.size());
-
- float smallestOverall, smallestLeft, smallestRight;
- smallestOverall = 1000; smallestLeft = 1000; smallestRight = 1000;
+ vector<SeqDist> topMatches;
- //for each sequence in querySeqs - find top matches to use as reference
- for(int j = start; j < end; j++){
+ Sequence query = *(q);
- Sequence query = *(querySeqs[j]);
+ //calc distance to each sequence in template seqs
+ for (int i = 0; i < templateSeqs.size(); i++) {
- vector<SeqDist> distances;
-
- //calc distance to each sequence in template seqs
- for (int i = 0; i < templateSeqs.size(); i++) {
-
- Sequence ref = *(templateSeqs[i]);
+ Sequence ref = *(templateSeqs[i]);
- //find overall dist
- distCalc->calcDist(query, ref);
- float dist = distCalc->getDist();
+ //find overall dist
+ distCalc->calcDist(query, ref);
+ float dist = distCalc->getDist();
- //save distance
- SeqDist temp;
- temp.seq = templateSeqs[i];
- temp.dist = dist;
+ //save distance
+ SeqDist temp;
+ temp.seq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+ temp.dist = dist;
- distances.push_back(temp);
- }
-
- sort(distances.begin(), distances.end(), compareSeqDist);
-
- //save the number of top matches wanted
- for (int h = 0; h < numWanted; h++) {
- topMatches[j].push_back(distances[h]);
- }
+ topMatches.push_back(temp);
}
+ sort(topMatches.begin(), topMatches.end(), compareSeqDist);
+
+ for (int i = numWanted; i < topMatches.size(); i++) { delete topMatches[i].seq; }
+
+ topMatches.resize(numWanted);
+
return topMatches;
}
catch(exception& e) {
- errorOut(e, "Ccode", "findClosestSides");
+ m->errorOut(e, "Ccode", "findClosestSides");
exit(1);
}
}
/**************************************************************************************************/
//find the distances from each reference sequence to every other reference sequence for each window for this query
-void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
+void Ccode::getAverageRef(vector<SeqDist> ref) {
try {
- vector< vector< vector<int> > > diffs; //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
+ vector< vector< vector<int> > > diffs; //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
//initialize diffs vector
diffs.resize(ref.size());
for (int i = 0; i < diffs.size(); i++) {
diffs[i].resize(ref.size());
for (int j = 0; j < diffs[i].size(); j++) {
- diffs[i][j].resize(windows[query].size(), 0);
+ diffs[i][j].resize(windows.size(), 0);
}
}
- it = trim[query].begin();
+ it = trim.begin();
//find the distances from each reference sequence to every other reference sequence for each window for this query
for (int i = 0; i < ref.size(); i++) {
string refJ = ref[j].seq->getAligned();
- for (int k = 0; k < windows[query].size(); k++) {
+ for (int k = 0; k < windows.size(); k++) {
string refIWindowk, refJWindowk;
- if (k < windows[query].size()-1) {
+ if (k < windows.size()-1) {
//get window strings
- refIWindowk = refI.substr(windows[query][k], windowSizes[query]);
- refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);
+ refIWindowk = refI.substr(windows[k], windowSizes);
+ refJWindowk = refJ.substr(windows[k], windowSizes);
}else { //last window may be smaller than rest - see findwindows
//get window strings
- refIWindowk = refI.substr(windows[query][k], (it->second-windows[query][k]));
- refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
+ refIWindowk = refI.substr(windows[k], (it->second-windows[k]));
+ refJWindowk = refJ.substr(windows[k], (it->second-windows[k]));
}
//find differences
int diff = getDiff(refIWindowk, refJWindowk);
-//cout << i << '\t' << j << '\t' << k << '\t' << diff << endl;
+
//save differences in [i][j][k] and [j][i][k] since they are the same
diffs[i][j][k] = diff;
diffs[j][i][k] = diff;
}//i
//initialize sumRef for this query
- sumRef[query].resize(windows[query].size(), 0);
- sumSquaredRef[query].resize(windows[query].size(), 0);
- averageRef[query].resize(windows[query].size(), 0);
+ sumRef.resize(windows.size(), 0);
+ sumSquaredRef.resize(windows.size(), 0);
+ averageRef.resize(windows.size(), 0);
//find the sum of the differences for hte reference sequences
for (int i = 0; i < diffs.size(); i++) {
for (int j = 0; j < i; j++) {
//increment this querys reference sequences combos
- refCombo[query]++;
+ refCombo++;
for (int k = 0; k < diffs[i][j].size(); k++) {
- sumRef[query][k] += diffs[i][j][k];
- sumSquaredRef[query][k] += (diffs[i][j][k]*diffs[i][j][k]);
+ sumRef[k] += diffs[i][j][k];
+ sumSquaredRef[k] += (diffs[i][j][k]*diffs[i][j][k]);
}//k
}//j
//find the average of the differences for the references for each window
- for (int i = 0; i < windows[query].size(); i++) {
- averageRef[query][i] = sumRef[query][i] / (float) refCombo[query];
+ for (int i = 0; i < windows.size(); i++) {
+ averageRef[i] = sumRef[i] / (float) refCombo;
}
}
catch(exception& e) {
- errorOut(e, "Ccode", "getAverageRef");
+ m->errorOut(e, "Ccode", "getAverageRef");
exit(1);
}
}
/**************************************************************************************************/
-void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
+void Ccode::getAverageQuery (vector<SeqDist> ref, Sequence* query) {
try {
vector< vector<int> > diffs; //diffs[1][2] is the number of differences between querySeqs[query] and ref seq 1 at window 2.
//initialize diffs vector
diffs.resize(ref.size());
for (int j = 0; j < diffs.size(); j++) {
- diffs[j].resize(windows[query].size(), 0);
+ diffs[j].resize(windows.size(), 0);
}
- it = trim[query].begin();
+ it = trim.begin();
- string refQuery = querySeqs[query]->getAligned();
+ string refQuery = query->getAligned();
//j<i, so you don't find distances from i to j and then j to i.
for (int j = 0; j < ref.size(); j++) {
string refJ = ref[j].seq->getAligned();
- for (int k = 0; k < windows[query].size(); k++) {
+ for (int k = 0; k < windows.size(); k++) {
string QueryWindowk, refJWindowk;
- if (k < windows[query].size()-1) {
+ if (k < windows.size()-1) {
//get window strings
- QueryWindowk = refQuery.substr(windows[query][k], windowSizes[query]);
- refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);
+ QueryWindowk = refQuery.substr(windows[k], windowSizes);
+ refJWindowk = refJ.substr(windows[k], windowSizes);
}else { //last window may be smaller than rest - see findwindows
//get window strings
- QueryWindowk = refQuery.substr(windows[query][k], (it->second-windows[query][k]));
- refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
+ QueryWindowk = refQuery.substr(windows[k], (it->second-windows[k]));
+ refJWindowk = refJ.substr(windows[k], (it->second-windows[k]));
}
//find differences
int diff = getDiff(QueryWindowk, refJWindowk);
-//cout << j << '\t' << k << '\t' << diff << endl;
+
//save differences
diffs[j][k] = diff;
//initialize sumRef for this query
- sumQuery[query].resize(windows[query].size(), 0);
- sumSquaredQuery[query].resize(windows[query].size(), 0);
- averageQuery[query].resize(windows[query].size(), 0);
+ sumQuery.resize(windows.size(), 0);
+ sumSquaredQuery.resize(windows.size(), 0);
+ averageQuery.resize(windows.size(), 0);
//find the sum of the differences
for (int j = 0; j < diffs.size(); j++) {
for (int k = 0; k < diffs[j].size(); k++) {
- sumQuery[query][k] += diffs[j][k];
- sumSquaredQuery[query][k] += (diffs[j][k]*diffs[j][k]);
+ sumQuery[k] += diffs[j][k];
+ sumSquaredQuery[k] += (diffs[j][k]*diffs[j][k]);
}//k
}//j
//find the average of the differences for the references for each window
- for (int i = 0; i < windows[query].size(); i++) {
- averageQuery[query][i] = sumQuery[query][i] / (float) ref.size();
+ for (int i = 0; i < windows.size(); i++) {
+ averageQuery[i] = sumQuery[i] / (float) ref.size();
}
-
-
}
catch(exception& e) {
- errorOut(e, "Ccode", "getAverageQuery");
+ m->errorOut(e, "Ccode", "getAverageQuery");
exit(1);
}
}
/**************************************************************************************************/
-void Ccode::findVarianceRef (int query) {
+void Ccode::findVarianceRef() {
try {
- varRef[query].resize(windows[query].size(), 0);
- sdRef[query].resize(windows[query].size(), 0);
+ varRef.resize(windows.size(), 0);
+ sdRef.resize(windows.size(), 0);
//for each window
- for (int i = 0; i < windows[query].size(); i++) {
- varRef[query][i] = (sumSquaredRef[query][i] - ((sumRef[query][i]*sumRef[query][i])/(float)refCombo[query])) / (float)(refCombo[query]-1);
- sdRef[query][i] = sqrt(varRef[query][i]);
+ for (int i = 0; i < windows.size(); i++) {
+ varRef[i] = (sumSquaredRef[i] - ((sumRef[i]*sumRef[i])/(float)refCombo)) / (float)(refCombo-1);
+ sdRef[i] = sqrt(varRef[i]);
//set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
- if (averageRef[query][i] < 0.001) { averageRef[query][i] = 0.001; }
- if (sumRef[query][i] < 0.001) { sumRef[query][i] = 0.001; }
- if (varRef[query][i] < 0.001) { varRef[query][i] = 0.001; }
- if (sumSquaredRef[query][i] < 0.001) { sumSquaredRef[query][i] = 0.001; }
- if (sdRef[query][i] < 0.001) { sdRef[query][i] = 0.001; }
+ if (averageRef[i] < 0.001) { averageRef[i] = 0.001; }
+ if (sumRef[i] < 0.001) { sumRef[i] = 0.001; }
+ if (varRef[i] < 0.001) { varRef[i] = 0.001; }
+ if (sumSquaredRef[i] < 0.001) { sumSquaredRef[i] = 0.001; }
+ if (sdRef[i] < 0.001) { sdRef[i] = 0.001; }
}
}
catch(exception& e) {
- errorOut(e, "Ccode", "findVarianceRef");
+ m->errorOut(e, "Ccode", "findVarianceRef");
exit(1);
}
}
/**************************************************************************************************/
-void Ccode::findVarianceQuery (int query) {
+void Ccode::findVarianceQuery() {
try {
- varQuery[query].resize(windows[query].size(), 0);
- sdQuery[query].resize(windows[query].size(), 0);
+ varQuery.resize(windows.size(), 0);
+ sdQuery.resize(windows.size(), 0);
//for each window
- for (int i = 0; i < windows[query].size(); i++) {
- varQuery[query][i] = (sumSquaredQuery[query][i] - ((sumQuery[query][i]*sumQuery[query][i])/(float) closest[query].size())) / (float) (closest[query].size()-1);
- sdQuery[query][i] = sqrt(varQuery[query][i]);
+ for (int i = 0; i < windows.size(); i++) {
+ varQuery[i] = (sumSquaredQuery[i] - ((sumQuery[i]*sumQuery[i])/(float) closest.size())) / (float) (closest.size()-1);
+ sdQuery[i] = sqrt(varQuery[i]);
//set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
- if (averageQuery[query][i] < 0.001) { averageQuery[query][i] = 0.001; }
- if (sumQuery[query][i] < 0.001) { sumQuery[query][i] = 0.001; }
- if (varQuery[query][i] < 0.001) { varQuery[query][i] = 0.001; }
- if (sumSquaredQuery[query][i] < 0.001) { sumSquaredQuery[query][i] = 0.001; }
- if (sdQuery[query][i] < 0.001) { sdQuery[query][i] = 0.001; }
+ if (averageQuery[i] < 0.001) { averageQuery[i] = 0.001; }
+ if (sumQuery[i] < 0.001) { sumQuery[i] = 0.001; }
+ if (varQuery[i] < 0.001) { varQuery[i] = 0.001; }
+ if (sumSquaredQuery[i] < 0.001) { sumSquaredQuery[i] = 0.001; }
+ if (sdQuery[i] < 0.001) { sdQuery[i] = 0.001; }
}
}
catch(exception& e) {
- errorOut(e, "Ccode", "findVarianceQuery");
+ m->errorOut(e, "Ccode", "findVarianceQuery");
exit(1);
}
}
/**************************************************************************************************/
-void Ccode::determineChimeras (int query) {
+void Ccode::determineChimeras() {
try {
- isChimericConfidence[query].resize(windows[query].size(), false);
- isChimericTStudent[query].resize(windows[query].size(), false);
- isChimericANOVA[query].resize(windows[query].size(), false);
- anova[query].resize(windows[query].size());
+ isChimericConfidence.resize(windows.size(), false);
+ isChimericTStudent.resize(windows.size(), false);
+ isChimericANOVA.resize(windows.size(), false);
+ anova.resize(windows.size());
//for each window
- for (int i = 0; i < windows[query].size(); i++) {
+ for (int i = 0; i < windows.size(); i++) {
//get confidence limits
- float t = getT(closest[query].size()-1); //how many seqs you are comparing to this querySeq
- float dsUpper = (averageQuery[query][i] + (t * sdQuery[query][i])) / averageRef[query][i];
- float dsLower = (averageQuery[query][i] - (t * sdQuery[query][i])) / averageRef[query][i];
-//cout << t << '\t' << "ds upper = " << dsUpper << " dsLower = " << dsLower << endl;
- if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[query][i] > averageRef[query][i])) { /* range does not include 1 */
- isChimericConfidence[query][i] = true; /* significantly higher at P<0.05 */
-//cout << i << " made it here" << endl;
+ float t = getT(closest.size()-1); //how many seqs you are comparing to this querySeq
+ float dsUpper = (averageQuery[i] + (t * sdQuery[i])) / averageRef[i];
+ float dsLower = (averageQuery[i] - (t * sdQuery[i])) / averageRef[i];
+
+ if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[i] > averageRef[i])) { /* range does not include 1 */
+ isChimericConfidence[i] = true; /* significantly higher at P<0.05 */
+
}
//student t test
- int degreeOfFreedom = refCombo[query] + closest[query].size() - 2;
- float denomForT = (((refCombo[query]-1) * varQuery[query][i] + (closest[query].size() - 1) * varRef[query][i]) / (float) degreeOfFreedom) * ((refCombo[query] + closest[query].size()) / (float) (refCombo[query] * closest[query].size())); /* denominator, without sqrt(), for ts calculations */
+ int degreeOfFreedom = refCombo + closest.size() - 2;
+ float denomForT = (((refCombo-1) * varQuery[i] + (closest.size() - 1) * varRef[i]) / (float) degreeOfFreedom) * ((refCombo + closest.size()) / (float) (refCombo * closest.size())); /* denominator, without sqrt(), for ts calculations */
- float ts = fabs((averageQuery[query][i] - averageRef[query][i]) / (sqrt(denomForT))); /* value of ts for t-student test */
+ float ts = fabs((averageQuery[i] - averageRef[i]) / (sqrt(denomForT))); /* value of ts for t-student test */
t = getT(degreeOfFreedom);
-//cout << i << '\t' << t << '\t' << ts << endl;
- if ((ts >= t) && (averageQuery[query][i] > averageRef[query][i])) {
- isChimericTStudent[query][i] = true; /* significantly higher at P<0.05 */
+
+ if ((ts >= t) && (averageQuery[i] > averageRef[i])) {
+ isChimericTStudent[i] = true; /* significantly higher at P<0.05 */
}
//anova test
- float value1 = sumQuery[query][i] + sumRef[query][i];
- float value2 = sumSquaredQuery[query][i] + sumSquaredRef[query][i];
- float value3 = ((sumQuery[query][i]*sumQuery[query][i]) / (float) (closest[query].size())) + ((sumRef[query][i] * sumRef[query][i]) / (float) refCombo[query]);
- float value4 = (value1 * value1) / ( (float) (closest[query].size() + refCombo[query]) );
+ float value1 = sumQuery[i] + sumRef[i];
+ float value2 = sumSquaredQuery[i] + sumSquaredRef[i];
+ float value3 = ((sumQuery[i]*sumQuery[i]) / (float) (closest.size())) + ((sumRef[i] * sumRef[i]) / (float) refCombo);
+ float value4 = (value1 * value1) / ( (float) (closest.size() + refCombo) );
float value5 = value2 - value4;
float value6 = value3 - value4;
float value7 = value5 - value6;
float f = getF(degreeOfFreedom);
- if ((anovaValue >= f) && (averageQuery[query][i] > averageRef[query][i])) {
- isChimericANOVA[query][i] = true; /* significant P<0.05 */
+ if ((anovaValue >= f) && (averageQuery[i] > averageRef[i])) {
+ isChimericANOVA[i] = true; /* significant P<0.05 */
}
if (isnan(anovaValue) || isinf(anovaValue)) { anovaValue = 0.0; }
- anova[query][i] = anovaValue;
+ anova[i] = anovaValue;
}
}
catch(exception& e) {
- errorOut(e, "Ccode", "determineChimeras");
+ m->errorOut(e, "Ccode", "determineChimeras");
exit(1);
}
}
else if (numseq > 2) tvalue = 2.353;
else if (numseq > 1) tvalue = 2.920;
else if (numseq <= 1) {
- mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
+ m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine();
}
return tvalue;
}
catch(exception& e) {
- errorOut(e, "Ccode", "getT");
+ m->errorOut(e, "Ccode", "getT");
exit(1);
}
}
else if (numseq > 1) fvalue = 18.5;
else if (numseq > 0) fvalue = 161;
else if (numseq <= 0) {
- mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
+ m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); m->mothurOutEndLine();
}
return fvalue;
}
catch(exception& e) {
- errorOut(e, "Ccode", "getF");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-void Ccode::createProcessesClosest() {
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- mothurOut("Finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
- closest = findClosest(lines[process]->start, lines[process]->end, numWanted);
- mothurOut("Done finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
- //output pairs
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- for (int j = 0; j < closest[i].size(); j++) {
- closest[i][j].seq->printSequence(out);
- }
- }
- out << ">" << endl; //to stop sequence read
-
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- for (int j = 0; j < closest[i].size(); j++) {
- out << closest[i][j].dist << '\t';
- }
- out << endl;
- }
- out.close();
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- vector< vector<Sequence*> > tempClosest; tempClosest.resize(querySeqs.size());
- //get pairs
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- vector<Sequence*> tempVector;
-
- for (int j = 0; j < numWanted; j++) {
-
- Sequence* temp = new Sequence(in);
- gobble(in);
-
- tempVector.push_back(temp);
- }
-
- tempClosest[k] = tempVector;
- }
-
- string junk;
- in >> junk; gobble(in); // to get ">"
-
- vector< vector<float> > dists; dists.resize(querySeqs.size());
-
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- dists[k].resize(numWanted);
- for (int j = 0; j < numWanted; j++) {
- in >> dists[k][j];
- }
- gobble(in);
-
- }
-
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- closest[k].resize(numWanted);
- for (int j = 0; j < closest[k].size(); j++) {
- closest[k][j].seq = tempClosest[k][j];
- closest[k][j].dist = dists[k][j];
- }
- }
-
- in.close();
- remove(s.c_str());
- }
-
-
-#else
- closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
-#endif
-
- }
- catch(exception& e) {
- errorOut(e, "Ccode", "createProcessesClosest");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-void Ccode::createProcessesRemoveBad() {
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- removeBadReferenceSeqs(closest[i], i);
- }
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
- //output pairs
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- out << closest[i].size() << endl;
- for (int j = 0; j < closest[i].size(); j++) {
- closest[i][j].seq->printSequence(out);
- }
- out << ">" << endl; //to stop sequence read
- }
-
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- for (int j = 0; j < closest[i].size(); j++) {
- out << closest[i][j].dist << '\t';
- }
- out << endl;
- }
-
- out.close();
-
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- vector< vector<Sequence*> > tempClosest; tempClosest.resize(querySeqs.size());
- vector<int> sizes;
- //get pairs
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
-
- int num;
- in >> num;
- sizes.push_back(num);
- gobble(in);
-
- vector<Sequence*> tempVector;
-
- for (int j = 0; j < num; j++) {
-
- Sequence* temp = new Sequence(in);
- gobble(in);
-
- tempVector.push_back(temp);
- }
- string junk;
- in >> junk; gobble(in); // to get ">"
-
- tempClosest[k] = tempVector;
- }
-
- vector< vector<float> > dists; dists.resize(querySeqs.size());
- int count = 0;
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- dists[k].resize(sizes[count]);
- for (int j = 0; j < sizes[count]; j++) {
- in >> dists[k][j];
- }
- gobble(in);
- count++;
- }
-
- count = 0;
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- for (int j = 0; j < sizes[count]; j++) {
- closest[k][j].seq = tempClosest[k][j];
- closest[k][j].dist = dists[k][j];
- }
- count++;
- }
-
- in.close();
- remove(s.c_str());
- }
-#else
- for (int i = 0; i < closest.size(); i++) {
- removeBadReferenceSeqs(closest[i], i);
- }
-#endif
-
- }
- catch(exception& e) {
- errorOut(e, "Ccode", "createProcessesRemoveBad");
- exit(1);
- }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesAverages() {
- try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- //find the averages for each querys references
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- getAverageRef(closest[i], i); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
- }
-
- //find the averages for the query
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- getAverageQuery(closest[i], i); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
- }
-
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
- //output pairs
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- for (int j = 0; j < windows[i].size(); j++) {
- out << sumRef[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << averageRef[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << sumSquaredRef[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << sumQuery[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << averageQuery[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << sumSquaredQuery[i][j] << '\t';
- }
- out << endl;
- out << refCombo[i] << endl;
- }
-
- out.close();
-
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- //get pairs
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- sumRef[k].resize(windows[k].size());
- averageRef[k].resize(windows[k].size());
- sumSquaredRef[k].resize(windows[k].size());
- averageQuery[k].resize(windows[k].size());
- sumQuery[k].resize(windows[k].size());
- sumSquaredQuery[k].resize(windows[k].size());
-
- for (int j = 0; j < windows[k].size(); j++) {
- in >> sumRef[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> averageRef[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> sumSquaredRef[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> sumQuery[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> averageQuery[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> sumSquaredQuery[k][j];
- }
- gobble(in);
- in >> refCombo[k];
- gobble(in);
- }
-
- in.close();
- remove(s.c_str());
- }
-#else
- //find the averages for each querys references
- for (int i = 0; i < querySeqs.size(); i++) {
- getAverageRef(closest[i], i); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
- }
-
- //find the averages for the query
- for (int i = 0; i < querySeqs.size(); i++) {
- getAverageQuery(closest[i], i); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
- }
-
-#endif
-
- }
- catch(exception& e) {
- errorOut(e, "Ccode", "createProcessesAverages");
- exit(1);
- }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesVariances() {
- try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- //find the averages for each querys references
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- findVarianceRef(i); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
- }
-
- //find the averages for the query
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- findVarianceQuery(i); //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
- }
-
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
- //output pairs
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- for (int j = 0; j < windows[i].size(); j++) {
- out << varRef[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << sdRef[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << varQuery[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << sdQuery[i][j] << '\t';
- }
- out << endl;
- }
-
- out.close();
-
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- //get pairs
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- varRef[k].resize(windows[k].size());
- sdRef[k].resize(windows[k].size());
- varQuery[k].resize(windows[k].size());
- sdQuery[k].resize(windows[k].size());
-
- for (int j = 0; j < windows[k].size(); j++) {
- in >> varRef[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> sdRef[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> varQuery[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> sdQuery[k][j];
- }
- gobble(in);
- }
-
- in.close();
- remove(s.c_str());
- }
-#else
- //find the averages for each querys references
- for (int i = 0; i < querySeqs.size(); i++) {
- findVarianceRef(i); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
- }
-
- //find the averages for the query
- for (int i = 0; i < querySeqs.size(); i++) {
- findVarianceQuery(i); //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
- }
-#endif
- }
- catch(exception& e) {
- errorOut(e, "Ccode", "createProcessesVariances");
- exit(1);
- }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesDetermine() {
- try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- //find the averages for each querys references
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- determineChimeras(i); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
- }
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
- //output pairs
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- for (int j = 0; j < windows[i].size(); j++) {
- out << anova[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << isChimericConfidence[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << isChimericTStudent[i][j] << '\t';
- }
- out << endl;
- for (int j = 0; j < windows[i].size(); j++) {
- out << isChimericANOVA[i][j] << '\t';
- }
- out << endl;
- }
-
- out.close();
-
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- //get pairs
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
- anova[k].resize(windows[k].size());
- isChimericConfidence[k].resize(windows[k].size(), false);
- isChimericTStudent[k].resize(windows[k].size(), false);
- isChimericANOVA[k].resize(windows[k].size(), false);
- int tempBool;
-
- for (int j = 0; j < windows[k].size(); j++) {
- in >> anova[k][j];
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> tempBool;
- if (tempBool == 1) { isChimericConfidence[k][j] = true; }
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> tempBool;
- if (tempBool == 1) { isChimericTStudent[k][j] = true; }
- }
- gobble(in);
- for (int j = 0; j < windows[k].size(); j++) {
- in >> tempBool;
- if (tempBool == 1) { isChimericANOVA[k][j] = true; }
- }
- gobble(in);
- }
-
- in.close();
- remove(s.c_str());
- }
- #else
- for (int i = 0; i < querySeqs.size(); i++) {
- determineChimeras(i); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
- }
- #endif
-
- }
- catch(exception& e) {
- errorOut(e, "Ccode", "createProcessesDetermine");
+ m->errorOut(e, "Ccode", "getF");
exit(1);
}
}