//***************************************************************************************************************
-Ccode::Ccode(string filename, string o) {
- fastafile = filename; outputDir = o;
+Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() {
+ fastafile = filename;
+ outputDir = o;
+ templateFileName = temp; templateSeqs = readSeqs(temp);
+ setMask(mask);
+ filter = f;
+ window = win;
+ numWanted = numW;
+
distCalc = new eachGapDist();
decalc = new DeCalculator();
mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
- ofstream out2;
- openOutputFile(mapInfo, out2);
+
+ #ifdef USE_MPI
+
+ //char* inFileName = new char[mapInfo.length()];
+ //memcpy(inFileName, mapInfo.c_str(), mapInfo.length());
+
+ char inFileName[1024];
+ strcpy(inFileName, mapInfo.c_str());
+
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, outMode, MPI_INFO_NULL, &outMap); //comm, filename, mode, info, filepointer
+
+ //delete inFileName;
+
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+ if (pid == 0) {
+ string outString = "Place in masked, filtered and trimmed sequence\tPlace in original alignment\n";
+
+ MPI_Status status;
+ int length = outString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outString.c_str(), length);
+
+ MPI_File_write_shared(outMap, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+ }
+ #else
+
+ ofstream out2;
+ openOutputFile(mapInfo, out2);
- out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
- out2.close();
+ out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+ out2.close();
+ #endif
}
//***************************************************************************************************************
Ccode::~Ccode() {
delete distCalc;
delete decalc;
+
+ #ifdef USE_MPI
+ MPI_File_close(&outMap);
+ #endif
}
//***************************************************************************************************************
-void Ccode::printHeader(ostream& out) {
- out << "For full window mapping info refer to " << mapInfo << endl << endl;
-}
-//***************************************************************************************************************
-void Ccode::print(ostream& out, ostream& outAcc) {
+int Ccode::print(ostream& out, ostream& outAcc) {
try {
ofstream out2;
//free memory
for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; }
+ return results;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Ccode", "print");
+ exit(1);
+ }
+}
+#ifdef USE_MPI
+//***************************************************************************************************************
+int Ccode::print(MPI_File& out, MPI_File& outAcc) {
+ try {
+
+ string outMapString = "";
+
+ outMapString += querySeq->getName() + "\n";
+ for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+ outMapString += toString(it->first) + "\t" + toString(it->second) + "\n";
+ }
+ printMapping(outMapString);
+ outMapString = "";
+
+ string outString = "";
+ string outAccString = "";
+
+ outString += querySeq->getName() + "\n\nReference sequences used and distance to query:\n";
+
+ for (int j = 0; j < closest.size(); j++) {
+ outString += closest[j].seq->getName() + "\t" + toString(closest[j].dist) + "\n";
+ }
+ outString += "\n\nMapping information: ";
+
+ //for each window
+ //window mapping info.
+ //you mask and did not filter
+ if ((seqMask != "") && (!filter)) { outString += "mask and trim."; }
+
+ //you filtered and did not mask
+ if ((seqMask == "") && (filter)) { outString += "filter and trim."; }
+
+ //you masked and filtered
+ if ((seqMask != "") && (filter)) { outString += "mask, filter and trim."; }
+
+ outString += "\nWindow\tStartPos\tEndPos\n";
+ it = trim.begin();
+ for (int k = 0; k < windows.size()-1; k++) {
+ outString += toString(k+1) + "\t" + toString(spotMap[windows[k]-it->first]) + "\t" + toString(spotMap[windows[k]-it->first+windowSizes]) + "\n";
+ }
+
+ outString += toString(windows.size()) + "\t" + toString(spotMap[windows[windows.size()-1]-it->first]) + "\t" + toString(spotMap[it->second-it->first-1]) + "\n\n";
+
+ outString += "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova\n";
+ for (int k = 0; k < windows.size(); k++) {
+ float ds = averageQuery[k] / averageRef[k];
+ outString += toString(k+1) + "\t" + toString(averageQuery[k]) + "\t" + toString(sdQuery[k]) + "\t" + toString(averageRef[k]) + "\t" + toString(sdRef[k]) + "\t" + toString(ds) + "\t" + toString(anova[k]) + "\n";
+ }
+
+ //varRef
+ //varQuery
+ /* F test for differences among variances.
+ * varQuery is expected to be higher or similar than varRef */
+ //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */
+
+ bool results = false;
+
+ //confidence limit, t - Student, anova
+ outString += "\nWindow\tConfidenceLimit\tt-Student\tAnova\n";
+
+ for (int k = 0; k < windows.size(); k++) {
+ string temp = "";
+ if (isChimericConfidence[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
+
+ if (isChimericTStudent[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
+
+ if (isChimericANOVA[k]) { temp += "*\t"; }
+ else { temp += "\t"; }
+
+ outString += toString(k+1) + "\t" + temp + "\n";
+
+ if (temp == "*\t*\t*\t") { results = true; }
+ }
+ outString += "\n";
+ MPI_Status status;
+ int length = outString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outString.c_str(), length);
+
+ MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
+ delete buf2;
+
+ if (results) {
+ m->mothurOut(querySeq->getName() + " was found have at least one chimeric window."); m->mothurOutEndLine();
+ outAccString += querySeq->getName() + "\n";
+
+ MPI_Status statusAcc;
+ length = outAccString.length();
+ char* buf = new char[length];
+ memcpy(buf, outAccString.c_str(), length);
+
+ MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
+ delete buf;
+ }
+
+ //free memory
+ for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; }
+
+ return results;
}
catch(exception& e) {
m->errorOut(e, "Ccode", "print");
}
}
//***************************************************************************************************************
+int Ccode::printMapping(string& output) {
+ try {
+ MPI_Status status;
+ int length = output.length();
+ char* buf = new char[length];
+ memcpy(buf, output.c_str(), length);
+
+ MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status);
+ delete buf;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Ccode", "printMapping");
+ exit(1);
+ }
+}
+#endif
+//***************************************************************************************************************
int Ccode::getChimeras(Sequence* query) {
try {
//find closest matches to query
closest = findClosest(query, numWanted);
+ if (m->control_pressed) { return 0; }
+
//initialize spotMap
for (int i = 0; i < query->getAligned().length(); i++) { spotMap[i] = i; }
createFilter(temp, 0.5);
- for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]); }
+ for (int i = 0; i < temp.size(); i++) {
+ if (m->control_pressed) { return 0; }
+ runFilter(temp[i]);
+ }
//update spotMap
map<int, int> newMap;
//trim sequences - this follows ccodes remove_extra_gaps
trimSequences(query);
-
+ if (m->control_pressed) { return 0; }
//windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length().
//Our default will be 10% and we will warn if user tries to use a window above or below these recommendations
windows = findWindows();
-
+ if (m->control_pressed) { return 0; }
//remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later
removeBadReferenceSeqs(closest);
-
+ if (m->control_pressed) { return 0; }
//find the averages for each querys references
getAverageRef(closest); //fills sumRef, averageRef, sumSquaredRef and refCombo.
getAverageQuery(closest, query); //fills sumQuery, averageQuery, sumSquaredQuery.
-
+ if (m->control_pressed) { return 0; }
//find the averages for each querys references
findVarianceRef(); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
-
+ if (m->control_pressed) { return 0; }
//find the averages for the query
findVarianceQuery(); //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0.
+ if (m->control_pressed) { return 0; }
determineChimeras(); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
+ if (m->control_pressed) { return 0; }
return 0;
}