+ if (!m->isBlank(accnosFileName)) {
+ //edit accnos file
+ ifstream in2;
+ m->openInputFile(accnosFileName, in2);
+
+ while (!in2.eof()) {
+ if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+
+ in2 >> name; m->gobble(in2);
+
+ //find unique name
+ itUnique = uniqueNames.find(name);
+
+ if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else {
+ itChimeras = chimerasInFile.find((itUnique->second));
+
+ if (itChimeras == chimerasInFile.end()) {
+ out2 << itUnique->second << endl;
+ chimerasInFile.insert((itUnique->second));
+ total++;
+ }
+ }
+ }
+ in2.close();
+ }
+ out2.close();
+
+ m->mothurRemove(accnosFileName);
+ rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
+
+
+
+ //edit chimera file
+ ifstream in;
+ m->openInputFile(outputFileName, in);
+
+ ofstream out;
+ m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+ out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
+
+ float temp1;
+ string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
+ name = "";
+ namesInFile.clear();
+ //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
+ /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
+ 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
+ */
+
+ while (!in.eof()) {
+
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
+
+ bool print = false;
+ in >> temp1; m->gobble(in);
+ in >> name; m->gobble(in);
+ in >> parent1; m->gobble(in);
+ in >> parent2; m->gobble(in);
+ in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
+ m->gobble(in);
+
+ //parse name - name will look like U68590/ab=1/
+ string restOfName = "";
+ int pos = name.find_first_of('/');
+ if (pos != string::npos) {
+ restOfName = name.substr(pos);
+ name = name.substr(0, pos);
+ }
+
+ //find unique name
+ itUnique = uniqueNames.find(name);
+
+ if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else {
+ name = itUnique->second;
+ //is this name already in the file
+ itNames = namesInFile.find((name));
+
+ if (itNames == namesInFile.end()) { //no not in file
+ if (flag == "N") { //are you really a no??
+ //is this sequence really not chimeric??
+ itChimeras = chimerasInFile.find(name);
+
+ //then you really are a no so print, otherwise skip
+ if (itChimeras == chimerasInFile.end()) { print = true; }
+ }else{ print = true; }
+ }
+ }
+
+ if (print) {
+ out << temp1 << '\t' << name << restOfName << '\t';
+ namesInFile.insert(name);
+
+ //parse parent1 names
+ if (parent1 != "*") {
+ restOfName = "";
+ pos = parent1.find_first_of('/');
+ if (pos != string::npos) {
+ restOfName = parent1.substr(pos);
+ parent1 = parent1.substr(0, pos);
+ }
+
+ itUnique = uniqueNames.find(parent1);
+ if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else { out << itUnique->second << restOfName << '\t'; }
+ }else { out << parent1 << '\t'; }
+
+ //parse parent2 names
+ if (parent2 != "*") {
+ restOfName = "";
+ pos = parent2.find_first_of('/');
+ if (pos != string::npos) {
+ restOfName = parent2.substr(pos);
+ parent2 = parent2.substr(0, pos);
+ }
+
+ itUnique = uniqueNames.find(parent2);
+ if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else { out << itUnique->second << restOfName << '\t'; }
+ }else { out << parent2 << '\t'; }
+
+ out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
+ }
+ }
+ in.close();
+ out.close();
+
+ m->mothurRemove(outputFileName);
+ rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+
+
+ //edit anls file
+ //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
+ /*
+ ------------------------------------------------------------------------
+ Query ( 179 nt) F21Fcsw_11639/ab=591/
+ ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
+ ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
+
+ A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
+ Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
+ B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
+ Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
+ Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
+ Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+ A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
+ Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
+ B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
+ Diffs NNN N N N N N BB NNN
+ Votes 000 0 0 0 0 0 ++ 000
+ Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+ A 159 TGGAACTGAGACACGGTCCAA 179
+ Q 159 TGGAACTGAGACACGGTCCAA 179
+ B 161 TGGAACTGAGACACGGTCCAA 181
+ Diffs
+ Votes
+ Model BBBBBBBBBBBBBBBBBBBBB
+
+ Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
+ Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
+ */
+ if (chimealns) {
+ ifstream in3;
+ m->openInputFile(alnsFileName, in3);
+
+ ofstream out3;
+ m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
+
+ name = "";
+ namesInFile.clear();
+ string line = "";
+
+ while (!in3.eof()) {
+ if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
+
+ line = "";
+ line = m->getline(in3);
+ string temp = "";
+
+ if (line != "") {
+ istringstream iss(line);
+ iss >> temp;
+
+ //are you a name line
+ if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
+ int spot = 0;
+ for (int i = 0; i < line.length(); i++) {
+ spot = i;
+ if (line[i] == ')') { break; }
+ else { out3 << line[i]; }
+ }
+
+ if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else {
+ out << line[spot] << line[spot+1];
+
+ name = line.substr(spot+2);
+
+ //parse name - name will either look like U68590/ab=1/ or U68590
+ string restOfName = "";
+ int pos = name.find_first_of('/');
+ if (pos != string::npos) {
+ restOfName = name.substr(pos);
+ name = name.substr(0, pos);
+ }
+
+ //find unique name
+ itUnique = uniqueNames.find(name);
+
+ if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
+ else {
+ //only limit repeats on query names
+ if (temp == "Query") {
+ itNames = namesInFile.find((itUnique->second));
+
+ if (itNames == namesInFile.end()) {
+ out << itUnique->second << restOfName << endl;
+ namesInFile.insert((itUnique->second));
+ }
+ }else { out << itUnique->second << restOfName << endl; }
+ }
+
+ }
+
+ }else { //not need to alter line
+ out3 << line << endl;
+ }
+ }else { out3 << endl; }
+ }
+ in3.close();
+ out3.close();
+
+ m->mothurRemove(alnsFileName);
+ rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
+ }
+
+ return total;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ // Sorts nameMapCount in place with compareSeqPriorityNodes, then writes every entry to
+ // `filename` as a two-line record: ">" + name + "/ab=" + numIdentical + "/" on the first
+ // line and the entry's seq on the second.
+ // Always returns 0; a caught exception is reported through m->errorOut and the process exits.
+ int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
+     try {
+         // the records are written in the order this sort leaves them in
+         sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
+
+         ofstream fastaOut;
+         m->openOutputFile(filename, fastaOut);
+
+         for (vector<seqPriorityNode>::iterator node = nameMapCount.begin(); node != nameMapCount.end(); ++node) {
+             // header line: name followed by the "/ab=<numIdentical>/" annotation
+             fastaOut << ">" << node->name << "/ab=" << node->numIdentical << "/" << endl;
+             fastaOut << node->seq << endl;
+         }
+         fastaOut.close();
+
+         return 0;
+     }
+     catch(exception& e) {
+         m->errorOut(e, "ChimeraUchimeCommand", "printFile");
+         exit(1);
+     }
+ }
+//**********************************************************************************************************************
+ // Reads the fasta file `filename` one Sequence at a time and stores each record in seqs,
+ // keyed by the sequence's getName() with its getAligned() string as the value; a later
+ // record with the same name overwrites the earlier one.
+ // Reading stops early (keeping what was stored so far) once m->control_pressed is set.
+ // Always returns 0; a caught exception is reported through m->errorOut and the process exits.
+ int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
+     try {
+         ifstream fastaIn;
+         m->openInputFile(filename, fastaIn);
+
+         while (!fastaIn.eof()) {
+             // user abort: fall through to the common close-and-return below
+             if (m->control_pressed) { break; }
+
+             Sequence currentSeq(fastaIn);
+             m->gobble(fastaIn);
+
+             seqs[currentSeq.getName()] = currentSeq.getAligned();
+         }
+         fastaIn.close();
+
+         return 0;
+     }
+     catch(exception& e) {
+         m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
+         exit(1);
+     }
+ }
+//**********************************************************************************************************************
+
+ // Builds a names file by running unique.seqs on inputFile (used when no names file was given).
+ // On success inputFile is overwritten with the first "fasta" output of unique.seqs and the
+ // first "name" output is returned.
+ // If unique.seqs reports no "name" or no "fasta" output, an error is logged,
+ // m->control_pressed is set, inputFile is left untouched and an empty string is returned.
+ // A caught exception is reported through m->errorOut and the process exits.
+ string ChimeraUchimeCommand::getNamesFile(string& inputFile){
+     try {
+         string nameFile = "";
+
+         m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
+
+         //use unique.seqs to create new name and fastafile
+         string inputString = "fasta=" + inputFile;
+         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+         m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
+         m->mothurCalling = true;
+
+         Command* uniqueCommand = new DeconvoluteCommand(inputString);
+         uniqueCommand->execute();
+
+         map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
+
+         delete uniqueCommand;
+         m->mothurCalling = false;
+         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+         //look the outputs up with find(): operator[] would insert an empty vector for a
+         //missing key, and indexing [0] on an empty vector is undefined behaviour
+         map<string, vector<string> >::iterator itName = filenames.find("name");
+         map<string, vector<string> >::iterator itFasta = filenames.find("fasta");
+
+         bool missingName = (itName == filenames.end()) || itName->second.empty();
+         bool missingFasta = (itFasta == filenames.end()) || itFasta->second.empty();
+
+         if (missingName || missingFasta) {
+             m->mothurOut("[ERROR]: unique.seqs did not produce the name and fasta files needed to continue."); m->mothurOutEndLine();
+             m->control_pressed = true;
+             return nameFile;
+         }
+
+         nameFile = itName->second[0];
+         inputFile = itFasta->second[0];
+
+         return nameFile;
+     }
+     catch(exception& e) {
+         m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
+         exit(1);
+     }
+ }
+//**********************************************************************************************************************
+ // Runs the chimera check one group at a time for groups[start] .. groups[end-1].
+ // For each group:
+ //   - getSeqs() is called on cparser (hasCount) or sparser with `filename`
+ //     (presumably writing that group's sequences there - confirm against the parsers);
+ //   - driver() is run with per-group output names (each base name + group name) and its
+ //     return value is added to the running total;
+ //   - `filename` is removed unless m->debug is set;
+ //   - when dups is set and the per-group accnos file is not blank, it is post-processed
+ //     (see the comments in the body);
+ //   - the per-group files are appended to outputFName, accnos and (if chimealns) alns,
+ //     then removed.
+ // Returns the summed driver() results, or 0 when the run is aborted
+ // (m->control_pressed set, or getSeqs() returning 1).
+ // A caught exception is reported through m->errorOut and the process exits.
+ int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
+     try {
+         int totalSeqs = 0;
+         int numChimeras = 0;
+
+         ofstream outCountList;
+         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
+
+         for (int g = start; g < end; g++) {
+             int groupStartTime = time(NULL);
+             if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
+
+             string& thisGroup = groups[g];
+
+             int error;
+             if (hasCount) { error = cparser->getSeqs(thisGroup, filename, true); }
+             else { error = sparser->getSeqs(thisGroup, filename, true); }
+             if ((error == 1) || m->control_pressed) { return 0; }
+
+             //per-group result files: base name with the group name appended
+             const string groupOutput = outputFName + thisGroup;
+             const string groupAccnos = accnos + thisGroup;
+             const string groupAlns = alns + thisGroup;
+
+             int numSeqs = driver(groupOutput, filename, groupAccnos, groupAlns, numChimeras);
+             totalSeqs += numSeqs;
+
+             if (m->control_pressed) { return 0; }
+
+             //the file made for uchime is only kept in debug mode
+             if (m->debug) { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
+             else { m->mothurRemove(filename); }
+
+             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
+             //This table will zero out group counts for seqs determined to be chimeric by that group.
+             if (dups && !m->isBlank(groupAccnos)) {
+                 ifstream accnosIn;
+                 m->openInputFile(groupAccnos, accnosIn);
+                 string name;
+
+                 if (hasCount) {
+                     //count file: record each flagged name together with the group it was flagged in
+                     while (!accnosIn.eof()) {
+                         accnosIn >> name; m->gobble(accnosIn);
+                         outCountList << name << '\t' << thisGroup << endl;
+                     }
+                     accnosIn.close();
+                 }
+                 else {
+                     //names file: rewrite the accnos file, replacing each name with the
+                     //comma-separated names listed for it in this group's name map
+                     map<string, string> thisnamemap = sparser->getNameMap(thisGroup);
+                     ofstream expandedOut;
+                     m->openOutputFile(groupAccnos + ".temp", expandedOut);
+
+                     while (!accnosIn.eof()) {
+                         accnosIn >> name; m->gobble(accnosIn);
+
+                         map<string, string>::iterator itN = thisnamemap.find(name);
+                         if (itN == thisnamemap.end()) {
+                             m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true;
+                         }
+                         else {
+                             vector<string> tempNames;
+                             m->splitAtComma(itN->second, tempNames);
+                             for (vector<string>::iterator itT = tempNames.begin(); itT != tempNames.end(); ++itT) {
+                                 expandedOut << *itT << endl;
+                             }
+                         }
+                     }
+                     expandedOut.close();
+                     accnosIn.close();
+                     m->renameFile(groupAccnos + ".temp", groupAccnos);
+                 }
+             }
+
+             //append this group's results to the combined files, then drop the per-group files
+             m->appendFiles(groupOutput, outputFName);
+             m->mothurRemove(groupOutput);
+
+             m->appendFiles(groupAccnos, accnos);
+             m->mothurRemove(groupAccnos);
+
+             if (chimealns) {
+                 m->appendFiles(groupAlns, alns);
+                 m->mothurRemove(groupAlns);
+             }
+
+             m->mothurOutEndLine();
+             m->mothurOut("It took " + toString(time(NULL) - groupStartTime) + " secs to check " + toString(numSeqs) + " sequences from group " + thisGroup + ".");
+             m->mothurOutEndLine();
+         }
+
+         if (hasCount && dups) { outCountList.close(); }
+
+         return totalSeqs;
+
+     }
+     catch(exception& e) {
+         m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
+         exit(1);
+     }
+ }
+//**********************************************************************************************************************
+
+int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){