// Public command interface declarations. In this excerpt a leading "- " marks a
// line removed by the change and a leading "+" marks a line added by it.
vector<string> setParameters();
string getCommandName() { return "chimera.uchime"; }
string getCommandCategory() { return "Sequence Processing"; }
// getOutputFileNameTag(string, string) is dropped; getOutputPattern(string) is added further down.
- string getOutputFileNameTag(string, string);
+
string getHelpString();
// Citation text revision: "is donated" becomes "was donated", the "in press"
// reference becomes "Bioinformatics 27:2194", and the wiki URL moves to the end.
- string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\nhttp://www.mothur.org/wiki/Chimera.uchime\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection, Bioinformatics, in press.\n"; }
+ string getOutputPattern(string);
+ string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code was donated to the public domain.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection. Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.uchime\n"; }
string getDescription() { return "detect chimeric sequences"; }
int execute();
// Helper declarations and option state for the command.
// driver and createProcesses each take four strings and an int passed by reference.
int driver(string, string, string, string, int&);
int createProcesses(string, string, string, string, int&);
// The added member lists extend the removed ones by one name each:
// `dups` joins the bool flags and `strand` joins the string options.
- bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName;
- string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
+ bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
+ string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand;
int processors;
SequenceParser* sparser;
int readFasta(string, map<string, string>&);
int printFile(vector<seqPriorityNode>&, string);
int deconvoluteResults(map<string, string>&, string, string, string);
// driverGroups and createProcessesGroups each gain one additional string parameter
// (the parameters are unnamed here, so its role cannot be read from this declaration).
- int driverGroups(string, string, string, string, int, int, vector<string>);
- int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
+ int driverGroups(string, string, string, string, string, int, int, vector<string>);
+ int createProcessesGroups(string, string, string, string, string, vector<string>, string, string, string);
int prepFile(string filename, string);
// Data members of the type whose constructors are named uchimeData (declared just below).
string namefile;
string groupfile;
string outputFName;
// `countlist` is appended to this string member list by the change.
- string accnos, alns, filename, templatefile, uchimeLocation;
+ string accnos, alns, filename, templatefile, uchimeLocation, countlist;
MothurOut* m;
int start;
int end;
int threadID, count, numChimeras;
vector<string> groups;
// The added lists differ from the removed ones only by the new `dups` flag
// (placed first) and the new `strand` string (placed last).
- bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
- string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
+ bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
+ string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
// Default constructor: empty body, assigns no members.
uchimeData(){}
// Parameterised constructor. The change inserts a new string parameter `nc`
// between `al` and `gr`, so positional callers must pass it in that slot.
// Only some assignments are visible in this excerpt; how the remaining
// parameters (o, t, file, ac, al, gr, mout, st, en, tid) are stored is not shown.
- uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+ uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, string nc, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
fastafile = f;
namefile = n;
groupfile = g;
// Counters start at zero.
count = 0;
numChimeras = 0;
uchimeLocation = uloc;
// New parameter `nc` is stored in the new `countlist` member.
+ countlist = nc;
}
// Stores the boolean option flags. The change adds `dps` as the FIRST parameter,
// which shifts every existing positional argument by one for callers.
// Only part of the body is visible in this excerpt.
- void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
+ void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
useAbskew = Abskew;
chimealns = calns;
useMinH = MinH;
ucl = uc;
useQueryfract = Queryfract;
hasCount = hc;
// New leading parameter is stored in the new `dups` member.
+ dups = dps;
}
// Stores the string-valued options. The change appends `stra` as the LAST
// parameter. The end of this function lies outside the visible excerpt.
- void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac) {
+ void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
abskew = abske;
minh = min;
mindiv = mindi;
// New trailing parameter is stored in the new `strand` member.
+ strand = stra;
xn = x;
dn = d;
xa = xa2;
// Fragment of a function body that works through pDataArray; its signature and
// several intermediate lines are not part of this excerpt.
SequenceCountParser* cparser;
if (pDataArray->hasCount) {
// With a count table, namefile is read as the table and used to build cparser;
// the temporary CountTable is deleted right after the parser is constructed.
CountTable* ct = new CountTable();
// The change passes `true` as a second argument to readTable.
// NOTE(review): the meaning of that flag is not visible here; confirm against CountTable::readTable.
- ct->readTable(pDataArray->namefile);
+ ct->readTable(pDataArray->namefile, true);
cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
delete ct;
}else {
int totalSeqs = 0;
int numChimeras = 0;
+
// The count-list output file is opened only when both hasCount and dups are set.
+ ofstream outCountList;
+ if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
+
// Iterates over the half-open index range [start, end) of pDataArray->groups.
for (int i = pDataArray->start; i < pDataArray->end; i++) {
// NOTE(review): `{ delete parser; }` below is a plain block with no `else` in
// front of it, so it runs whether or not hasCount is true. Confirm that
// `parser` is null or valid in the hasCount case.
int start = time(NULL); if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
if (pDataArray->hasCount) {
error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete cparser; return 0; }
}else {
// Fix carried by this change: the non-count branch previously called getSeqs on
// `cparser`; it now calls it on `parser`, the same object it deletes on failure.
- error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete parser; return 0; }
+ error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete parser; return 0; }
}
//int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
cPara.push_back(tempa);
}
// New: when strand is non-empty, push the option name "--strand" and then the
// strand value onto cPara as separately allocated C strings.
// new char[9] holds the 8 characters of "--strand" plus the terminating NUL;
// the value buffer is sized length()+1 for the same reason.
// NOTE(review): where these new[] buffers are released is not visible in this excerpt.
+ if (pDataArray->strand != "") {
+ char* tempA = new char[9];
+ *tempA = '\0'; strncat(tempA, "--strand", 8);
+ cPara.push_back(tempA);
+ char* tempa = new char[pDataArray->strand.length()+1];
+ *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
+ cPara.push_back(tempa);
+ }
+
// Same allocate-and-strncat pattern for the 8-character "--abskew" option name.
if (pDataArray->useAbskew) {
char* tempskew = new char[9];
*tempskew = '\0'; strncat(tempskew, "--abskew", 8);
// Writes the names of sequences flagged as chimeric to the accnos file.
ofstream out;
pDataArray->m->openOutputFile(accnos, out);
+
int num = 0;
numChimeras = 0;
// The per-group name map is fetched only when dups is set and no count table is in use.
+ map<string, string> thisnamemap;
+ map<string, string>::iterator itN;
+ if (pDataArray->dups && !pDataArray->hasCount) { thisnamemap = parser->getNameMap(pDataArray->groups[i]); }
+
while(!in.eof()) {
if (pDataArray->m->control_pressed) { break; }
// Reads 15 whitespace-delimited tokens into chimeraFlag; only the last one read is kept.
for (int j = 0; j < 15; j++) { in >> chimeraFlag; }
pDataArray->m->gobble(in);
// Previously a flagged line always wrote just `name`. The replacement branches on dups/hasCount:
//  - dups, no count table: write every name in the comma-separated map entry for `name`;
//    a missing entry logs an error and sets control_pressed.
//  - dups, count table: write `name` to the accnos file and "name<TAB>group" to outCountList.
//  - no dups: write `name` only (the old behaviour).
// numChimeras still goes up by one per flagged line, however many names are written.
- if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
+ if (chimeraFlag == "Y") {
+ if (pDataArray->dups) {
+ if (!pDataArray->hasCount) { //output redundant names for each group
+ itN = thisnamemap.find(name);
+ if (itN != thisnamemap.end()) {
+ vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
+ for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+ }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
+
+ }else {
+ out << name << endl;
+ outCountList << name << '\t' << pDataArray->groups[i] << endl;
+ }
+ }else{ out << name << endl; }
+ numChimeras++;
+ }
num++;
}
in.close();
}
// Closed under the same hasCount && dups condition that guards writes to outCountList above.
+ if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
pDataArray->count = totalSeqs;
// NOTE(review): `{ delete parser; }` is not preceded by `else`, so it runs even
// when hasCount is true and cparser was just deleted. Confirm `parser` is
// null or valid in that case.
if (pDataArray->hasCount) { delete cparser; } { delete parser; }
return totalSeqs;
// Inline copy of sequences from the input file to outputFileName (the commented-out
// prepFile call below has the same argument pair). The change routes the MothurOut
// calls through pDataArray->m instead of a bare `m`, and counts written sequences.
//prepFile(filename, outputFileName);
/******************************************/
ifstream in23;
// substr(1, length()-2) drops the first and last character of filename.
// NOTE(review): presumably surrounding quote characters; confirm where filename is built.
- m->openInputFile((filename.substr(1, filename.length()-2)), in23);
+ pDataArray->m->openInputFile((filename.substr(1, filename.length()-2)), in23);
ofstream out23;
- m->openOutputFile(outputFileName, out23);
+ pDataArray->m->openOutputFile(outputFileName, out23);
// New counter: number of sequences with a non-empty name written to out23.
+ int fcount = 0;
while (!in23.eof()) {
- if (m->control_pressed) { break; }
+ if (pDataArray->m->control_pressed) { break; }
- Sequence seq(in23); m->gobble(in23);
+ Sequence seq(in23); pDataArray->m->gobble(in23);
// Sequences with an empty name are skipped and not counted.
- if (seq.getName() != "") { seq.printSequence(out23); }
+ if (seq.getName() != "") { seq.printSequence(out23); fcount++; }
}
in23.close();
out23.close();
cPara.push_back(tempa);
}
// New: when strand is non-empty, push the option name "--strand" and then the
// strand value onto cPara as separately allocated C strings.
// new char[9] holds the 8 characters of "--strand" plus the terminating NUL;
// the value buffer is sized length()+1 for the same reason.
// NOTE(review): where these new[] buffers are released is not visible in this excerpt.
+ if (pDataArray->strand != "") {
+ char* tempA = new char[9];
+ *tempA = '\0'; strncat(tempA, "--strand", 8);
+ cPara.push_back(tempA);
+ char* tempa = new char[pDataArray->strand.length()+1];
+ *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
+ cPara.push_back(tempa);
+ }
+
// Same allocate-and-strncat pattern for the 8-character "--abskew" option name.
if (pDataArray->useAbskew) {
char* tempskew = new char[9];
*tempskew = '\0'; strncat(tempskew, "--abskew", 8);
// Copies each collected argument pointer into uchimeParameters and appends it,
// followed by a space, to commandString.
for (int j = 0; j < cPara.size(); j++) { uchimeParameters[j] = cPara[j]; commandString += toString(cPara[j]) + " "; }
//int numArgs = cPara.size();
// New: the whole command string is wrapped in one pair of double quotes.
+ commandString = "\"" + commandString + "\"";
+
//uchime_main(numArgs, uchimeParameters);
//cout << "commandString = " << commandString << endl;
if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
in.close();
out.close();
// New consistency check: a mismatch between fcount and totalSeqs logs an error
// and sets control_pressed, which triggers the early return just below.
// NOTE(review): the condition compares fcount with totalSeqs, but the message
// prints pDataArray->count and pDataArray->end; in the visible lines
// pDataArray->count is only assigned after this check, so the reported
// numbers may not match the ones that were compared.
+ if (fcount != totalSeqs) { pDataArray->m->mothurOut("[ERROR]: process " + toString(pDataArray->threadID) + " only processed " + toString(pDataArray->count) + " of " + toString(pDataArray->end) + " sequences assigned to it, quitting. \n"); pDataArray->m->control_pressed = true; }
+
if (pDataArray->m->control_pressed) { return 0; }
pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences."); pDataArray->m->mothurOutEndLine();
// Results are published on the shared data object; the sequence total is also returned.
pDataArray->count = totalSeqs;
pDataArray->numChimeras = numChimeras;
+
return totalSeqs;
}