vector<seqData> readFiles(string inputFile, CountTable* ct);
vector<seqData> loadSequences(string);
int deconvoluteResults(map<string, string>&, string, string);
- int driverGroups(string, string, int, int, vector<string>);
- int createProcessesGroups(string, string, vector<string>, string, string, string);
+ int driverGroups(string, string, string, int, int, vector<string>);
+ int createProcessesGroups(string, string, string, vector<string>, string, string, string);
string removeNs(string);
};
string groupfile;
string outputFName;
string accnos;
+ string countlist;
MothurOut* m;
int start;
int end;
- bool hasName, hasCount;
+ bool hasName, hasCount, dups;
int threadID, count, numChimeras;
double alpha, beta, cutoff;
vector<string> groups;
perseusData(){}
- perseusData(bool hn, bool hc, double a, double b, double c, string o, string f, string n, string g, string ac, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
+ perseusData(bool dps, bool hn, bool hc, double a, double b, double c, string o, string f, string n, string g, string ac, string ctlist, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
alpha = a;
beta = b;
cutoff = c;
namefile = n;
groupfile = g;
outputFName = o;
+ countlist = ctlist;
accnos = ac;
m = mout;
start = st;
groups = gr;
hasName = hn;
hasCount = hc;
+ dups = dps;
count = 0;
numChimeras = 0;
}
SequenceCountParser* cparser;
if (pDataArray->hasCount) {
CountTable* ct = new CountTable();
- ct->readTable(pDataArray->namefile);
+ ct->readTable(pDataArray->namefile, true, false);
cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
delete ct;
}else {
int totalSeqs = 0;
int numChimeras = 0;
+
+ ofstream outCountList;
+ if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
for (int u = pDataArray->start; u < pDataArray->end; u++) {
chimeraFile << j << '\t' << sequences[j].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
}
//report progress
- if((j+1) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(j+1) + "\n"); }
+ if((j+1) % 100 == 0){ pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(j+1) + "\n"); }
}
- if((numSeqs) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n"); }
+ if((numSeqs) % 100 != 0){ pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs) + "\n"); }
chimeraFile.close();
accnosFile.close();
////////////////////////////////////////////////////////////////////////////////////////
totalSeqs += numSeqs;
+
+ if (pDataArray->dups) {
+ if (!pDataArray->m->isBlank(accnosFileName)) {
+ ifstream in;
+ pDataArray->m->openInputFile(accnosFileName, in);
+ string name;
+ if (pDataArray->hasCount) {
+ while (!in.eof()) {
+ in >> name; pDataArray->m->gobble(in);
+ outCountList << name << '\t' << pDataArray->groups[u] << endl;
+ }
+ in.close();
+ }else {
+ map<string, string> thisnamemap = parser->getNameMap(pDataArray->groups[u]);
+ map<string, string>::iterator itN;
+ ofstream out;
+ pDataArray->m->openOutputFile(accnosFileName+".temp", out);
+ while (!in.eof()) {
+ in >> name; pDataArray->m->gobble(in);
+ itN = thisnamemap.find(name);
+ if (itN != thisnamemap.end()) {
+ vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
+ for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
+
+ }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
+ }
+ out.close();
+ in.close();
+ pDataArray->m->renameFile(accnosFileName+".temp", accnosFileName);
+ }
+
+ }
+ }
//append files
pDataArray->m->appendFiles(chimeraFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(chimeraFileName);
if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
}
+ if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
+
pDataArray->count = totalSeqs;
if (pDataArray->hasCount) { delete cparser; } { delete parser; }
return totalSeqs;