temp = validParameter.validFile(parameters, "dereplicate", false);
- if (temp == "not found") {
- if (groupfile != "") { temp = "false"; }
- else { temp = "true"; }
- }
+ if (temp == "not found") { temp = "false"; }
dups = m->isTrue(temp);
if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+ if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
//look for uchime exe
int error;
if (hasCount) {
CountTable ct;
- ct.readTable(nameFile);
+ ct.readTable(nameFile, true, false);
for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
int num = ct.getNumSeqs(it->first);
if (num == 0) { error = 1; }
if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
- //read my own
- if (hasCount && !dups) {
- CountTable newCount; newCount.readTable(nameFile);
-
+ if (hasCount && dups) {
+ CountTable c; c.readTable(nameFile, true, false);
if (!m->isBlank(newCountFile)) {
ifstream in2;
m->openInputFile(newCountFile, in2);
string name, group;
while (!in2.eof()) {
in2 >> name >> group; m->gobble(in2);
- newCount.setAbund(name, group, 0);
+ c.setAbund(name, group, 0);
}
in2.close();
}
m->mothurRemove(newCountFile);
- newCount.printTable(newCountFile);
+ c.printTable(newCountFile);
}
}else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
}else {
- /*if (hasCount) { //removed empty seqs
+
+ if (hasCount) {
+ set<string> doNotRemove;
+ CountTable c; c.readTable(newCountFile, true, true);
+ vector<string> namesInTable = c.getNamesOfSeqs();
+ for (int i = 0; i < namesInTable.size(); i++) {
+ int temp = c.getNumSeqs(namesInTable[i]);
+ if (temp == 0) { c.remove(namesInTable[i]); }
+ else { doNotRemove.insert((namesInTable[i])); }
+ }
+ //remove names we want to keep from accnos file.
+ set<string> accnosNames = m->readAccnos(accnosFileName);
ofstream out2;
m->openOutputFile(accnosFileName, out2);
-
- CountTable c; c.readTable(newCountFile);
- vector<string> nseqs = c.getNamesOfSeqs();
- vector<string> ngroups = c.getNamesOfGroups();
- for (int l = 0; l < nseqs.size(); l++) {
- if (c.getNumSeqs(nseqs[l]) == 0) {
- c.remove(nseqs[l]);
- out2 << nseqs[l] << endl;
- }
- }
- for (int l = 0; l < ngroups.size(); l++) {
- if (c.getGroupCount(ngroups[l]) == 0) { c.removeGroup(ngroups[l]); }
+ for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+ if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
}
out2.close();
c.printTable(newCountFile);
- }*/
+ outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
+ }
}
if (hasCount) { delete cparser; }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
}
+
+ itTypes = outputTypes.find("count"); // look up the output files recorded under the "count" type
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } // pass the first recorded count file to m->setCountTableFile
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
map<string, string>::iterator itUnique;
int total = 0;
- //edit accnos file
- ifstream in2;
- m->openInputFile(accnosFileName, in2);
-
ofstream out2;
m->openOutputFile(accnosFileName+".temp", out2);
set<string> chimerasInFile;
set<string>::iterator itChimeras;
-
- while (!in2.eof()) {
- if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
-
- in2 >> name; m->gobble(in2);
-
- //find unique name
- itUnique = uniqueNames.find(name);
-
- if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
- else {
- itChimeras = chimerasInFile.find((itUnique->second));
-
- if (itChimeras == chimerasInFile.end()) {
- out2 << itUnique->second << endl;
- chimerasInFile.insert((itUnique->second));
- total++;
- }
- }
- }
- in2.close();
+ if (!m->isBlank(accnosFileName)) { // only parse the accnos file when isBlank() does not report it as blank
+ //edit accnos file: translate each name through uniqueNames and write every translated name only once
+ ifstream in2;
+ m->openInputFile(accnosFileName, in2);
+
+ while (!in2.eof()) {
+ if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; } // abort path: close both streams, remove the output and the .temp file, return 0
+
+ in2 >> name; m->gobble(in2); // presumably gobble() consumes trailing whitespace so eof() is reached after the last name - TODO confirm
+
+ //find unique name
+ itUnique = uniqueNames.find(name);
+
+ if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; } // unknown name: report it and set control_pressed, which the check at the top of the loop acts on
+ else {
+ itChimeras = chimerasInFile.find((itUnique->second));
+
+ if (itChimeras == chimerasInFile.end()) { // first time this mapped name is seen
+ out2 << itUnique->second << endl;
+ chimerasInFile.insert((itUnique->second)); // remember it so later duplicates are not written again
+ total++; // counts the distinct mapped names written to out2
+ }
+ }
+ }
+ in2.close();
+ }
out2.close();
m->mothurRemove(accnosFileName);
int totalSeqs = 0;
int numChimeras = 0;
+
ofstream outCountList;
if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
//loop through and create all the processes you want
while (process != processors) {
- int pid = fork();
+ pid_t pid = fork();
if (pid > 0) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
+ num = driver(outputFileName + toString(m->mothurGetpid(process)) + ".temp", files[process], accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", numChimeras);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFileName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out << numChimeras << endl;
int num = 0;
CountTable newCount;
- if (hasCount && dups) { newCount.readTable(nameFile); }
+ if (hasCount && dups) { newCount.readTable(nameFile, true, false); }
//sanity check
if (groups.size() < processors) { processors = groups.size(); }
//divide the groups between the processors
vector<linePair> lines;
- int numGroupsPerProcessor = groups.size() / processors;
- for (int i = 0; i < processors; i++) {
- int startIndex = i * numGroupsPerProcessor;
- int endIndex = (i+1) * numGroupsPerProcessor;
- if(i == (processors - 1)){ endIndex = groups.size(); }
- lines.push_back(linePair(startIndex, endIndex));
- }
-
+ // Divide the groups into contiguous [startIndex, endIndex) ranges, one per processor.
+ // Every processor except the last takes the ceiling of its share of the groups still
+ // unassigned; the last processor takes whatever remains.
+ // The ceiling is computed with integer arithmetic: calling ceil() on an int / int
+ // quotient has no effect because the division has already truncated.
+ int remainingPairs = groups.size();
+ int startIndex = 0;
+ for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
+     int numPairs = remainingPairs; //case for last processor
+     if (remainingProcessors != 1) { numPairs = (remainingPairs + remainingProcessors - 1) / remainingProcessors; }
+     lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex
+     startIndex = startIndex + numPairs;
+     remainingPairs = remainingPairs - numPairs;
+ }
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
//loop through and create all the processes you want
while (process != processors) {
- int pid = fork();
+ pid_t pid = fork();
if (pid > 0) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
+ num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", filename + toString(m->mothurGetpid(process)) + ".temp", accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", accnos + ".byCount." + toString(m->mothurGetpid(process)) + ".temp", lines[process].start, lines[process].end, groups);
//pass numSeqs to parent
ofstream out;
- string tempFile = outputFName + toString(getpid()) + ".num.temp";
+ string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out.close();
#endif
-
+
//read my own
if (hasCount && dups) {
if (!m->isBlank(accnos + ".byCount")) {
}
m->mothurRemove(accnos + ".byCount");
}
-
+
//append output files
for(int i=0;i<processIDS.size();i++){
m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
}
//print new *.pick.count_table
- if (hasCount && dups) { newCount.printTable(newCountFile); }
-
+ if (hasCount && dups) { newCount.printTable(newCountFile); }
+
return num;
}