//
#include "makecontigscommand.h"
-#include "counttable.h"
//**********************************************************************************************************************
vector<string> MakeContigsCommand::setParameters(){
CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
- CommandParameter pmakecount("makecount", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakecount);
CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
string helpString = "";
helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. It will also provide new quality files if the fastq or file parameter is used.\n";
helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
- helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles, makecount and processors.\n";
+ helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
helpString += "The file parameter is 2 or 3 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n";
helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
- helpString += "The makecount parameter will create a count table file instead of a group file. The default is F.\n";
helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
helpString += "The make.contigs command should be in the following format: \n";
if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; }
else if (type == "group") { pattern = "[filename],[tag],contigs.groups"; }
- else if (type == "count") { pattern = "[filename],[tag],contigs.count_table"; }
else if (type == "report") { pattern = "[filename],[tag],contigs.report"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
outputTypes["fasta"] = tempOutNames;
outputTypes["group"] = tempOutNames;
outputTypes["report"] = tempOutNames;
- outputTypes["count"] = tempOutNames;
- }
+ }
catch(exception& e) {
m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
exit(1);
outputTypes["fasta"] = tempOutNames;
outputTypes["report"] = tempOutNames;
outputTypes["group"] = tempOutNames;
- outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
allFiles = m->isTrue(temp);
- temp = validParameter.validFile(parameters, "makecount", false); if (temp == "not found") { temp = "F"; }
- makeCount = m->isTrue(temp);
temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; }
trimOverlap = m->isTrue(temp);
cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file));
cvars["[tag]"] = "";
string compositeGroupFile = getOutputFileName("group",cvars);
- if (makeCount) { compositeGroupFile = getOutputFileName("count",cvars); }
cvars["[tag]"] = "trim";
string compositeFastaFile = getOutputFileName("fasta",cvars);
cvars["[tag]"] = "scrap";
}
map<string, int> totalGroupCounts;
- CountTable compositeCt;
+
for (int l = 0; l < filesToProcess.size(); l++) {
m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); }
if (createOligosGroup || createFileGroup) {
outputGroupFileName = getOutputFileName("group",variables);
- if (makeCount) { outputGroupFileName = getOutputFileName("count",variables); }
}
//give group in file file precedence
m->mothurOut("Making contigs...\n");
createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
- m->mothurOut("Done.\n");
+ m->mothurOut("Here...\n");
//remove temp fasta and qual files
for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } }
ofstream out;
string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
- if (!makeCount) { thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
- m->openOutputFile(thisGroupName, out);
+ thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
+ m->openOutputFile(thisGroupName, out);
+
+ while (!in.eof()){
+ if (m->control_pressed) { break; }
- while (!in.eof()){
- if (m->control_pressed) { break; }
-
- Sequence currSeq(in); m->gobble(in);
- out << currSeq.getName() << '\t' << it->second << endl;
- }
- out.close();
- }
- else {
- thisGroupName += getOutputFileName("count",variables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);
- CountTable ct;
- ct.addGroup(it->second);
- while (!in.eof()){
- if (m->control_pressed) { break; }
-
- Sequence currSeq(in); m->gobble(in);
- vector<int> tempGroupCount; tempGroupCount.push_back(1);
- ct.push_back(currSeq.getName(), tempGroupCount);
- }
- ct.printTable(thisGroupName);
+ Sequence currSeq(in); m->gobble(in);
+ out << currSeq.getName() << '\t' << it->second << endl;
}
+ out.close();
in.close();
}
}
if (createFileGroup || createOligosGroup) {
- if (makeCount) {
- if ((allFiles) || (filesToProcess.size() == 1)) {
- CountTable ct;
- for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
- ct.addGroup(itGroups->first);
- }
- vector<string> groups = ct.getNamesOfGroups();
- for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
- vector<int> tempGroupCounts; tempGroupCounts.resize(groups.size(), 0);
- ct.push_back(itGroup->first, tempGroupCounts);
- ct.setAbund(itGroup->first, itGroup->second, 1);
- }
- ct.printTable(outputGroupFileName);
- }
- }else {
- ofstream outGroup;
- m->openOutputFile(outputGroupFileName, outGroup);
- for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
- outGroup << itGroup->first << '\t' << itGroup->second << endl;
- }
- outGroup.close();
+ ofstream outGroup;
+ m->openOutputFile(outputGroupFileName, outGroup);
+ for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
+ outGroup << itGroup->first << '\t' << itGroup->second << endl;
}
+ outGroup.close();
}
if (filesToProcess.size() > 1) { //merge into large combo files
if (createFileGroup || createOligosGroup) {
- if (makeCount) {
- for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
- vector<string> groups = compositeCt.getNamesOfGroups();
- if (m->inUsersGroups(itGroup->second, groups)) {
- vector<int> tempGroupCounts; tempGroupCounts.resize(groups.size(), 0);
- compositeCt.push_back(itGroup->first, tempGroupCounts);
- compositeCt.setAbund(itGroup->first, itGroup->second, 1);
- }else{
- compositeCt.addGroup(itGroup->second);
- vector<int> tempGroupCounts; tempGroupCounts.resize(groups.size()+1, 0);
- compositeCt.push_back(itGroup->first, tempGroupCounts);
- compositeCt.setAbund(itGroup->first, itGroup->second, 1);
- }
- }
- }else {
- if (l == 0) {
- ofstream outCGroup;
- m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
- outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
- }
- m->appendFiles(outputGroupFileName, compositeGroupFile);
- if (!allFiles) { m->mothurRemove(outputGroupFileName); }
- else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+ if (l == 0) {
+ ofstream outCGroup;
+ m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
+ outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
}
+ m->appendFiles(outputGroupFileName, compositeGroupFile);
+ if (!allFiles) { m->mothurRemove(outputGroupFileName); }
+ else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+
for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
if (createFileGroup || createOligosGroup) {
- if (makeCount) { outputNames.push_back(outputGroupFileName); outputTypes["count"].push_back(outputGroupFileName); }
- else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+ outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
}
+ m->mothurOut("Done.\n");
}
- if ((filesToProcess.size() > 1) && makeCount) { //merge into large combo files
- compositeCt.printTable(compositeGroupFile);
- outputNames.push_back(compositeGroupFile); outputTypes["count"].push_back(compositeGroupFile);
- }
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); }
}
-
- string currentCount = "";
- itTypes = outputTypes.find("count");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { currentCount = (itTypes->second)[0]; m->setCountTableFile(currentCount); }
- }
//output files created by command
m->mothurOutEndLine();
// get rest of line in case there is a primer name
while (!in.eof()) {
char c = in.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { group += c; }
}
group = "";
while (!in.eof()) {
char c = in.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { group += c; }
}