CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
-// CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
-// CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
- CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold);
+ CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n";
helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
- helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
+ helpString += "The file parameter is 2 or 3 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n";
helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n";
helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n";
helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n";
helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n";
- helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n";
+ helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
//**********************************************************************************************************************
MakeContigsCommand::MakeContigsCommand(string option) {
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
+ createFileGroup = false; createOligosGroup = false;
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
m->mothurConvert(temp, gapExtend);
if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
- temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "25"; }
+ temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; }
m->mothurConvert(temp, insert);
if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
vector<vector<string> > fastaFileNames;
- createGroup = false;
+ createOligosGroup = false;
string outputGroupFileName;
map<string, string> variables;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
variables["[tag]"] = "";
- if(oligosfile != ""){
- createGroup = getOligos(fastaFileNames, variables["[filename]"]);
- if (createGroup) {
- outputGroupFileName = getOutputFileName("group",variables);
- outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
- }
+ if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); }
+ if (createOligosGroup || createFileGroup) {
+ outputGroupFileName = getOutputFileName("group",variables);
}
+ //give group in file file precedence
+ if (createFileGroup) { createOligosGroup = false; }
+
variables["[tag]"] = "trim";
string outFastaFile = getOutputFileName("fasta",variables);
variables["[tag]"] = "scrap";
string outScrapFastaFile = getOutputFileName("fasta",variables);
variables["[tag]"] = "";
string outMisMatchFile = getOutputFileName("report",variables);
- outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
- outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
- outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
-
+
m->mothurOut("Making contigs...\n");
- createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames);
+ createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
m->mothurOut("Done.\n");
//remove temp fasta and qual files
}
}
- if (createGroup) {
+ if (createFileGroup || createOligosGroup) {
ofstream outGroup;
m->openOutputFile(outputGroupFileName, outGroup);
for (map<string, string>::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) {
}
if (filesToProcess.size() > 1) { //merge into large combo files
- if (createGroup) {
+ if (createFileGroup || createOligosGroup) {
if (l == 0) {
ofstream outCGroup;
m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
}
- m->appendFiles(outputGroupFileName, compositeGroupFile);
+ m->appendFiles(outputGroupFileName, compositeGroupFile);
+ if (!allFiles) { m->mothurRemove(outputGroupFileName); }
+ else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
}
- m->appendFiles(outMisMatchFile, compositeMisMatchFile);
+ if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); }
+ else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); }
m->appendFiles(outFastaFile, compositeFastaFile);
m->appendFiles(outScrapFastaFile, compositeScrapFastaFile);
+ if (!allFiles) {
+ m->mothurRemove(outMisMatchFile);
+ m->mothurRemove(outFastaFile);
+ m->mothurRemove(outScrapFastaFile);
+ }else {
+ outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+ outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
+ outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
+ }
+ }else {
+ outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+ outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
+ outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
+ if (createFileGroup || createOligosGroup) { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
}
}
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
}
}
//**********************************************************************************************************************
-int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames) {
+int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int index) {
try {
int num = 0;
vector<int> processIDS;
+ string group = "";
+ map<int, string>::iterator it = file2Group.find(index);
+ if (it != file2Group.end()) { group = it->second; }
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 0;
outputFasta + toString(getpid()) + ".temp",
outputScrapFasta + toString(getpid()) + ".temp",
outputMisMatches + toString(getpid()) + ".temp",
- tempFASTAFileNames, process);
+ tempFASTAFileNames, process, group);
//pass groupCounts to parent
ofstream out;
string tempFile = toString(getpid()) + ".num.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
- if(createGroup){
+ if (createFileGroup || createOligosGroup) {
out << groupCounts.size() << endl;
for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
m->openOutputFile(outputScrapFasta, temp); temp.close();
//do my part
- num = driver(files[processors-1], outputFasta, outputScrapFasta, outputMisMatches, fastaFileNames, processors-1);
+ num = driver(files[processors-1], outputFasta, outputScrapFasta, outputMisMatches, fastaFileNames, processors-1, group);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
int tempNum;
in >> tempNum; num += tempNum; m->gobble(in);
- if(createGroup){
+ if (createFileGroup || createOligosGroup) {
string group;
in >> tempNum; m->gobble(in);
}
- contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, trimOverlap, h);
+ contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
pDataArray.push_back(tempcontig);
hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
//do my part
processIDS.push_back(processors-1);
- num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1);
+ num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group);
//Wait until all threads have terminated.
WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta);
m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp"));
- m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
+ m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
if(allFiles){
}
}
//**********************************************************************************************************************
-int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process){
+int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector<vector<string> > fastaFileNames, int process, string group){
try {
Alignment* alignment;
m->openOutputFile(outputFasta, outFasta);
m->openOutputFile(outputScrapFasta, outScrapFasta);
m->openOutputFile(outputMisMatches, outMisMatch);
- if (process == 0) { outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; }
+ outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";
TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
contig += seq1[i];
}else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
if (thisfqualfile != "") {
- if (scores2[BBaseMap[i]] < insert) { } //
+ if (scores2[BBaseMap[i]] <= insert) { } //
else { contig += seq2[i]; }
}else { contig += seq2[i]; } //with no quality info, then we keep it?
}else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
if (thisfqualfile != "") {
- if (scores1[ABaseMap[i]] < insert) { } //
+ if (scores1[ABaseMap[i]] <= insert) { } //
else { contig += seq1[i]; }
}else { contig += seq1[i]; } //with no quality info, then we keep it?
}else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
if (m->debug) { m->mothurOut(fSeq.getName()); }
- if (createGroup) {
+ if (createOligosGroup) {
if(barcodes.size() != 0){
string thisGroup = barcodeNameVector[barcodeIndex];
if (primers.size() != 0) {
}else { ignore = true; }
}
+ }else if (createFileGroup) {
+ int pos = group.find("ignore");
+ if (pos == string::npos) {
+ groupMap[fSeq.getName()] = group;
+
+ map<string, int>::iterator it = groupCounts.find(group);
+ if (it == groupCounts.end()) { groupCounts[group] = 1; }
+ else { groupCounts[it->first] ++; }
+ }else { ignore = true; }
}
if (m->debug) { m->mothurOut("\n"); }
if (m->control_pressed) { return files; }
in >> forward; m->gobble(in);
- in >> reverse; m->gobble(in);
+ in >> reverse;
+
+ string group = "";
+ while (!in.eof()) { //do we have a group assigned to this pair
+ char c = in.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ if (group != "") {
+ //line in file look like: group forward reverse
+ string temp = forward;
+ forward = reverse;
+ reverse = group;
+ group = temp;
+ createFileGroup = true;
+ }
+ m->gobble(in);
+
//check to make sure both are able to be opened
ifstream in2;
}else{ in3.close(); }
if ((openForward != 1) && (openReverse != 1)) { //good pair
+ file2Group[files.size()] = group;
vector<string> pair;
pair.push_back(forward);
pair.push_back(reverse);
files.push_back(pair);
}
-
}
in.close();
if(foligo[i] == 'U') { foligo[i] = 'T'; }
}
- if(type == "FORWARD"){
+ if(type == "PRIMER"){
m->gobble(in);
in >> roligo;
CommandParameter preference("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(preference);
CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "QualReport","",false,false); parameters.push_back(pqfile);
CommandParameter preport("report", "InputTypes", "", "", "none", "none", "QualReport","",false,false); parameters.push_back(preport);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount);
CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pignorechimeras);
CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pthreshold);
CommandParameter paligned("aligned", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(paligned);
helpString += "The reference parameter...\n";
helpString += "The qfile parameter...\n";
helpString += "The report parameter...\n";
- helpString += "The name parameter...\n";
+ helpString += "The name parameter allows you to provide a name file associated with the fasta file.\n";
+ helpString += "The count parameter allows you to provide a count file associated with the fasta file.\n";
helpString += "The ignorechimeras parameter...\n";
helpString += "The threshold parameter...\n";
helpString += "The processors parameter...\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a names file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
it = parameters.find("qfile");
//user has given a quality score file
if(namesFileName == "not found"){ namesFileName = ""; }
else if (namesFileName == "not open") { namesFileName = ""; abort = true; }
else { m->setNameFile(namesFileName); }
+
+ //check for optional parameters
+ countfile = validParameter.validFile(parameters, "count", true);
+ if(countfile == "not found"){ countfile = ""; }
+ else if (countfile == "not open") { countfile = ""; abort = true; }
+ else { m->setCountTableFile(countfile); }
qualFileName = validParameter.validFile(parameters, "qfile", true);
if(qualFileName == "not found"){ qualFileName = ""; }
outputDir += m->hasPath(queryFileName); //if user entered a file with a path then preserve it
}
+ if ((countfile != "") && (namesFileName != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "1.00"; }
getReferences(); //read in reference sequences - make sure there's no ambiguous bases
- if(namesFileName != ""){ weights = getWeights(); }
+ if(namesFileName != "") { weights = getWeights(); }
+ else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ weights = ct.getNameMap();
+ }
vector<unsigned long long> fastaFilePos;
vector<unsigned long long> qFilePos;
int misMatchSize;
in >> misMatchSize; m->gobble(in);
if (misMatchSize > misMatchCounts.size()) { misMatchCounts.resize(misMatchSize, 0); }
- for (int j = 0; j < misMatchCounts.size(); j++) {
+ for (int j = 0; j < misMatchSize; j++) {
in >> tempNum; misMatchCounts[j] += tempNum;
}
m->gobble(in);
getErrors(query, reference, minCompare);
- if(namesFileName != ""){
+ if((namesFileName != "") || (countfile != "")){
it = weights.find(query.getName());
minCompare.weight = it->second;
}