CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
- CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold);
+ CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold);
CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat);
helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n";
- helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n";
+ helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+
helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
helpString += "The make.contigs command should be in the following format: \n";
helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
outputTypes["fasta"] = tempOutNames;
outputTypes["group"] = tempOutNames;
outputTypes["report"] = tempOutNames;
- }
+ }
catch(exception& e) {
m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
exit(1);
m->mothurConvert(temp, gapExtend);
if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
- temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "25"; }
+ temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; }
m->mothurConvert(temp, insert);
if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; }
temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
allFiles = m->isTrue(temp);
+
temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; }
trimOverlap = m->isTrue(temp);
outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile);
}
+ map<string, int> totalGroupCounts;
+
for (int l = 0; l < filesToProcess.size(); l++) {
m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n");
+ groupCounts.clear();
+ groupMap.clear();
vector<vector<string> > fastaFileNames;
createOligosGroup = false;
string outputGroupFileName;
m->mothurOut("Making contigs...\n");
createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l);
- m->mothurOut("Done.\n");
+ m->mothurOut("Here...\n");
//remove temp fasta and qual files
for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } }
ofstream out;
string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
- thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
+ thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
m->openOutputFile(thisGroupName, out);
while (!in.eof()){
if (m->control_pressed) { break; }
Sequence currSeq(in); m->gobble(in);
- out << currSeq.getName() << '\t' << it->second << endl;
+ out << currSeq.getName() << '\t' << it->second << endl;
}
- in.close();
out.close();
+ in.close();
}
}
}
if (filesToProcess.size() > 1) { //merge into large combo files
- if (createFileGroup || createOligosGroup) {
- if (l == 0) {
+ if (createFileGroup || createOligosGroup) {
+ if (l == 0) {
ofstream outCGroup;
m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close();
outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile);
m->appendFiles(outputGroupFileName, compositeGroupFile);
if (!allFiles) { m->mothurRemove(outputGroupFileName); }
else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+
+ for (map<string, int>::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) {
+ map<string, int>::iterator itTemp = totalGroupCounts.find(itGroups->first);
+ if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups
+ else { itTemp->second += itGroups->second; } //existing group, update total
+ }
}
if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); }
else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); }
outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
}
}else {
+ totalGroupCounts = groupCounts;
outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile);
- if (createFileGroup || createOligosGroup) { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); }
+ if (createFileGroup || createOligosGroup) {
+ outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
+ }
}
+ m->mothurOut("Done.\n");
}
+
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//output group counts
m->mothurOutEndLine();
int total = 0;
- if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
- for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+ if (totalGroupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
+ for (map<string, int>::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) {
total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
}
if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
}
}
}
-
+
num = driver(files[process],
outputFasta + toString(getpid()) + ".temp",
outputScrapFasta + toString(getpid()) + ".temp",
}
}
}
-
-
+
contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
pDataArray.push_back(tempcontig);
contig += seq1[i];
}else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base
if (thisfqualfile != "") {
- if (scores2[BBaseMap[i]] < insert) { } //
+ if (scores2[BBaseMap[i]] <= insert) { } //
else { contig += seq2[i]; }
}else { contig += seq2[i]; } //with no quality info, then we keep it?
}else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base
if (thisfqualfile != "") {
- if (scores1[ABaseMap[i]] < insert) { } //
+ if (scores1[ABaseMap[i]] <= insert) { } //
else { contig += seq1[i]; }
}else { contig += seq1[i]; } //with no quality info, then we keep it?
}else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
}
m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ".\n"); }
//check to make sure both are able to be opened
ifstream in2;
}
//roligo = reverseOligo(roligo);
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
+
group = "";
// get rest of line in case there is a primer name
while (!in.eof()) {
char c = in.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { group += c; }
}
oligosPair newPrimer(foligo, roligo);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
//check for repeat barcodes
string tempPair = foligo+roligo;
group = "";
while (!in.eof()) {
char c = in.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { group += c; }
}