//**********************************************************************************************************************
vector<string> MakeContigsCommand::setParameters(){
try {
- CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
- CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
- CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
- CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
+ CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq);
+ CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq);
+ CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta);
+ CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta);
+ CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","qfile",false,false,true); parameters.push_back(pfqual);
+ CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","qfile",false,false,true); parameters.push_back(prqual);
+ CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile);
+ CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+ CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
// CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
// CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
- CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+ CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
- CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
- CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
- CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
- CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
- CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
- CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
- CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
+ CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
+ CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
+ CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
+ CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
+ CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
+ CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "","",false,false); parameters.push_back(pthreshold);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
string MakeContigsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
+ helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. It will also provide new quality files if the fastq or file parameter is used.\n";
helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
- helpString += "The ffastq and rfastq parameters are required.\n";
+ helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
+ helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column. Mothur will process each pair and create a combined fasta and qual file with all the sequences.\n";
+ helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n";
+ helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n";
+ helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n";
helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
}
}
//**********************************************************************************************************************
-string MakeContigsCommand::getOutputFileNameTag(string type, string inputName=""){
- try {
- string outputFileName = "";
- map<string, vector<string> >::iterator it;
+string MakeContigsCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
- //is this a type this command creates
- it = outputTypes.find(type);
- if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
- else {
- if (type == "fasta") { outputFileName = "contigs.fasta"; }
- else if (type == "qfile") { outputFileName = "contigs.qual"; }
- else if (type == "group") { outputFileName = "groups"; }
- else if (type == "mismatch") { outputFileName = "contigs.mismatch"; }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
- }
- return outputFileName;
- }
- catch(exception& e) {
- m->errorOut(e, "MakeContigsCommand", "getOutputFileNameTag");
- exit(1);
- }
+ if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; }
+ else if (type == "qfile") { pattern = "[filename],[tag],contigs.qual"; }
+ else if (type == "group") { pattern = "[filename],[tag],groups"; }
+ else if (type == "mismatch") { pattern = "[filename],[tag],contigs.mismatch"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "getOutputPattern");
+ exit(1);
+ }
}
//**********************************************************************************************************************
MakeContigsCommand::MakeContigsCommand(){
if (path == "") { parameters["rfastq"] = inputDir + it->second; }
}
+ it = parameters.find("ffasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["ffasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("rfasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["rfasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("fqfile");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fqfile"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("rqfile");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["rqfile"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("file");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["file"] = inputDir + it->second; }
+ }
+
it = parameters.find("oligos");
//user has given a template file
if(it != parameters.end()){
}
ffastqfile = validParameter.validFile(parameters, "ffastq", true);
- if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }
- else if (ffastqfile == "not found") { ffastqfile = ""; abort=true; m->mothurOut("The ffastq parameter is required.\n"); }
+ if (ffastqfile == "not open") { abort = true; }
+ else if (ffastqfile == "not found") { ffastqfile = ""; }
rfastqfile = validParameter.validFile(parameters, "rfastq", true);
- if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }
- else if (rfastqfile == "not found") { rfastqfile = ""; abort=true; m->mothurOut("The rfastq parameter is required.\n"); }
+ if (rfastqfile == "not open") { abort = true; }
+ else if (rfastqfile == "not found") { rfastqfile = ""; }
+
+ ffastafile = validParameter.validFile(parameters, "ffasta", true);
+ if (ffastafile == "not open") { abort = true; }
+ else if (ffastafile == "not found") { ffastafile = ""; }
+
+ rfastafile = validParameter.validFile(parameters, "rfasta", true);
+ if (rfastafile == "not open") { abort = true; }
+ else if (rfastafile == "not found") { rfastafile = ""; }
+
+ fqualfile = validParameter.validFile(parameters, "fqfile", true);
+ if (fqualfile == "not open") { abort = true; }
+ else if (fqualfile == "not found") { fqualfile = ""; }
+
+ rqualfile = validParameter.validFile(parameters, "rqfile", true);
+ if (rqualfile == "not open") { abort = true; }
+ else if (rqualfile == "not found") { rqualfile = ""; }
+
+ file = validParameter.validFile(parameters, "file", true);
+ if (file == "not open") { abort = true; }
+ else if (file == "not found") { file = ""; }
+
+ if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: ffastq and rfastq parameters are required.\n"); }
+ if ((ffastqfile != "") && (rfastqfile == "")) { abort = true; }
+ if ((ffastqfile == "") && (rfastqfile != "")) { abort = true; }
oligosfile = validParameter.validFile(parameters, "oligos", true);
if (oligosfile == "not found") { oligosfile = ""; }
temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
m->mothurConvert(temp, pdiffs);
- // temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
+ // temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
// m->mothurConvert(temp, ldiffs);
+ ldiffs = 0;
// temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
-// m->mothurConvert(temp, sdiffs);
+ // m->mothurConvert(temp, sdiffs);
+ sdiffs = 0;
- temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
+ temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
m->mothurConvert(temp, tdiffs);
if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } //+ ldiffs + sdiffs;
//read ffastq and rfastq files creating fasta and qual files.
//this function will create a forward and reverse, fasta and qual files for each processor.
//files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
- int numReads = 0;
+ unsigned long int numReads = 0;
int start = time(NULL);
longestBase = 1000;
m->mothurOut("Reading fastq data...\n");
vector< vector<string> > files = readFastqFiles(numReads);
m->mothurOut("Done.\n");
-
+
if (m->control_pressed) { return 0; }
vector<vector<string> > fastaFileNames;
vector<vector<string> > qualFileNames;
createGroup = false;
string outputGroupFileName;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(ffastqfile));
+ variables["[tag]"] = "";
if(oligosfile != ""){
createGroup = getOligos(fastaFileNames, qualFileNames);
if (createGroup) {
- outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("group");
+ outputGroupFileName = getOutputFileName("group",variables);
outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
}
- string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("fasta");
- string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("qfile");
- string outScrapFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("fasta");
- string outScrapQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("qfile");
+ variables["[tag]"] = "trim";
+ string outFastaFile = getOutputFileName("fasta",variables);
+ string outQualFile = getOutputFileName("qfile",variables);
+ variables["[tag]"] = "scrap";
+ string outScrapFastaFile = getOutputFileName("fasta",variables);
+ string outScrapQualFile = getOutputFileName("qfile",variables);
- string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
+ variables["[tag]"] = "";
+ string outMisMatchFile = getOutputFileName("mismatch",variables);
outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile);
ofstream out;
string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
- thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
+ thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
m->openOutputFile(thisGroupName, out);
while (!in.eof()){
}
}
//**********************************************************************************************************************
-vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
+vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& count){
try {
vector< vector<string> > files;
else { ignorer = true; }
vector<pairFastqRead> reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques);
-
+
for (int i = 0; i < reads.size(); i++) {
fastqRead fread = reads[i].forward;
fastqRead rread = reads[i].reverse;
+ if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); }
+
if (checkReads(fread, rread)) {
if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
pairFastqRead temp(forward, reverse);
reads.push_back(temp);
}else {
- //look for forward pair
- itUniques = uniques.find(forward.name);
- if (itUniques != uniques.end()) { //we have the pair for this read
- pairFastqRead temp(forward, itUniques->second);
- reads.push_back(temp);
- uniques.erase(itUniques);
- }else { //save this read for later
- uniques[forward.name] = forward;
+ bool match = false;
+ //if no match are the names only different by 1 and 2?
+ string tempFRead = forward.name.substr(0, forward.name.length()-1);
+ string tempRRead = reverse.name.substr(0, reverse.name.length()-1);
+ if (tempFRead == tempRRead) {
+ if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) {
+ forward.name = tempFRead;
+ reverse.name = tempRRead;
+ pairFastqRead temp(forward, reverse);
+ reads.push_back(temp);
+ match = true;
+ }
}
- //look for reverse pair
- itUniques = uniques.find(reverse.name);
- if (itUniques != uniques.end()) { //we have the pair for this read
- pairFastqRead temp(itUniques->second, reverse);
- reads.push_back(temp);
- uniques.erase(itUniques);
- }else { //save this read for later
- uniques[reverse.name] = reverse;
+ if (!match) {
+ //look for forward pair
+ itUniques = uniques.find(forward.name);
+ if (itUniques != uniques.end()) { //we have the pair for this read
+ pairFastqRead temp(forward, itUniques->second);
+ reads.push_back(temp);
+ uniques.erase(itUniques);
+ }else { //save this read for later
+ uniques[forward.name] = forward;
+ }
+
+ //look for reverse pair
+ itUniques = uniques.find(reverse.name);
+ if (itUniques != uniques.end()) { //we have the pair for this read
+ pairFastqRead temp(itUniques->second, reverse);
+ reads.push_back(temp);
+ uniques.erase(itUniques);
+ }else { //save this read for later
+ uniques[reverse.name] = reverse;
+ }
}
+
}
}else if (!ignoref && ignorer) { //ignore reverse keep forward
//look for forward pair