X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=makecontigscommand.cpp;h=7a84f88ed86fbdcfc3c03d6759f46e99f935c836;hb=55bbd10379db27def51cec72a8819d775f73e45b;hp=f2f6449a383cb58c746ea0ccee9fd2ed1483b246;hpb=5c5c0428f6d548c28a8b903ac80efed4f92d59db;p=mothur.git diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index f2f6449..7a84f88 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -11,25 +11,30 @@ //********************************************************************************************************************** vector MakeContigsCommand::setParameters(){ try { - CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); + CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq); + CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq); + CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta); + CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta); + CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","qfile",false,false,true); parameters.push_back(pfqual); + CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","qfile",false,false,true); parameters.push_back(prqual); + CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile); + CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); + CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); // CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); // CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); - CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign); - CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles); - CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch); - CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch); - CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen); - CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend); - CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "",false,false); parameters.push_back(pthreshold); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign); + CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); + CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); + CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); + CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); + CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); + CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -44,10 +49,14 @@ vector MakeContigsCommand::setParameters(){ string MakeContigsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n"; + helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. It will also provide new quality files if the fastq or file parameter is used.\n"; helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n"; helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n"; - helpString += "The ffastq and rfastq parameters are required.\n"; + helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n"; + helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column. Mothur will process each pair and create a combined fasta and qual file with all the sequences.\n"; + helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n"; + helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n"; + helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n"; helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; @@ -72,27 +81,22 @@ string MakeContigsCommand::getHelpString(){ } } //********************************************************************************************************************** -string MakeContigsCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string MakeContigsCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "fasta") { outputFileName = "contigs.fasta"; } - else if (type == "qfile") { outputFileName = "contigs.qual"; } - else if (type == "group") { outputFileName = "groups"; } - else if (type == "mismatch") { outputFileName = "contigs.mismatch"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "MakeContigsCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; } + else if (type == "qfile") { pattern = "[filename],[tag],contigs.qual"; } + else if (type == "group") { pattern = "[filename],[tag],groups"; } + else if (type == "mismatch") { pattern = "[filename],[tag],contigs.mismatch"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** MakeContigsCommand::MakeContigsCommand(){ @@ -162,6 +166,46 @@ MakeContigsCommand::MakeContigsCommand(string option) { if (path == "") { parameters["rfastq"] = inputDir + it->second; } } + it = parameters.find("ffasta"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["ffasta"] = inputDir + it->second; } + } + + it = parameters.find("rfasta"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["rfasta"] = inputDir + it->second; } + } + + it = parameters.find("fqfile"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fqfile"] = inputDir + it->second; } + } + + it = parameters.find("rqfile"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["rqfile"] = inputDir + it->second; } + } + + it = parameters.find("file"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["file"] = inputDir + it->second; } + } + it = parameters.find("oligos"); //user has given a template file if(it != parameters.end()){ @@ -172,12 +216,36 @@ MakeContigsCommand::MakeContigsCommand(string option) { } ffastqfile = validParameter.validFile(parameters, "ffastq", true); - if (ffastqfile == "not open") { ffastqfile = ""; abort = true; } - else if (ffastqfile == "not found") { ffastqfile = ""; abort=true; m->mothurOut("The ffastq parameter is required.\n"); } + if (ffastqfile == "not open") { abort = true; } + else if (ffastqfile == "not found") { ffastqfile = ""; } rfastqfile = validParameter.validFile(parameters, "rfastq", true); - if (rfastqfile == "not open") { rfastqfile = ""; abort = true; } - else if (rfastqfile == "not found") { rfastqfile = ""; abort=true; m->mothurOut("The rfastq parameter is required.\n"); } + if (rfastqfile == "not open") { abort = true; } + else if (rfastqfile == "not found") { rfastqfile = ""; } + + ffastafile = validParameter.validFile(parameters, "ffasta", true); + if (ffastafile == "not open") { abort = true; } + else if (ffastafile == "not found") { ffastafile = ""; } + + rfastafile = validParameter.validFile(parameters, "rfasta", true); + if (rfastafile == "not open") { abort = true; } + else if (rfastafile == "not found") { rfastafile = ""; } + + fqualfile = validParameter.validFile(parameters, "fqfile", true); + if (fqualfile == "not open") { abort = true; } + else if (fqualfile == "not found") { fqualfile = ""; } + + rqualfile = validParameter.validFile(parameters, "rqfile", true); + if (rqualfile == "not open") { abort = true; } + else if (rqualfile == "not found") { rqualfile = ""; } + + file = validParameter.validFile(parameters, "file", true); + if (file == "not open") { abort = true; } + else if (file == "not found") { file = ""; } + + if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: ffastq and rfastq parameters are required.\n"); } + if ((ffastqfile != "") && (rfastqfile == "")) { abort = true; } + if ((ffastqfile == "") && (rfastqfile != "")) { abort = true; } oligosfile = validParameter.validFile(parameters, "oligos", true); if (oligosfile == "not found") { oligosfile = ""; } @@ -220,13 +288,15 @@ MakeContigsCommand::MakeContigsCommand(string option) { temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; } m->mothurConvert(temp, pdiffs); - // temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; } + // temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; } // m->mothurConvert(temp, ldiffs); + ldiffs = 0; // temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; } -// m->mothurConvert(temp, sdiffs); + // m->mothurConvert(temp, sdiffs); + sdiffs = 0; - temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } + temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); } m->mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } //+ ldiffs + sdiffs; @@ -252,33 +322,39 @@ int MakeContigsCommand::execute(){ //read ffastq and rfastq files creating fasta and qual files. //this function will create a forward and reverse, fasta and qual files for each processor. //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual - int numReads = 0; + unsigned long int numReads = 0; int start = time(NULL); longestBase = 1000; m->mothurOut("Reading fastq data...\n"); vector< vector > files = readFastqFiles(numReads); m->mothurOut("Done.\n"); - + if (m->control_pressed) { return 0; } vector > fastaFileNames; vector > qualFileNames; createGroup = false; string outputGroupFileName; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(ffastqfile)); + variables["[tag]"] = ""; if(oligosfile != ""){ createGroup = getOligos(fastaFileNames, qualFileNames); if (createGroup) { - outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("group"); + outputGroupFileName = getOutputFileName("group",variables); outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } - string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("fasta"); - string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "trim." + getOutputFileNameTag("qfile"); - string outScrapFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("fasta"); - string outScrapQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "scrap." + getOutputFileNameTag("qfile"); + variables["[tag]"] = "trim"; + string outFastaFile = getOutputFileName("fasta",variables); + string outQualFile = getOutputFileName("qfile",variables); + variables["[tag]"] = "scrap"; + string outScrapFastaFile = getOutputFileName("fasta",variables); + string outScrapQualFile = getOutputFileName("qfile",variables); - string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch"); + variables["[tag]"] = ""; + string outMisMatchFile = getOutputFileName("mismatch",variables); outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile); outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); @@ -330,7 +406,7 @@ int MakeContigsCommand::execute(){ ofstream out; string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)); - thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); m->openOutputFile(thisGroupName, out); while (!in.eof()){ @@ -881,7 +957,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string } } //********************************************************************************************************************** -vector< vector > MakeContigsCommand::readFastqFiles(int& count){ +vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& count){ try { vector< vector > files; @@ -944,11 +1020,13 @@ vector< vector > MakeContigsCommand::readFastqFiles(int& count){ else { ignorer = true; } vector reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques); - + for (int i = 0; i < reads.size(); i++) { fastqRead fread = reads[i].forward; fastqRead rread = reads[i].reverse; + if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); } + if (checkReads(fread, rread)) { if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; } @@ -1012,25 +1090,42 @@ vector MakeContigsCommand::getReads(bool ignoref, bool ignorer, f pairFastqRead temp(forward, reverse); reads.push_back(temp); }else { - //look for forward pair - itUniques = uniques.find(forward.name); - if (itUniques != uniques.end()) { //we have the pair for this read - pairFastqRead temp(forward, itUniques->second); - reads.push_back(temp); - uniques.erase(itUniques); - }else { //save this read for later - uniques[forward.name] = forward; + bool match = false; + //if no match are the names only different by 1 and 2? + string tempFRead = forward.name.substr(0, forward.name.length()-1); + string tempRRead = reverse.name.substr(0, reverse.name.length()-1); + if (tempFRead == tempRRead) { + if ((forward.name[forward.name.length()-1] == '1') && (reverse.name[reverse.name.length()-1] == '2')) { + forward.name = tempFRead; + reverse.name = tempRRead; + pairFastqRead temp(forward, reverse); + reads.push_back(temp); + match = true; + } } - //look for reverse pair - itUniques = uniques.find(reverse.name); - if (itUniques != uniques.end()) { //we have the pair for this read - pairFastqRead temp(itUniques->second, reverse); - reads.push_back(temp); - uniques.erase(itUniques); - }else { //save this read for later - uniques[reverse.name] = reverse; + if (!match) { + //look for forward pair + itUniques = uniques.find(forward.name); + if (itUniques != uniques.end()) { //we have the pair for this read + pairFastqRead temp(forward, itUniques->second); + reads.push_back(temp); + uniques.erase(itUniques); + }else { //save this read for later + uniques[forward.name] = forward; + } + + //look for reverse pair + itUniques = uniques.find(reverse.name); + if (itUniques != uniques.end()) { //we have the pair for this read + pairFastqRead temp(itUniques->second, reverse); + reads.push_back(temp); + uniques.erase(itUniques); + }else { //save this read for later + uniques[reverse.name] = reverse; + } } + } }else if (!ignoref && ignorer) { //ignore reverse keep forward //look for forward pair