X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=makecontigscommand.cpp;h=8796ab2113f455b12c8eace90df64d4bbe2c318e;hp=c4866370bce4be7db8e5e67d7dec799b895080f2;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=c0793343098c00d922c0cdfe36280b5be802163c diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index c486637..8796ab2 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -19,10 +19,10 @@ vector MakeContigsCommand::setParameters(){ CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual); CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos); + CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex); + CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); -// CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); -// CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign); @@ -32,7 +32,7 @@ vector MakeContigsCommand::setParameters(){ CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); - CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); @@ -52,15 +52,17 @@ vector MakeContigsCommand::setParameters(){ string MakeContigsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. It will also provide new quality files if the fastq or file parameter is used.\n"; + helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. \n"; helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n"; - helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n"; + helpString += "If a forward index or reverse index file is provided barcodes be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n"; + helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n"; helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n"; - helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n"; + helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n"; helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n"; helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n"; helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; + helpString += "The findex and rindex parameters are used to provide a forward index and reverse index files to process. \n"; helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; @@ -72,9 +74,10 @@ string MakeContigsCommand::getHelpString(){ helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; - helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n"; + helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; + helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n"; helpString += "The make.contigs command should be in the following format: \n"; helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; @@ -112,7 +115,7 @@ MakeContigsCommand::MakeContigsCommand(){ outputTypes["fasta"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["report"] = tempOutNames; - } + } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); exit(1); @@ -121,7 +124,8 @@ MakeContigsCommand::MakeContigsCommand(){ //********************************************************************************************************************** MakeContigsCommand::MakeContigsCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; + createFileGroup = false; createOligosGroup = false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -216,6 +220,22 @@ MakeContigsCommand::MakeContigsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["oligos"] = inputDir + it->second; } } + + it = parameters.find("findex"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["findex"] = inputDir + it->second; } + } + + it = parameters.find("rindex"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["rindex"] = inputDir + it->second; } + } } ffastqfile = validParameter.validFile(parameters, "ffastq", true); @@ -264,6 +284,29 @@ MakeContigsCommand::MakeContigsCommand(string option) { else if(oligosfile == "not open") { abort = true; } else { m->setOligosFile(oligosfile); } + findexfile = validParameter.validFile(parameters, "findex", true); + if (findexfile == "not found") { findexfile = ""; } + else if(findexfile == "not open") { abort = true; } + + rindexfile = validParameter.validFile(parameters, "rindex", true); + if (rindexfile == "not found") { rindexfile = ""; } + else if(rindexfile == "not open") { abort = true; } + + if ((rindexfile != "") || (findexfile != "")) { + if (oligosfile == ""){ + oligosfile = m->getOligosFile(); + if (oligosfile != "") { m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n"); } + else { + m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true; + } + } + + //can only use an index file with the fastq parameters not fasta and qual + if ((ffastafile != "") || (rfastafile != "")) { + m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true; + } + } + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; @@ -288,7 +331,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { m->mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } - temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "25"; } + temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; } m->mothurConvert(temp, insert); if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; } @@ -321,6 +364,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); + temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; } trimOverlap = m->isTrue(temp); @@ -387,39 +431,39 @@ int MakeContigsCommand::execute(){ outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile); } + map totalGroupCounts; + for (int l = 0; l < filesToProcess.size(); l++) { m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n"); + groupCounts.clear(); + groupMap.clear(); vector > fastaFileNames; - createGroup = false; + createOligosGroup = false; string outputGroupFileName; map variables; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir = m->hasPath(filesToProcess[l][0][0]); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0])); variables["[tag]"] = ""; - if(oligosfile != ""){ - createGroup = getOligos(fastaFileNames, variables["[filename]"]); - if (createGroup) { - outputGroupFileName = getOutputFileName("group",variables); - outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); - } + if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); } + if (createOligosGroup || createFileGroup) { + outputGroupFileName = getOutputFileName("group",variables); } + //give group in file file precedence + if (createFileGroup) { createOligosGroup = false; } + variables["[tag]"] = "trim"; string outFastaFile = getOutputFileName("fasta",variables); variables["[tag]"] = "scrap"; string outScrapFastaFile = getOutputFileName("fasta",variables); variables["[tag]"] = ""; string outMisMatchFile = getOutputFileName("report",variables); - outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); - outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); - outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); - + m->mothurOut("Making contigs...\n"); - createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames); - m->mothurOut("Done.\n"); + createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l); //remove temp fasta and qual files for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } } @@ -459,21 +503,21 @@ int MakeContigsCommand::execute(){ ofstream out; string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first)); - thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); m->openOutputFile(thisGroupName, out); while (!in.eof()){ if (m->control_pressed) { break; } Sequence currSeq(in); m->gobble(in); - out << currSeq.getName() << '\t' << it->second << endl; + out << currSeq.getName() << '\t' << it->second << endl; } - in.close(); out.close(); + in.close(); } } - if (createGroup) { + if (createFileGroup || createOligosGroup) { ofstream outGroup; m->openOutputFile(outputGroupFileName, outGroup); for (map::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) { @@ -483,19 +527,47 @@ int MakeContigsCommand::execute(){ } if (filesToProcess.size() > 1) { //merge into large combo files - if (createGroup) { - if (l == 0) { + if (createFileGroup || createOligosGroup) { + if (l == 0) { ofstream outCGroup; m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close(); outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile); } - m->appendFiles(outputGroupFileName, compositeGroupFile); + m->appendFiles(outputGroupFileName, compositeGroupFile); + if (!allFiles) { m->mothurRemove(outputGroupFileName); } + else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + + for (map::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) { + map::iterator itTemp = totalGroupCounts.find(itGroups->first); + if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups + else { itTemp->second += itGroups->second; } //existing group, update total + } } - m->appendFiles(outMisMatchFile, compositeMisMatchFile); + if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); } + else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); } m->appendFiles(outFastaFile, compositeFastaFile); m->appendFiles(outScrapFastaFile, compositeScrapFastaFile); + if (!allFiles) { + m->mothurRemove(outMisMatchFile); + m->mothurRemove(outFastaFile); + m->mothurRemove(outScrapFastaFile); + }else { + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); + outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); + } + }else { + totalGroupCounts = groupCounts; + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); + outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); + if (createFileGroup || createOligosGroup) { + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); + } } + m->mothurOut("Done.\n"); } + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n"); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -503,8 +575,8 @@ int MakeContigsCommand::execute(){ //output group counts m->mothurOutEndLine(); int total = 0; - if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); } - for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { + if (totalGroupCounts.size() != 0) { m->mothurOut("Group count: \n"); } + for (map::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); } if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); } @@ -542,7 +614,7 @@ vector< vector< vector > > MakeContigsCommand::preProcessData(unsigned l vector< vector< vector > > filesToProcess; if (ffastqfile != "") { //reading one file - vector< vector > files = readFastqFiles(numReads, ffastqfile, rfastqfile); + vector< vector > files = readFastqFiles(numReads, ffastqfile, rfastqfile, findexfile, rindexfile); //adjust for really large processors or really small files if (numReads == 0) { m->mothurOut("[ERROR]: no good reads.\n"); m->control_pressed = true; } if (numReads < processors) { @@ -563,12 +635,20 @@ vector< vector< vector > > MakeContigsCommand::preProcessData(unsigned l if (m->control_pressed) { for (int l = 0; l < filesToProcess.size(); l++) { for (int k = 0; k < filesToProcess[l].size(); k++) { for(int j = 0; j < filesToProcess[l][k].size(); j++) { m->mothurRemove(filesToProcess[l][k][j]); } filesToProcess[l][k].clear(); } return filesToProcess; } } unsigned long int thisFilesReads; - vector< vector > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1]); + vector< vector > files = readFastqFiles(thisFilesReads, filePairsToProcess[i][0], filePairsToProcess[i][1], filePairsToProcess[i][2], filePairsToProcess[i][3]); //adjust for really large processors or really small files if (thisFilesReads < processors) { m->mothurOut("[ERROR]: " + filePairsToProcess[i][0] + " has less than " + toString(processors) + " good reads, skipping\n"); for (int k = 0; k < files.size(); k++) { for(int j = 0; j < files[k].size(); j++) { m->mothurRemove(files[k][j]); } files[k].clear(); } + //remove from file2Group if necassary + map cFile2Group; + for (map::iterator it = file2Group.begin(); it != file2Group.end(); it++) { + if ((it->first) < i) { cFile2Group[it->first] = it->second; } + else if ((it->first) == i) { } //do nothing, we removed files for i + else { cFile2Group[(it->first-1)] = it->second; } //adjust files because i was removed + } + file2Group = cFile2Group; }else { filesToProcess.push_back(files); numReads += thisFilesReads; @@ -597,10 +677,14 @@ vector< vector< vector > > MakeContigsCommand::preProcessData(unsigned l } } //********************************************************************************************************************** -int MakeContigsCommand::createProcesses(vector< vector > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector > fastaFileNames) { +int MakeContigsCommand::createProcesses(vector< vector > files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector > fastaFileNames, int index) { try { int num = 0; vector processIDS; + string group = ""; + map::iterator it = file2Group.find(index); + if (it != file2Group.end()) { group = it->second; } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 0; @@ -626,19 +710,19 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } } - + num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputScrapFasta + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp", - tempFASTAFileNames, process); + tempFASTAFileNames, process, group); //pass groupCounts to parent ofstream out; string tempFile = toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; - if(createGroup){ + if (createFileGroup || createOligosGroup) { out << groupCounts.size() << endl; for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { @@ -665,7 +749,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o m->openOutputFile(outputScrapFasta, temp); temp.close(); //do my part - num = driver(files[processors-1], outputFasta, outputScrapFasta, outputMisMatches, fastaFileNames, processors-1); + num = driver(files[processors-1], outputFasta, outputScrapFasta, outputMisMatches, fastaFileNames, processors-1, group); //force parent to wait until all the processes are done for (int i=0;i > files, string o int tempNum; in >> tempNum; num += tempNum; m->gobble(in); - if(createGroup){ + if (createFileGroup || createOligosGroup) { string group; in >> tempNum; m->gobble(in); @@ -737,9 +821,8 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } } - - - contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, trimOverlap, h); + + contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h); pDataArray.push_back(tempcontig); hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]); @@ -768,7 +851,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o //do my part processIDS.push_back(processors-1); - num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1); + num = driver(files[processors-1], (outputFasta+ toString(processors-1) + ".temp"), (outputScrapFasta+ toString(processors-1) + ".temp"), (outputMisMatches+ toString(processors-1) + ".temp"), tempFASTAFileNames, processors-1, group); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); @@ -802,7 +885,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o m->appendFiles((outputScrapFasta + toString(processIDS[i]) + ".temp"), outputScrapFasta); m->mothurRemove((outputScrapFasta + toString(processIDS[i]) + ".temp")); - m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches); + m->appendFilesWithoutHeaders((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches); m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp")); if(allFiles){ @@ -825,7 +908,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } //********************************************************************************************************************** -int MakeContigsCommand::driver(vector files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector > fastaFileNames, int process){ +int MakeContigsCommand::driver(vector files, string outputFasta, string outputScrapFasta, string outputMisMatches, vector > fastaFileNames, int process, string group){ try { Alignment* alignment; @@ -837,10 +920,12 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string string thisfqualfile = files[1]; string thisrfastafile = files[2]; string thisrqualfile = files[3]; + string thisfindexfile = files[4]; + string thisrindexfile = files[5]; - if (m->debug) { m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); } + if (m->debug) { m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); } - ifstream inFFasta, inRFasta, inFQual, inRQual; + ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex; ofstream outFasta, outMisMatch, outScrapFasta; m->openInputFile(thisffastafile, inFFasta); m->openInputFile(thisrfastafile, inRFasta); @@ -848,10 +933,14 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string m->openInputFile(thisfqualfile, inFQual); m->openInputFile(thisrqualfile, inRQual); } + + if (thisfindexfile != "") { m->openInputFile(thisfindexfile, inFIndex); } + if (thisrindexfile != "") { m->openInputFile(thisrindexfile, inRIndex); } + m->openOutputFile(outputFasta, outFasta); m->openOutputFile(outputScrapFasta, outScrapFasta); m->openOutputFile(outputMisMatches, outMisMatch); - if (process == 0) { outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; } + outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes); @@ -871,13 +960,27 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string fQual = new QualityScores(inFQual); m->gobble(inFQual); rQual = new QualityScores(inRQual); m->gobble(inRQual); } + Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE"); + if (thisfindexfile != "") { + Sequence temp(inFIndex); m->gobble(inFIndex); + findexBarcode.setAligned(temp.getAligned()); + } + + if (thisrindexfile != "") { + Sequence temp(inRIndex); m->gobble(inRIndex); + rindexBarcode.setAligned(temp.getAligned()); + } int barcodeIndex = 0; int primerIndex = 0; - + if(barcodes.size() != 0){ if (thisfqualfile != "") { - success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex); + if ((thisfindexfile != "") || (thisrindexfile != "")) { + success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex); + }else { + success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex); + } }else { success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex); } @@ -939,17 +1042,19 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (seq2End < overlapEnd) { overlapEnd = seq2End; } //smallest end position is where overlapping ends int oStart = contig.length(); + //cout << fSeq.getAligned() << endl; cout << rSeq.getAligned() << endl; for (int i = overlapStart; i < overlapEnd; i++) { + //cout << seq1[i] << ' ' << seq2[i] << ' ' << scores1[ABaseMap[i]] << ' ' << scores2[BBaseMap[i]] << endl; if (seq1[i] == seq2[i]) { //match, add base and choose highest score contig += seq1[i]; }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base if (thisfqualfile != "") { - if (scores2[BBaseMap[i]] < insert) { } // + if (scores2[BBaseMap[i]] <= insert) { } // else { contig += seq2[i]; } }else { contig += seq2[i]; } //with no quality info, then we keep it? }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base if (thisfqualfile != "") { - if (scores1[ABaseMap[i]] < insert) { } // + if (scores1[ABaseMap[i]] <= insert) { } // else { contig += seq1[i]; } }else { contig += seq1[i]; } //with no quality info, then we keep it? }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality @@ -973,7 +1078,8 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string }else { //seq2 ends before seq1 so take from overlap to length from seq1 for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; } } - + //cout << contig << endl; + //exit(1); if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } } if(trashCode.length() == 0){ @@ -981,7 +1087,7 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string if (m->debug) { m->mothurOut(fSeq.getName()); } - if (createGroup) { + if (createOligosGroup) { if(barcodes.size() != 0){ string thisGroup = barcodeNameVector[barcodeIndex]; if (primers.size() != 0) { @@ -1006,6 +1112,15 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string }else { ignore = true; } } + }else if (createFileGroup) { + int pos = group.find("ignore"); + if (pos == string::npos) { + groupMap[fSeq.getName()] = group; + + map::iterator it = groupCounts.find(group); + if (it == groupCounts.end()) { groupCounts[group] = 1; } + else { groupCounts[it->first] ++; } + }else { ignore = true; } } if (m->debug) { m->mothurOut("\n"); } @@ -1055,11 +1170,11 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string } } //********************************************************************************************************************** -vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq){ +vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& count, string ffastq, string rfastq, string findex, string rindex){ try { vector< vector > files; //maps processors number to file pointer - map > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual + map > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual, tempfiles[4] = forwardIndex, [4] = forwardReverse map >::iterator it; //create files to write to @@ -1069,6 +1184,8 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c ofstream* outFQ = new ofstream; temp.push_back(outFQ); ofstream* outRF = new ofstream; temp.push_back(outRF); ofstream* outRQ = new ofstream; temp.push_back(outRQ); + ofstream* outFI = new ofstream; temp.push_back(outFI); + ofstream* outRI = new ofstream; temp.push_back(outRI); tempfiles[i] = temp; vector names; @@ -1078,8 +1195,13 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c string rfastafilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rfastatemp"; string fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(ffastq)) + toString(i) + "fqualtemp"; string rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rfastq)) + toString(i) + "rqualtemp"; + string findexfilename = ""; string rindexfilename = ""; + noneOk = false; //flag to oligos file read that its okay to allow for non paired barcodes + if (findex != "") { findexfilename = thisOutputDir + m->getRootName(m->getSimpleName(findex)) + toString(i) + "findextemp"; m->openOutputFile(findexfilename, *outFI); noneOk = true; } + if (rindex != "") { rindexfilename = thisOutputDir + m->getRootName(m->getSimpleName(rindex)) + toString(i) + "rindextemp"; m->openOutputFile(rindexfilename, *outRI); noneOk = true; } names.push_back(ffastafilename); names.push_back(fqualfilename); names.push_back(rfastafilename); names.push_back(rqualfilename); + names.push_back(findexfilename); names.push_back(rindexfilename); files.push_back(names); m->openOutputFile(ffastafilename, *outFF); @@ -1093,7 +1215,7 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } //remove files for (int i = 0; i < files.size(); i++) { - for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } + for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } } @@ -1103,31 +1225,52 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c ifstream inReverse; m->openInputFile(rfastq, inReverse); + ifstream infIndex, inrIndex; + bool findexIsGood = false; + bool rindexIsGood = false; + if (findex != "") { m->openInputFile(findex, infIndex); findexIsGood = true; } + if (rindex != "") { m->openInputFile(rindex, inrIndex); rindexIsGood = true; } + count = 0; map uniques; + map iUniques; + map pairUniques; map::iterator itUniques; - while ((!inForward.eof()) || (!inReverse.eof())) { + while ((!inForward.eof()) || (!inReverse.eof()) || (findexIsGood) || (rindexIsGood)) { - if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; } + if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; } //get a read from forward and reverse fastq files - bool ignoref, ignorer; - fastqRead thisFread, thisRread; + bool ignoref, ignorer, ignorefi, ignoreri; + fastqRead thisFread, thisRread, thisFIread, thisRIread; if (!inForward.eof()) { thisFread = readFastq(inForward, ignoref); } else { ignoref = true; } if (!inReverse.eof()) { thisRread = readFastq(inReverse, ignorer); } else { ignorer = true; } + if (findexIsGood) { thisFIread = readFastq(infIndex, ignorefi); if (infIndex.eof()) { findexIsGood = false; } } + else { ignorefi = true; } + if (rindexIsGood) { thisRIread = readFastq(inrIndex, ignoreri); if (inrIndex.eof()) { rindexIsGood = false; } } + else { ignoreri = true; } + + bool allowOne = false; + if ((findex == "") || (rindex == "")) { allowOne = true; } + vector frReads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false); + vector friReads = getReads(ignorefi, ignoreri, thisFIread, thisRIread, iUniques, allowOne); + + //add in index info if provided + vector reads = frReads; + if ((findex != "") || (rindex != "")) { reads = mergeReads(frReads, friReads, pairUniques); } - vector reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques); - for (int i = 0; i < reads.size(); i++) { fastqRead fread = reads[i].forward; fastqRead rread = reads[i].reverse; + fastqRead firead = reads[i].findex; + fastqRead riread = reads[i].rindex; - if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); } + if (m->debug) { m->mothurOut(toString(count) + '\t' + fread.name + '\t' + rread.name + '\n'); if (findex != "") { m->mothurOut(toString(count) + '\t' + firead.name + '\n'); } if (rindex != "") { m->mothurOut(toString(count) + '\t' + riread.name + '\n'); } } //if (checkReads(fread, rread, ffastq, rfastq)) { - if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; } + if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { m->mothurRemove(files[i][j]); } } } inForward.close(); inReverse.close(); if (findex != "") { infIndex.close(); } if (findex != "") { inrIndex.close(); } return files; } //if the reads are okay write to output files int process = count % processors; @@ -1140,7 +1283,9 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c *(tempfiles[process][3]) << ">" << rread.name << endl; for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; } *(tempfiles[process][3]) << endl; - + if (findex != "") { *(tempfiles[process][4]) << ">" << firead.name << endl << firead.sequence << endl; } + if (rindex != "") { *(tempfiles[process][5]) << ">" << riread.name << endl << riread.sequence << endl; } + count++; //report progress @@ -1153,8 +1298,13 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c if (uniques.size() != 0) { for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) { + if (m->control_pressed) { break; } m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n"); } + for (map:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) { + if (m->control_pressed) { break; } + m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n"); + } m->mothurOutEndLine(); } @@ -1162,7 +1312,9 @@ vector< vector > MakeContigsCommand::readFastqFiles(unsigned long int& c for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } inForward.close(); inReverse.close(); - + if (findex != "") { infIndex.close(); } + if (rindex != "") { inrIndex.close(); } + return files; } catch(exception& e) { @@ -1196,8 +1348,10 @@ vector< vector > MakeContigsCommand::readFastaFiles(unsigned long int& c if (fqualfile != "") { fqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(fqualfile)) + toString(i) + "fqual.temp"; m->openOutputFile(fqualfilename, *outFQ); } string rqualfilename = ""; if (rqualfile != "") { rqualfilename = thisOutputDir + m->getRootName(m->getSimpleName(rqualfile)) + toString(i) + "rqual.temp"; m->openOutputFile(rqualfilename, *outRQ); } + string findexfilename = ""; string rindexfilename = ""; names.push_back(ffastafilename); names.push_back(fqualfilename); names.push_back(rfastafilename); names.push_back(rqualfilename); + names.push_back(findexfilename); names.push_back(rindexfilename); files.push_back(names); m->openOutputFile(ffastafilename, *outFF); @@ -1261,7 +1415,7 @@ vector< vector > MakeContigsCommand::readFastaFiles(unsigned long int& c }else { ignorer = true; } } - vector reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques); + vector reads = getReads(ignoref, ignorer, thisFread, thisRread, uniques, false); for (int i = 0; i < reads.size(); i++) { fastqRead fread = reads[i].forward; @@ -1316,7 +1470,7 @@ vector< vector > MakeContigsCommand::readFastaFiles(unsigned long int& c } } //********************************************************************************************************************** -vector MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques){ +vector MakeContigsCommand::getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques, bool allowOne){ try { vector reads; map::iterator itUniques; @@ -1364,25 +1518,36 @@ vector MakeContigsCommand::getReads(bool ignoref, bool ignorer, f } }else if (!ignoref && ignorer) { //ignore reverse keep forward - //look for forward pair - itUniques = uniques.find(forward.name); - if (itUniques != uniques.end()) { //we have the pair for this read - pairFastqRead temp(forward, itUniques->second); + if (allowOne) { + fastqRead dummy; + pairFastqRead temp(forward, dummy); reads.push_back(temp); - uniques.erase(itUniques); - }else { //save this read for later - uniques[forward.name] = forward; + }else { + //look for forward pair + itUniques = uniques.find(forward.name); + if (itUniques != uniques.end()) { //we have the pair for this read + pairFastqRead temp(forward, itUniques->second); + reads.push_back(temp); + uniques.erase(itUniques); + }else { //save this read for later + uniques[forward.name] = forward; + } } - }else if (ignoref && !ignorer) { //ignore forward keep reverse - //look for reverse pair - itUniques = uniques.find(reverse.name); - if (itUniques != uniques.end()) { //we have the pair for this read - pairFastqRead temp(itUniques->second, reverse); + if (allowOne) { + fastqRead dummy; + pairFastqRead temp(dummy, reverse); reads.push_back(temp); - uniques.erase(itUniques); - }else { //save this read for later - uniques[reverse.name] = reverse; + }else { + //look for reverse pair + itUniques = uniques.find(reverse.name); + if (itUniques != uniques.end()) { //we have the pair for this read + pairFastqRead temp(itUniques->second, reverse); + reads.push_back(temp); + uniques.erase(itUniques); + }else { //save this read for later + uniques[reverse.name] = reverse; + } } }//else ignore both and do nothing @@ -1394,6 +1559,73 @@ vector MakeContigsCommand::getReads(bool ignoref, bool ignorer, f } } //********************************************************************************************************************** +//look through the reads from the forward and reverse files and try to find matching reads from index files. +vector MakeContigsCommand::mergeReads(vector thisReads, vector indexes, map& uniques){ + try { + vector reads; + map::iterator itUniques; + + set foundIndexes; + for (int i = 0; i < thisReads.size(); i++) { + bool found = false; + for (int j = 0; j < indexes.size(); j++) { + + //incase only one index + string indexName = indexes[j].forward.name; + if (indexName == "") { indexName = indexes[j].reverse.name; } + + if (thisReads[i].forward.name == indexName){ + thisReads[i].findex = indexes[j].forward; + thisReads[i].rindex = indexes[j].reverse; + reads.push_back(thisReads[i]); + found = true; + foundIndexes.insert(j); + } + } + + if (!found) { + //look for forward pair + itUniques = uniques.find('i'+thisReads[i].forward.name); + if (itUniques != uniques.end()) { //we have the pair for this read + thisReads[i].findex = itUniques->second.forward; + thisReads[i].rindex = itUniques->second.reverse; + reads.push_back(thisReads[i]); + uniques.erase(itUniques); + }else { //save this read for later + uniques['r'+thisReads[i].forward.name] = thisReads[i]; + } + } + } + + if (foundIndexes.size() != indexes.size()) { //if we didnt match all the indexes look for them in uniques + for (int j = 0; j < indexes.size(); j++) { + if (foundIndexes.count(j) == 0) { //we didnt find this one + //incase only one index + string indexName = indexes[j].forward.name; + if (indexName == "") { indexName = indexes[j].reverse.name; } + + //look for forward pair + itUniques = uniques.find('r'+indexName); + if (itUniques != uniques.end()) { //we have the pair for this read + pairFastqRead temp(itUniques->second.forward, itUniques->second.reverse, indexes[j].forward, indexes[j].reverse); + reads.push_back(temp); + uniques.erase(itUniques); + }else { //save this read for later + uniques['i'+indexName] = indexes[j]; + } + } + } + } + + + return reads; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "mergeReads"); + exit(1); + } +} +//********************************************************************************************************************** fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){ try { fastqRead read; @@ -1430,9 +1662,12 @@ fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){ vector qualScores = convertQual(quality); + m->checkName(name); read.name = name; read.sequence = sequence; read.scores = qualScores; + + if (m->debug) { m->mothurOut("[DEBUG]: " + read.name + " " + read.sequence + " " + quality + "\n"); } return read; } @@ -1466,10 +1701,16 @@ bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse, stri } }*/ //*************************************************************************************************************** +//lines can be 2, 3, or 4 columns +// forward.fastq reverse.fastq -> 2 column +// groupName forward.fastq reverse.fastq -> 3 column +// forward.fastq reverse.fastq forward.index.fastq reverse.index.fastq -> 4 column +// forward.fastq reverse.fastq none reverse.index.fastq -> 4 column +// forward.fastq reverse.fastq forward.index.fastq none -> 4 column vector< vector > MakeContigsCommand::readFileNames(string filename){ try { vector< vector > files; - string forward, reverse; + string forward, reverse, findex, rindex; ifstream in; m->openInputFile(filename, in); @@ -1478,8 +1719,35 @@ vector< vector > MakeContigsCommand::readFileNames(string filename){ if (m->control_pressed) { return files; } - in >> forward; m->gobble(in); - in >> reverse; m->gobble(in); + string line = m->getline(in); m->gobble(in); + vector pieces = m->splitWhiteSpace(line); + + string group = ""; + if (pieces.size() == 2) { + forward = pieces[0]; + reverse = pieces[1]; + group = ""; + findex = ""; + rindex = ""; + }else if (pieces.size() == 3) { + group = pieces[0]; + forward = pieces[1]; + reverse = pieces[2]; + findex = ""; + rindex = ""; + createFileGroup = true; + }else if (pieces.size() == 4) { + forward = pieces[0]; + reverse = pieces[1]; + findex = pieces[2]; + rindex = pieces[3]; + if ((findex == "none") || (findex == "NONE")){ findex = ""; } + if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; } + }else { + m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. \n"); m->control_pressed = true; + } + + if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); } //check to make sure both are able to be opened ifstream in2; @@ -1544,20 +1812,92 @@ vector< vector > MakeContigsCommand::readFileNames(string filename){ m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); }else{ in3.close(); } - if ((openForward != 1) && (openReverse != 1)) { //good pair + int openFindex = 0; + if (findex != "") { + ifstream in4; + openFindex = m->openInputFile(findex, in4, "noerror"); in4.close(); + + //if you can't open it, try default location + if (openFindex == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(findex); + m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in5; + openFindex = m->openInputFile(tryPath, in5, "noerror"); + in5.close(); + findex = tryPath; + } + } + + //if you can't open it, try output location + if (openFindex == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(findex); + m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in6; + openFindex = m->openInputFile(tryPath, in6, "noerror"); + findex = tryPath; + in6.close(); + } + } + + if (openFindex == 1) { //can't find it + m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n"); + } + } + + int openRindex = 0; + if (rindex != "") { + ifstream in7; + openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close(); + + //if you can't open it, try default location + if (openRindex == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(rindex); + m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in8; + openRindex = m->openInputFile(tryPath, in8, "noerror"); + in8.close(); + rindex = tryPath; + } + } + + //if you can't open it, try output location + if (openRindex == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(rindex); + m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in9; + openRindex = m->openInputFile(tryPath, in9, "noerror"); + rindex = tryPath; + in9.close(); + } + } + + if (openRindex == 1) { //can't find it + m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n"); + } + } + + + if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair + file2Group[files.size()] = group; vector pair; pair.push_back(forward); pair.push_back(reverse); + pair.push_back(findex); + pair.push_back(rindex); + if (((findex != "") || (rindex != "")) && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->control_pressed = true; } files.push_back(pair); } - } in.close(); return files; } catch(exception& e) { - m->errorOut(e, "MakeContigsCommand", "checkReads"); + m->errorOut(e, "MakeContigsCommand", "readFileNames"); exit(1); } } @@ -1604,7 +1944,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri if(foligo[i] == 'U') { foligo[i] = 'T'; } } - if(type == "FORWARD"){ + if(type == "PRIMER"){ m->gobble(in); in >> roligo; @@ -1615,25 +1955,28 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri } //roligo = reverseOligo(roligo); + if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); } + group = ""; // get rest of line in case there is a primer name while (!in.eof()) { char c = in.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } oligosPair newPrimer(foligo, roligo); - + + if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } + //check for repeat barcodes string tempPair = foligo+roligo; if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); } else { uniquePrimers.insert(tempPair); } if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } - primers[indexPrimer]=newPrimer; indexPrimer++; primerNameVector.push_back(group); }else if(type == "BARCODE"){ @@ -1649,10 +1992,12 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri oligosPair newPair(foligo, roligo); + if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } } + group = ""; while (!in.eof()) { char c = in.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } }