X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=makecontigscommand.cpp;h=85b6a8fdcbacf1ca1ee5b471b2142480ecf5da48;hp=b1e78ae091a13a020360137303b3143a5d9536f6;hb=b0997605981902442138b9309e9c43d95c3ba10a;hpb=196c22d0f93ba48e8ec54ab76608b6e3ba5e68cc diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index b1e78ae..85b6a8f 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -73,6 +73,7 @@ string MakeContigsCommand::getHelpString(){ helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; + helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n"; helpString += "The make.contigs command should be in the following format: \n"; helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; @@ -110,7 +111,7 @@ MakeContigsCommand::MakeContigsCommand(){ outputTypes["fasta"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["report"] = tempOutNames; - } + } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); exit(1); @@ -320,6 +321,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); + temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; } trimOverlap = m->isTrue(temp); @@ -386,10 +388,14 @@ int MakeContigsCommand::execute(){ outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile); } + map totalGroupCounts; + for (int l = 0; l < filesToProcess.size(); l++) { m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n"); + groupCounts.clear(); + groupMap.clear(); vector > fastaFileNames; createOligosGroup = false; string outputGroupFileName; @@ -415,7 +421,7 @@ int MakeContigsCommand::execute(){ m->mothurOut("Making contigs...\n"); createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l); - m->mothurOut("Done.\n"); + m->mothurOut("Here...\n"); //remove temp fasta and qual files for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } } @@ -455,17 +461,17 @@ int MakeContigsCommand::execute(){ ofstream out; string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first)); - thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); m->openOutputFile(thisGroupName, out); while (!in.eof()){ if (m->control_pressed) { break; } Sequence currSeq(in); m->gobble(in); - out << currSeq.getName() << '\t' << it->second << endl; + out << currSeq.getName() << '\t' << it->second << endl; } - in.close(); out.close(); + in.close(); } } @@ -479,8 +485,8 @@ int MakeContigsCommand::execute(){ } if (filesToProcess.size() > 1) { //merge into large combo files - if (createFileGroup || createOligosGroup) { - if (l == 0) { + if (createFileGroup || createOligosGroup) { + if (l == 0) { ofstream outCGroup; m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close(); outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile); @@ -488,6 +494,12 @@ int MakeContigsCommand::execute(){ m->appendFiles(outputGroupFileName, compositeGroupFile); if (!allFiles) { m->mothurRemove(outputGroupFileName); } else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + + for (map::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) { + map::iterator itTemp = totalGroupCounts.find(itGroups->first); + if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups + else { itTemp->second += itGroups->second; } //existing group, update total + } } if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); } else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); } @@ -503,12 +515,17 @@ int MakeContigsCommand::execute(){ outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); } }else { + totalGroupCounts = groupCounts; outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); - if (createFileGroup || createOligosGroup) { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + if (createFileGroup || createOligosGroup) { + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); + } } + m->mothurOut("Done.\n"); } + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n"); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -516,8 +533,8 @@ int MakeContigsCommand::execute(){ //output group counts m->mothurOutEndLine(); int total = 0; - if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); } - for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { + if (totalGroupCounts.size() != 0) { m->mothurOut("Group count: \n"); } + for (map::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); } if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); } @@ -643,7 +660,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } } - + num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputScrapFasta + toString(getpid()) + ".temp", @@ -754,8 +771,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } } - - + contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h); pDataArray.push_back(tempcontig); @@ -1665,7 +1681,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri // get rest of line in case there is a primer name while (!in.eof()) { char c = in.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1697,7 +1713,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri group = ""; while (!in.eof()) { char c = in.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } }