X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=makecontigscommand.cpp;h=85b6a8fdcbacf1ca1ee5b471b2142480ecf5da48;hb=bd27c2b0612942815b7417c79f7ee41f669a2a34;hp=7d7184f7806b671830dfaa4303af72558d2db70e;hpb=859e3a473a3e63e0060c49be70b80f9289253da2;p=mothur.git diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 7d7184f..85b6a8f 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -30,7 +30,7 @@ vector MakeContigsCommand::setParameters(){ CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); - CommandParameter pthreshold("insert", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); @@ -70,9 +70,10 @@ string MakeContigsCommand::getHelpString(){ helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; - helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n"; + helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; + helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n"; helpString += "The make.contigs command should be in the following format: \n"; helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; @@ -110,7 +111,7 @@ MakeContigsCommand::MakeContigsCommand(){ outputTypes["fasta"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["report"] = tempOutNames; - } + } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); exit(1); @@ -287,7 +288,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { m->mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } - temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "25"; } + temp = validParameter.validFile(parameters, "insert", false); if (temp == "not found"){ temp = "20"; } m->mothurConvert(temp, insert); if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; } @@ -320,6 +321,7 @@ MakeContigsCommand::MakeContigsCommand(string option) { temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); + temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; } trimOverlap = m->isTrue(temp); @@ -386,10 +388,14 @@ int MakeContigsCommand::execute(){ outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile); } + map totalGroupCounts; + for (int l = 0; l < filesToProcess.size(); l++) { m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n"); + groupCounts.clear(); + groupMap.clear(); vector > fastaFileNames; createOligosGroup = false; string outputGroupFileName; @@ -415,7 +421,7 @@ int MakeContigsCommand::execute(){ m->mothurOut("Making contigs...\n"); createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l); - m->mothurOut("Done.\n"); + m->mothurOut("Here...\n"); //remove temp fasta and qual files for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } } @@ -455,17 +461,17 @@ int MakeContigsCommand::execute(){ ofstream out; string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first)); - thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); m->openOutputFile(thisGroupName, out); while (!in.eof()){ if (m->control_pressed) { break; } Sequence currSeq(in); m->gobble(in); - out << currSeq.getName() << '\t' << it->second << endl; + out << currSeq.getName() << '\t' << it->second << endl; } - in.close(); out.close(); + in.close(); } } @@ -479,8 +485,8 @@ int MakeContigsCommand::execute(){ } if (filesToProcess.size() > 1) { //merge into large combo files - if (createFileGroup || createOligosGroup) { - if (l == 0) { + if (createFileGroup || createOligosGroup) { + if (l == 0) { ofstream outCGroup; m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close(); outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile); @@ -488,6 +494,12 @@ int MakeContigsCommand::execute(){ m->appendFiles(outputGroupFileName, compositeGroupFile); if (!allFiles) { m->mothurRemove(outputGroupFileName); } else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + + for (map::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) { + map::iterator itTemp = totalGroupCounts.find(itGroups->first); + if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups + else { itTemp->second += itGroups->second; } //existing group, update total + } } if (l == 0) { m->appendFiles(outMisMatchFile, compositeMisMatchFile); } else { m->appendFilesWithoutHeaders(outMisMatchFile, compositeMisMatchFile); } @@ -503,12 +515,17 @@ int MakeContigsCommand::execute(){ outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); } }else { + totalGroupCounts = groupCounts; outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); - if (createFileGroup || createOligosGroup) { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + if (createFileGroup || createOligosGroup) { + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); + } } + m->mothurOut("Done.\n"); } + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n"); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -516,8 +533,8 @@ int MakeContigsCommand::execute(){ //output group counts m->mothurOutEndLine(); int total = 0; - if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); } - for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { + if (totalGroupCounts.size() != 0) { m->mothurOut("Group count: \n"); } + for (map::iterator it = totalGroupCounts.begin(); it != totalGroupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); } if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); } @@ -643,7 +660,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } } - + num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputScrapFasta + toString(getpid()) + ".temp", @@ -754,8 +771,7 @@ int MakeContigsCommand::createProcesses(vector< vector > files, string o } } } - - + contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h); pDataArray.push_back(tempcontig); @@ -961,12 +977,12 @@ int MakeContigsCommand::driver(vector files, string outputFasta, string contig += seq1[i]; }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base if (thisfqualfile != "") { - if (scores2[BBaseMap[i]] < insert) { } // + if (scores2[BBaseMap[i]] <= insert) { } // else { contig += seq2[i]; } }else { contig += seq2[i]; } //with no quality info, then we keep it? }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. In that case eliminate base if (thisfqualfile != "") { - if (scores1[ABaseMap[i]] < insert) { } // + if (scores1[ABaseMap[i]] <= insert) { } // else { contig += seq1[i]; } }else { contig += seq1[i]; } //with no quality info, then we keep it? }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality @@ -1665,7 +1681,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri // get rest of line in case there is a primer name while (!in.eof()) { char c = in.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1697,7 +1713,7 @@ bool MakeContigsCommand::getOligos(vector >& fastaFileNames, stri group = ""; while (!in.eof()) { char c = in.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } }