From 67ea6ccd74dbd64828d31b952808255f206364ff Mon Sep 17 00:00:00 2001 From: SarahsWork Date: Mon, 25 Mar 2013 09:01:02 -0400 Subject: [PATCH] removed make count from make.contigs --- makecontigscommand.cpp | 119 ++++++++++------------------------------- subsamplecommand.cpp | 17 ++++-- trimflowscommand.cpp | 20 ++++--- unweighted.cpp | 4 +- 4 files changed, 55 insertions(+), 105 deletions(-) diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 8d53287..c8f20f1 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -7,7 +7,6 @@ // #include "makecontigscommand.h" -#include "counttable.h" //********************************************************************************************************************** vector MakeContigsCommand::setParameters(){ @@ -26,7 +25,6 @@ vector MakeContigsCommand::setParameters(){ CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); - CommandParameter pmakecount("makecount", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakecount); CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); @@ -54,7 +52,7 @@ string MakeContigsCommand::getHelpString(){ string helpString = ""; helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. It will also provide new quality files if the fastq or file parameter is used.\n"; helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n"; - helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles, makecount and processors.\n"; + helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, allfiles and processors.\n"; helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n"; helpString += "The file parameter is 2 or 3 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file. Mothur will process each pair and create a combined fasta and report file with all the sequences.\n"; helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n"; @@ -75,7 +73,6 @@ string MakeContigsCommand::getHelpString(){ helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; - helpString += "The makecount parameter will create a count table file instead of a group file. The default is F.\n"; helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n"; helpString += "The make.contigs command should be in the following format: \n"; @@ -95,7 +92,6 @@ string MakeContigsCommand::getOutputPattern(string type) { if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; } else if (type == "group") { pattern = "[filename],[tag],contigs.groups"; } - else if (type == "count") { pattern = "[filename],[tag],contigs.count_table"; } else if (type == "report") { pattern = "[filename],[tag],contigs.report"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } @@ -115,8 +111,7 @@ MakeContigsCommand::MakeContigsCommand(){ outputTypes["fasta"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["report"] = tempOutNames; - outputTypes["count"] = tempOutNames; - } + } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); exit(1); @@ -151,7 +146,6 @@ MakeContigsCommand::MakeContigsCommand(string option) { outputTypes["fasta"] = tempOutNames; outputTypes["report"] = tempOutNames; outputTypes["group"] = tempOutNames; - outputTypes["count"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -327,8 +321,6 @@ MakeContigsCommand::MakeContigsCommand(string option) { temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; } allFiles = m->isTrue(temp); - temp = validParameter.validFile(parameters, "makecount", false); if (temp == "not found") { temp = "F"; } - makeCount = m->isTrue(temp); temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; } trimOverlap = m->isTrue(temp); @@ -379,7 +371,6 @@ int MakeContigsCommand::execute(){ cvars["[filename]"] = compOutputDir + m->getRootName(m->getSimpleName(file)); cvars["[tag]"] = ""; string compositeGroupFile = getOutputFileName("group",cvars); - if (makeCount) { compositeGroupFile = getOutputFileName("count",cvars); } cvars["[tag]"] = "trim"; string compositeFastaFile = getOutputFileName("fasta",cvars); cvars["[tag]"] = "scrap"; @@ -398,7 +389,7 @@ int MakeContigsCommand::execute(){ } map totalGroupCounts; - CountTable compositeCt; + for (int l = 0; l < filesToProcess.size(); l++) { m->mothurOut("\n>>>>>\tProcessing " + filesToProcess[l][0][0] + " (file " + toString(l+1) + " of " + toString(filesToProcess.size()) + ")\t<<<<<\n"); @@ -416,7 +407,6 @@ int MakeContigsCommand::execute(){ if(oligosfile != ""){ createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]); } if (createOligosGroup || createFileGroup) { outputGroupFileName = getOutputFileName("group",variables); - if (makeCount) { outputGroupFileName = getOutputFileName("count",variables); } } //give group in file file precedence @@ -431,7 +421,7 @@ int MakeContigsCommand::execute(){ m->mothurOut("Making contigs...\n"); createProcesses(filesToProcess[l], outFastaFile, outScrapFastaFile, outMisMatchFile, fastaFileNames, l); - m->mothurOut("Done.\n"); + m->mothurOut("Here...\n"); //remove temp fasta and qual files for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); } } @@ -471,85 +461,40 @@ int MakeContigsCommand::execute(){ ofstream out; string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first)); - if (!makeCount) { thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); - m->openOutputFile(thisGroupName, out); + thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + m->openOutputFile(thisGroupName, out); + + while (!in.eof()){ + if (m->control_pressed) { break; } - while (!in.eof()){ - if (m->control_pressed) { break; } - - Sequence currSeq(in); m->gobble(in); - out << currSeq.getName() << '\t' << it->second << endl; - } - out.close(); - } - else { - thisGroupName += getOutputFileName("count",variables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); - CountTable ct; - ct.addGroup(it->second); - while (!in.eof()){ - if (m->control_pressed) { break; } - - Sequence currSeq(in); m->gobble(in); - vector tempGroupCount; tempGroupCount.push_back(1); - ct.push_back(currSeq.getName(), tempGroupCount); - } - ct.printTable(thisGroupName); + Sequence currSeq(in); m->gobble(in); + out << currSeq.getName() << '\t' << it->second << endl; } + out.close(); in.close(); } } if (createFileGroup || createOligosGroup) { - if (makeCount) { - if ((allFiles) || (filesToProcess.size() == 1)) { - CountTable ct; - for (map::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) { - ct.addGroup(itGroups->first); - } - vector groups = ct.getNamesOfGroups(); - for (map::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) { - vector tempGroupCounts; tempGroupCounts.resize(groups.size(), 0); - ct.push_back(itGroup->first, tempGroupCounts); - ct.setAbund(itGroup->first, itGroup->second, 1); - } - ct.printTable(outputGroupFileName); - } - }else { - ofstream outGroup; - m->openOutputFile(outputGroupFileName, outGroup); - for (map::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) { - outGroup << itGroup->first << '\t' << itGroup->second << endl; - } - outGroup.close(); + ofstream outGroup; + m->openOutputFile(outputGroupFileName, outGroup); + for (map::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) { + outGroup << itGroup->first << '\t' << itGroup->second << endl; } + outGroup.close(); } if (filesToProcess.size() > 1) { //merge into large combo files if (createFileGroup || createOligosGroup) { - if (makeCount) { - for (map::iterator itGroup = groupMap.begin(); itGroup != groupMap.end(); itGroup++) { - vector groups = compositeCt.getNamesOfGroups(); - if (m->inUsersGroups(itGroup->second, groups)) { - vector tempGroupCounts; tempGroupCounts.resize(groups.size(), 0); - compositeCt.push_back(itGroup->first, tempGroupCounts); - compositeCt.setAbund(itGroup->first, itGroup->second, 1); - }else{ - compositeCt.addGroup(itGroup->second); - vector tempGroupCounts; tempGroupCounts.resize(groups.size()+1, 0); - compositeCt.push_back(itGroup->first, tempGroupCounts); - compositeCt.setAbund(itGroup->first, itGroup->second, 1); - } - } - }else { - if (l == 0) { - ofstream outCGroup; - m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close(); - outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile); - } - m->appendFiles(outputGroupFileName, compositeGroupFile); - if (!allFiles) { m->mothurRemove(outputGroupFileName); } - else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + if (l == 0) { + ofstream outCGroup; + m->openOutputFile(compositeGroupFile, outCGroup); outCGroup.close(); + outputNames.push_back(compositeGroupFile); outputTypes["group"].push_back(compositeGroupFile); } + m->appendFiles(outputGroupFileName, compositeGroupFile); + if (!allFiles) { m->mothurRemove(outputGroupFileName); } + else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + for (map::iterator itGroups = groupCounts.begin(); itGroups != groupCounts.end(); itGroups++) { map::iterator itTemp = totalGroupCounts.find(itGroups->first); if (itTemp == totalGroupCounts.end()) { totalGroupCounts[itGroups->first] = itGroups->second; } //new group create it in totalGroups @@ -575,16 +520,12 @@ int MakeContigsCommand::execute(){ outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); if (createFileGroup || createOligosGroup) { - if (makeCount) { outputNames.push_back(outputGroupFileName); outputTypes["count"].push_back(outputGroupFileName); } - else { outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); } } + m->mothurOut("Done.\n"); } - if ((filesToProcess.size() > 1) && makeCount) { //merge into large combo files - compositeCt.printTable(compositeGroupFile); - outputNames.push_back(compositeGroupFile); outputTypes["count"].push_back(compositeGroupFile); - } m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n"); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -611,12 +552,6 @@ int MakeContigsCommand::execute(){ if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentGroup = (itTypes->second)[0]; m->setGroupFile(currentGroup); } } - - string currentCount = ""; - itTypes = outputTypes.find("count"); - if (itTypes != outputTypes.end()) { - if ((itTypes->second).size() != 0) { currentCount = (itTypes->second)[0]; m->setCountTableFile(currentCount); } - } //output files created by command m->mothurOutEndLine(); diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index 32745a2..4128191 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -1007,7 +1007,7 @@ int SubSampleCommand::getSubSampleList() { ListVector* list = input->getListVector(); string lastLabel = list->getLabel(); - //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; @@ -1265,11 +1265,18 @@ int SubSampleCommand::getSubSampleList() { if (taxonomyfile != "") { if (namefile == "") { - //fake nameMap - for (set::iterator it = subset.begin(); it != subset.end(); it++) { - vector temp; temp.push_back(*it); - nameMap[*it] = temp; + InputData input(listfile, "list"); + ListVector* list = input.getListVector(); + string lastLabel = list->getLabel(); + + for (int i = 0; i < list->getNumBins(); i++) { + vector temp; + string bin = list->get(i); + m->splitAtComma(bin, temp); + for (int j = 0; j < temp.size(); j++) { vector tempFakeOut; tempFakeOut.push_back(temp[j]); nameMap[temp[j]] = tempFakeOut; } } + delete list; + int tcount = getTax(subset); if (tcount != subset.size()) { m->mothurOut("[ERROR]: subsampled list file contains " + toString(subset.size()) + " sequences, but I only found " + toString(tcount) + " in your taxonomy file, did you forget a name file? Please correct."); m->mothurOutEndLine(); } }else { diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 3abc760..1bab57f 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -429,6 +429,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); + //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; @@ -551,13 +552,16 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ while(!oligosFile.eof()){ - oligosFile >> type; m->gobble(oligosFile); //get the first column value of the row - is it a comment or a feature we are interested in? - + oligosFile >> type; //get the first column value of the row - is it a comment or a feature we are interested in? + + if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); } + if(type[0] == '#'){ //igore the line because there's a comment - while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } } + m->gobble(oligosFile);// get rest of line if there's any crap there } else{ //there's a feature we're interested in - + m->gobble(oligosFile); for(int i=0;i> oligo; //get the DNA sequence for the feature @@ -566,7 +570,9 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ oligo[i] = toupper(oligo[i]); if(oligo[i] == 'U') { oligo[i] = 'T'; } } - + + if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); } + if(type == "FORWARD"){ //if the feature is a forward primer... group = ""; @@ -595,7 +601,9 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ //check for repeat barcodes map::iterator itBar = barcodes.find(oligo); if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - + + if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); } + barcodes[oligo]=indexBarcode; indexBarcode++; barcodeNameVector.push_back(group); }else if(type == "LINKER"){ diff --git a/unweighted.cpp b/unweighted.cpp index e95834f..56694e3 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -104,7 +104,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG if (m->control_pressed) { exit(0); } - m->mothurOut("Merging results."); m->mothurOutEndLine(); + //m->mothurOut("Merging results."); m->mothurOutEndLine(); //pass numSeqs to parent ofstream out; @@ -156,7 +156,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfG m->mothurRemove(s); } - m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); + //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); return results; #endif -- 2.39.2