X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimflowscommand.cpp;h=44121b97dfff0bb0c6611878c7e1c04e046709ea;hb=7aa301dfa67cfcb5b00c6b4e38a7ad56eb8337db;hp=9f603c4bacce57d2785333d86fb2a8098ec6b28a;hpb=f509429e06e545bde69c97cacc0eb436775bd329;p=mothur.git diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 9f603c4..44121b9 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -14,24 +14,24 @@ //********************************************************************************************************************** vector TrimFlowsCommand::setParameters(){ try { - CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop); - CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows); - CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); - CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); - CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal); - CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise); - CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles); - CommandParameter porder("order", "String", "", "TACG", "", "", "",false,false); parameters.push_back(porder); - CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow",false,true,true); parameters.push_back(pflow); + CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos); + CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); + CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); + CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); + CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); + CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); + CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal); + CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise); + CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles); + CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder); + CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -57,26 +57,21 @@ string TrimFlowsCommand::getHelpString(){ } } //********************************************************************************************************************** -string TrimFlowsCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string TrimFlowsCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "flow") { outputFileName = "flow"; } - else if (type == "fasta") { outputFileName = "flow.fasta"; } - else if (type == "file") { outputFileName = "flow.files"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "TrimFlowsCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "flow") { pattern = "[filename],[tag],flow"; } + else if (type == "fasta") { pattern = "[filename],flow.fasta"; } + else if (type == "file") { pattern = "[filename],[tag],flow.files"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** @@ -248,16 +243,20 @@ int TrimFlowsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim." + getOutputFileNameTag("flow"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + string fastaFileName = getOutputFileName("fasta",variables); + if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); } + + variables["[tag]"] = "trim"; + string trimFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName); - string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap." + getOutputFileNameTag("flow");; + variables["[tag]"] = "scrap"; + string scrapFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName); - string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta"); - if(fasta){ - outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); - } + vector flowFilePos; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) @@ -307,40 +306,44 @@ int TrimFlowsCommand::execute(){ if(allFiles){ set namesAlreadyProcessed; - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file"); + variables["[tag]"] = ""; + flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); for(int i=0;imothurRemove(barcodePrimerComboFileNames[i][j]); - } - else{ - output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; - outputNames.push_back(barcodePrimerComboFileNames[i][j]); - outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); - } - namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + if (barcodePrimerComboFileNames[i][j] != "") { + FILE * pFile; + unsigned long long size; + + //get num bytes in file + pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell(pFile); + fclose (pFile); + } + + if(size < 10){ + m->mothurRemove(barcodePrimerComboFileNames[i][j]); + } + else{ + output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; + outputNames.push_back(barcodePrimerComboFileNames[i][j]); + outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); + } + namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + } } } } output.close(); } else{ - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file"); + variables["[tag]"] = ""; + flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); output << m->getFullPathName(trimFlowFileName) << endl; @@ -349,14 +352,7 @@ int TrimFlowsCommand::execute(){ } outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); - -// set fasta file as new current fastafile -// string current = ""; -// itTypes = outputTypes.find("fasta"); -// if (itTypes != outputTypes.end()) { -// if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } -// } - + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -398,10 +394,12 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if(allFiles){ for(int i=0;iopenOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); - temp << maxFlows << endl; - temp.close(); + if (thisBarcodePrimerComboFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); + temp << maxFlows << endl; + temp.close(); + } } } } @@ -441,6 +439,8 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN } + if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); } + if(barcodes.size() != 0){ success = trimOligos.stripBarcode(currSeq, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } @@ -468,19 +468,35 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN } if(trashCode.length() == 0){ - - flowData.printFlows(trimFlowFile); - - if(fasta) { currSeq.printSequence(fastaFile); } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); - output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); - - flowData.printFlows(output); - output.close(); - } + string thisGroup = ""; + if(barcodes.size() != 0){ + thisGroup = barcodeNameVector[barcodeIndex]; + if (primers.size() != 0) { + if (primerNameVector[primerIndex] != "") { + if(thisGroup != "") { + thisGroup += "." + primerNameVector[primerIndex]; + }else { + thisGroup = primerNameVector[primerIndex]; + } + } + } + } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + flowData.printFlows(trimFlowFile); + + if(fasta) { currSeq.printSequence(fastaFile); } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); + output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + flowData.printFlows(output); + output.close(); + } + } } else{ flowData.printFlows(scrapFlowFile, trashCode); @@ -617,29 +633,37 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ string primerName = primerNameVector[itPrimer->second]; string barcodeName = barcodeNameVector[itBar->second]; - - string comboGroupName = ""; - string fileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow"; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow"; - } - - outFlowFileNames[itBar->second][itPrimer->second] = fileName; - - ofstream temp; - m->openOutputFile(fileName, temp); - temp.close(); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fileName = ""; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); + } + + outFlowFileNames[itBar->second][itPrimer->second] = fileName; + + ofstream temp; + m->openOutputFile(fileName, temp); + temp.close(); + } } } } @@ -794,11 +818,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -856,11 +881,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += extension; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -890,10 +916,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } @@ -937,8 +965,10 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) { for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) { - m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); - m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + if (barcodePrimerComboFileNames[j][k] != "") { + m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); + m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + } } } }