X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimflowscommand.cpp;h=ec724de4d85c3232ec574a1ead04714331b0220a;hb=HEAD;hp=cc49755d2ced89f31aa60258de2abc18838e9967;hpb=deba0af0ccdcb6005ed5b2b82649b137c63fbdf7;p=mothur.git diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index cc49755..ec724de 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -14,8 +14,9 @@ //********************************************************************************************************************** vector TrimFlowsCommand::setParameters(){ try { - CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow",false,true,true); parameters.push_back(pflow); + CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos); + CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); @@ -28,7 +29,7 @@ vector TrimFlowsCommand::setParameters(){ CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal); CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise); CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles); - CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder); + CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -47,6 +48,15 @@ string TrimFlowsCommand::getHelpString(){ try { string helpString = ""; helpString += "The trim.flows command reads a flowgram file and creates .....\n"; + helpString += "The oligos parameter allows you to provide an oligos file.\n"; + helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; + helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; + helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; + helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; + helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; + helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; + helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; + helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n"; return helpString; @@ -214,21 +224,31 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { m->setProcessors(temp); m->mothurConvert(temp, processors); - flowOrder = validParameter.validFile(parameters, "order", false); - if (flowOrder == "not found"){ flowOrder = "TACG"; } - else if(flowOrder.length() != 4){ - m->mothurOut("The value of the order option must be four bases long\n"); - } - + temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; } + if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; + } + else { + if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; } + else if(toupper(temp[0]) == 'B'){ + flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; } + else if(toupper(temp[0]) == 'I'){ + flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; } + else { + m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; + } + } + if(oligoFileName == "") { allFiles = 0; } else { allFiles = 1; } + + temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; } + reorient = m->isTrue(temp); numFPrimers = 0; numRPrimers = 0; numLinkers = 0; numSpacers = 0; } - } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand"); @@ -306,41 +326,41 @@ int TrimFlowsCommand::execute(){ if(allFiles){ set namesAlreadyProcessed; - variables["[tag]"] = ""; flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); for(int i=0;imothurRemove(barcodePrimerComboFileNames[i][j]); - } - else{ - output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; - outputNames.push_back(barcodePrimerComboFileNames[i][j]); - outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); - } - namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + if (barcodePrimerComboFileNames[i][j] != "") { + FILE * pFile; + unsigned long long size; + + //get num bytes in file + pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell(pFile); + fclose (pFile); + } + + if(size < 10){ + m->mothurRemove(barcodePrimerComboFileNames[i][j]); + } + else{ + output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; + outputNames.push_back(barcodePrimerComboFileNames[i][j]); + outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); + } + namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + } } } } output.close(); } else{ - variables["[tag]"] = ""; flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); @@ -350,14 +370,7 @@ int TrimFlowsCommand::execute(){ } outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); - -// set fasta file as new current fastafile -// string current = ""; -// itTypes = outputTypes.find("fasta"); -// if (itTypes != outputTypes.end()) { -// if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } -// } - + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -394,15 +407,17 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if(line->start == 0){ flowFile >> numFlows; m->gobble(flowFile); - scrapFlowFile << maxFlows << endl; + scrapFlowFile << numFlows << endl; trimFlowFile << maxFlows << endl; if(allFiles){ for(int i=0;iopenOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); - temp << maxFlows << endl; - temp.close(); + if (thisBarcodePrimerComboFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); + temp << maxFlows << endl; + temp.close(); + } } } } @@ -413,7 +428,15 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN int count = 0; bool moreSeqs = 1; - TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); + TrimOligos* trimOligos = NULL; + if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } + + TrimOligos* rtrimOligos = NULL; + if (reorient) { + rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } + while(moreSeqs) { @@ -427,16 +450,23 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); + //for reorient + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); + if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; } - + if(!flowData.hasGoodHomoP()){ //screen to see if sequence meets the maximum homopolymer limit + success = 0; + trashCode += 'h'; + } + int primerIndex = 0; int barcodeIndex = 0; if(numLinkers != 0){ - success = trimOligos.stripLinker(currSeq); + success = trimOligos->stripLinker(currSeq); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqDiffs += success; } @@ -444,21 +474,21 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); } - if(barcodes.size() != 0){ - success = trimOligos.stripBarcode(currSeq, barcodeIndex); + if(numBarcodes != 0){ + success = trimOligos->stripBarcode(currSeq, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqDiffs += success; } } if(numSpacers != 0){ - success = trimOligos.stripSpacer(currSeq); + success = trimOligos->stripSpacer(currSeq); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqDiffs += success; } } if(numFPrimers != 0){ - success = trimOligos.stripForward(currSeq, primerIndex); + success = trimOligos->stripForward(currSeq, primerIndex); if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqDiffs += success; } } @@ -466,24 +496,61 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if (currentSeqDiffs > tdiffs) { trashCode += 't'; } if(numRPrimers != 0){ - success = trimOligos.stripReverse(currSeq); + success = trimOligos->stripReverse(currSeq); if(!success) { trashCode += 'r'; } } - + + if (reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, thisBarcodeIndex); + if(thisSuccess > bdiffs) { thisTrashCode += "b"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl; + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, thisPrimerIndex); + if(thisSuccess > pdiffs) { thisTrashCode += "f"; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + }else { trashCode += "(" + thisTrashCode + ")"; } + } + if(trashCode.length() == 0){ - - flowData.printFlows(trimFlowFile); - - if(fasta) { currSeq.printSequence(fastaFile); } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); - output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); - - flowData.printFlows(output); - output.close(); - } + string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + flowData.printFlows(trimFlowFile); + + if(fasta) { currSeq.printSequence(fastaFile); } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); + output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + flowData.printFlows(output); + output.close(); + } + } } else{ flowData.printFlows(scrapFlowFile, trashCode); @@ -510,6 +577,8 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN scrapFlowFile.close(); flowFile.close(); if(fasta){ fastaFile.close(); } + delete trimOligos; + if (reorient) { delete rtrimOligos; } return count; } @@ -521,189 +590,131 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN //*************************************************************************************************************** -void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ +int TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ try { - ifstream oligosFile; - m->openInputFile(oligoFileName, oligosFile); - - string type, oligo, group; - - int indexPrimer = 0; - int indexBarcode = 0; - - while(!oligosFile.eof()){ - - oligosFile >> type; m->gobble(oligosFile); //get the first column value of the row - is it a comment or a feature we are interested in? - - if(type[0] == '#'){ //igore the line because there's a comment - while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - } - else{ //there's a feature we're interested in - - for(int i=0;i> oligo; //get the DNA sequence for the feature - - for(int i=0;i::iterator itPrimer = primers.find(oligo); - if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - primers[oligo]=indexPrimer; indexPrimer++; - primerNameVector.push_back(group); - - } - else if(type == "REVERSE"){ - string oligoRC = reverseOligo(oligo); - revPrimer.push_back(oligoRC); - } - else if(type == "BARCODE"){ - oligosFile >> group; - - //check for repeat barcodes - map::iterator itBar = barcodes.find(oligo); - if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - - barcodes[oligo]=indexBarcode; indexBarcode++; - barcodeNameVector.push_back(group); - }else if(type == "LINKER"){ - linker.push_back(oligo); - }else if(type == "SPACER"){ - spacer.push_back(oligo); - } - else{ - m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); - } - } - - m->gobble(oligosFile); - } - oligosFile.close(); - - if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; } - - //add in potential combos - if(barcodeNameVector.size() == 0){ - barcodes[""] = 0; - barcodeNameVector.push_back(""); - } - - if(primerNameVector.size() == 0){ - primers[""] = 0; - primerNameVector.push_back(""); - } - - - outFlowFileNames.resize(barcodeNameVector.size()); + bool allBlank = false; + oligos.read(oligoFileName); + + if (m->control_pressed) { return 0; } //error in reading oligos + + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + + vector groupNames = oligos.getGroupNames(); + if (groupNames.size() == 0) { allFiles = 0; allBlank = true; } + + + outFlowFileNames.resize(oligos.getBarcodeNames().size()); for(int i=0;i::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ - for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ - - string primerName = primerNameVector[itPrimer->second]; - string barcodeName = barcodeNameVector[itBar->second]; - - string comboGroupName = ""; - string fileName = ""; - - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - variables["[tag]"] = comboGroupName; - fileName = getOutputFileName("flow", variables); - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - variables["[tag]"] = comboGroupName; - fileName = getOutputFileName("flow", variables); - } - - outFlowFileNames[itBar->second][itPrimer->second] = fileName; - - ofstream temp; - m->openOutputFile(fileName, temp); - temp.close(); - } - } - } - - numFPrimers = primers.size(); - numRPrimers = revPrimer.size(); - numLinkers = linker.size(); - numSpacers = spacer.size(); - - } - catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "getOligos"); - exit(1); - } -} -//********************************************************************/ -string TrimFlowsCommand::reverseOligo(string oligo){ - try { - string reverse = ""; - - for(int i=oligo.length()-1;i>=0;i--){ - - if(oligo[i] == 'A') { reverse += 'T'; } - else if(oligo[i] == 'T'){ reverse += 'A'; } - else if(oligo[i] == 'U'){ reverse += 'A'; } - - else if(oligo[i] == 'G'){ reverse += 'C'; } - else if(oligo[i] == 'C'){ reverse += 'G'; } - - else if(oligo[i] == 'R'){ reverse += 'Y'; } - else if(oligo[i] == 'Y'){ reverse += 'R'; } - - else if(oligo[i] == 'M'){ reverse += 'K'; } - else if(oligo[i] == 'K'){ reverse += 'M'; } - - else if(oligo[i] == 'W'){ reverse += 'W'; } - else if(oligo[i] == 'S'){ reverse += 'S'; } - - else if(oligo[i] == 'B'){ reverse += 'V'; } - else if(oligo[i] == 'V'){ reverse += 'B'; } - - else if(oligo[i] == 'D'){ reverse += 'H'; } - else if(oligo[i] == 'H'){ reverse += 'D'; } + if (allFiles) { + set uniqueNames; //used to cleanup outputFileNames + if (pairedOligos) { + map barcodes = oligos.getPairedBarcodes(); + map primers = oligos.getPairedPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->first); + string barcodeName = oligos.getBarcodeName(itBar->first); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + variables["[tag]"] = comboGroupName; + string fileName = getOutputFileName("flow", variables); + if (uniqueNames.count(fileName) == 0) { + outputNames.push_back(fileName); + outputTypes["flow"].push_back(fileName); + uniqueNames.insert(fileName); + } + + outFlowFileNames[itBar->first][itPrimer->first] = fileName; + m->openOutputFile(fileName, temp); temp.close(); + } + } + } + }else { + map barcodes = oligos.getBarcodes() ; + map primers = oligos.getPrimers(); + for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ + for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ + + string primerName = oligos.getPrimerName(itPrimer->second); + string barcodeName = oligos.getBarcodeName(itBar->second); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else if ((primerName == "") && (barcodeName == "")) { } //do nothing + else { + string comboGroupName = ""; + + if(primerName == ""){ + comboGroupName = barcodeName; + }else{ + if(barcodeName == ""){ + comboGroupName = primerName; + } + else{ + comboGroupName = barcodeName + "." + primerName; + } + } + + ofstream temp; + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + variables["[tag]"] = comboGroupName; + string fileName = getOutputFileName("flow", variables); + if (uniqueNames.count(fileName) == 0) { + outputNames.push_back(fileName); + outputTypes["flow"].push_back(fileName); + uniqueNames.insert(fileName); + } + + outFlowFileNames[itBar->second][itPrimer->second] = fileName; + m->openOutputFile(fileName, temp); temp.close(); + } + } + } + } - else { reverse += 'N'; } } - - - return reverse; - } + return 0; + } catch(exception& e) { - m->errorOut(e, "TrimFlowsCommand", "reverseOligo"); + m->errorOut(e, "TrimFlowsCommand", "getOligos"); exit(1); } } - /**************************************************************************************************/ vector TrimFlowsCommand::getFlowFileBreaks() { @@ -791,7 +802,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim //loop through and create all the processes you want while (process != processors) { - int pid = fork(); + pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later @@ -802,18 +813,19 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += m->mothurGetpid(process) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } driverCreateTrim(flowFileName, - (trimFlowFileName + toString(getpid()) + ".temp"), - (scrapFlowFileName + toString(getpid()) + ".temp"), - (fastaFileName + toString(getpid()) + ".temp"), + (trimFlowFileName + m->mothurGetpid(process) + ".temp"), + (scrapFlowFileName + m->mothurGetpid(process) + ".temp"), + (fastaFileName + m->mothurGetpid(process) + ".temp"), tempBarcodePrimerComboFileNames, lines[process]); exit(0); @@ -849,7 +861,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim //Windows version shared memory, so be careful when passing variables through the trimFlowData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, ////////////////////////////////////////////////////////////////////////////////////////////////////// - + /* vector pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; @@ -864,11 +876,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += extension; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -898,10 +911,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } @@ -920,7 +935,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim CloseHandle(hThreadArray[i]); delete pDataArray[i]; } - + */ #endif //append files @@ -945,8 +960,10 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) { for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) { - m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); - m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + if (barcodePrimerComboFileNames[j][k] != "") { + m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); + m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + } } } }