X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimflowscommand.cpp;h=33349decab52e90be19c928b44b9b07c8f6c710a;hp=296a6fe37d0449bb719d69c4001429583bcb74ee;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=035f86272c776e1cccaa47021e26782e49cd41e7 diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 296a6fe..33349de 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -14,24 +14,24 @@ //********************************************************************************************************************** vector TrimFlowsCommand::setParameters(){ try { - CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow); - CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); - CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop); - CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows); - CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows); - CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); - CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); - CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs); - CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs); - CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal); - CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise); - CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles); - CommandParameter porder("order", "String", "", "TACG", "", "", "",false,false); parameters.push_back(porder); - CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow); + CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos); + CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); + CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); + CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); + CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); + CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); + CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); + CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); + CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal); + CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise); + CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles); + CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); + CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -47,6 +47,7 @@ string TrimFlowsCommand::getHelpString(){ try { string helpString = ""; helpString += "The trim.flows command reads a flowgram file and creates .....\n"; + helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n"; return helpString; @@ -57,26 +58,21 @@ string TrimFlowsCommand::getHelpString(){ } } //********************************************************************************************************************** -string TrimFlowsCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string TrimFlowsCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "flow") { outputFileName = "flow"; } - else if (type == "fasta") { outputFileName = "flow.fasta"; } - else if (type == "file") { outputFileName = "flow.files"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "TrimFlowsCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "flow") { pattern = "[filename],[tag],flow"; } + else if (type == "fasta") { pattern = "[filename],flow.fasta"; } + else if (type == "file") { pattern = "[filename],flow.files"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** @@ -219,12 +215,20 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { m->setProcessors(temp); m->mothurConvert(temp, processors); - flowOrder = validParameter.validFile(parameters, "order", false); - if (flowOrder == "not found"){ flowOrder = "TACG"; } - else if(flowOrder.length() != 4){ - m->mothurOut("The value of the order option must be four bases long\n"); - } - + temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; } + if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; + } + else { + if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; } + else if(toupper(temp[0]) == 'B'){ + flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; } + else if(toupper(temp[0]) == 'I'){ + flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; } + else { + m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; + } + } + if(oligoFileName == "") { allFiles = 0; } else { allFiles = 1; } @@ -233,7 +237,6 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { numLinkers = 0; numSpacers = 0; } - } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand"); @@ -248,16 +251,20 @@ int TrimFlowsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim." + getOutputFileNameTag("flow"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + string fastaFileName = getOutputFileName("fasta",variables); + if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); } + + variables["[tag]"] = "trim"; + string trimFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName); - string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap." + getOutputFileNameTag("flow");; + variables["[tag]"] = "scrap"; + string scrapFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName); - string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta"); - if(fasta){ - outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); - } + vector flowFilePos; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) @@ -307,40 +314,42 @@ int TrimFlowsCommand::execute(){ if(allFiles){ set namesAlreadyProcessed; - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file"); + flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); for(int i=0;imothurRemove(barcodePrimerComboFileNames[i][j]); - } - else{ - output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; - outputNames.push_back(barcodePrimerComboFileNames[i][j]); - outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); - } - namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + if (barcodePrimerComboFileNames[i][j] != "") { + FILE * pFile; + unsigned long long size; + + //get num bytes in file + pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell(pFile); + fclose (pFile); + } + + if(size < 10){ + m->mothurRemove(barcodePrimerComboFileNames[i][j]); + } + else{ + output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl; + outputNames.push_back(barcodePrimerComboFileNames[i][j]); + outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]); + } + namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]); + } } } } output.close(); } else{ - flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("file"); + flowFilesFileName = getOutputFileName("file",variables); m->openOutputFile(flowFilesFileName, output); output << m->getFullPathName(trimFlowFileName) << endl; @@ -349,14 +358,7 @@ int TrimFlowsCommand::execute(){ } outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); - -// set fasta file as new current fastafile -// string current = ""; -// itTypes = outputTypes.find("fasta"); -// if (itTypes != outputTypes.end()) { -// if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } -// } - + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -393,15 +395,17 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if(line->start == 0){ flowFile >> numFlows; m->gobble(flowFile); - scrapFlowFile << maxFlows << endl; + scrapFlowFile << numFlows << endl; trimFlowFile << maxFlows << endl; if(allFiles){ for(int i=0;iopenOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); - temp << maxFlows << endl; - temp.close(); + if (thisBarcodePrimerComboFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp); + temp << maxFlows << endl; + temp.close(); + } } } } @@ -426,11 +430,16 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); + //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl; if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; } - + if(!flowData.hasGoodHomoP()){ //screen to see if sequence meets the maximum homopolymer limit + success = 0; + trashCode += 'h'; + } + int primerIndex = 0; int barcodeIndex = 0; @@ -470,19 +479,35 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN } if(trashCode.length() == 0){ - - flowData.printFlows(trimFlowFile); - - if(fasta) { currSeq.printSequence(fastaFile); } - - if(allFiles){ - ofstream output; - m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); - output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); - - flowData.printFlows(output); - output.close(); - } + string thisGroup = ""; + if(barcodes.size() != 0){ + thisGroup = barcodeNameVector[barcodeIndex]; + if (primers.size() != 0) { + if (primerNameVector[primerIndex] != "") { + if(thisGroup != "") { + thisGroup += "." + primerNameVector[primerIndex]; + }else { + thisGroup = primerNameVector[primerIndex]; + } + } + } + } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + flowData.printFlows(trimFlowFile); + + if(fasta) { currSeq.printSequence(fastaFile); } + + if(allFiles){ + ofstream output; + m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output); + output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + flowData.printFlows(output); + output.close(); + } + } } else{ flowData.printFlows(scrapFlowFile, trashCode); @@ -532,13 +557,16 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ while(!oligosFile.eof()){ - oligosFile >> type; m->gobble(oligosFile); //get the first column value of the row - is it a comment or a feature we are interested in? - + oligosFile >> type; //get the first column value of the row - is it a comment or a feature we are interested in? + + if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); } + if(type[0] == '#'){ //igore the line because there's a comment - while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + while (!oligosFile.eof()) { char c = oligosFile.get(); if (c == 10 || c == 13){ break; } } + m->gobble(oligosFile);// get rest of line if there's any crap there } else{ //there's a feature we're interested in - + m->gobble(oligosFile); for(int i=0;i> oligo; //get the DNA sequence for the feature @@ -547,13 +575,15 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ oligo[i] = toupper(oligo[i]); if(oligo[i] == 'U') { oligo[i] = 'T'; } } - + + if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); } + if(type == "FORWARD"){ //if the feature is a forward primer... group = ""; while (!oligosFile.eof()) { // get rest of line in case there is a primer name = will have the name of the primer char c = oligosFile.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -569,6 +599,7 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ else if(type == "REVERSE"){ string oligoRC = reverseOligo(oligo); revPrimer.push_back(oligoRC); + if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); } } else if(type == "BARCODE"){ oligosFile >> group; @@ -576,7 +607,9 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ //check for repeat barcodes map::iterator itBar = barcodes.find(oligo); if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - + + if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); } + barcodes[oligo]=indexBarcode; indexBarcode++; barcodeNameVector.push_back(group); }else if(type == "LINKER"){ @@ -605,8 +638,7 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ primers[""] = 0; primerNameVector.push_back(""); } - - + outFlowFileNames.resize(barcodeNameVector.size()); for(int i=0;i >& outFlowFileNames){ string primerName = primerNameVector[itPrimer->second]; string barcodeName = barcodeNameVector[itBar->second]; - - string comboGroupName = ""; - string fileName = ""; - - if(primerName == ""){ - comboGroupName = barcodeNameVector[itBar->second]; - fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow"; - } - else{ - if(barcodeName == ""){ - comboGroupName = primerNameVector[itPrimer->second]; - } - else{ - comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; - } - fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow"; - } - - outFlowFileNames[itBar->second][itPrimer->second] = fileName; - - ofstream temp; - m->openOutputFile(fileName, temp); - temp.close(); + + if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing + else { + string comboGroupName = ""; + string fileName = ""; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName)); + + if(primerName == ""){ + comboGroupName = barcodeNameVector[itBar->second]; + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); + } + else{ + if(barcodeName == ""){ + comboGroupName = primerNameVector[itPrimer->second]; + } + else{ + comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second]; + } + variables["[tag]"] = comboGroupName; + fileName = getOutputFileName("flow", variables); + } + + outFlowFileNames[itBar->second][itPrimer->second] = fileName; + + ofstream temp; + m->openOutputFile(fileName, temp); + temp.close(); + } } } } @@ -796,11 +836,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -858,11 +899,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); - + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += extension; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } } @@ -892,10 +934,12 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); - temp.close(); + if (tempBarcodePrimerComboFileNames[i][j] != "") { + tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp"; + ofstream temp; + m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + } } } @@ -939,8 +983,10 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim if(allFiles){ for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) { for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) { - m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); - m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + if (barcodePrimerComboFileNames[j][k] != "") { + m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); + m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp")); + } } } }