X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=screenseqscommand.cpp;h=48bf17119c98d832b0176dcab78675a94ecf80e8;hb=315e38cf393c82be238da5b32574f225a020d25c;hp=31b6bb7137579796c3a6652b283858bff435731a;hpb=0470f6d037aacb3563c3f7010708120a4a67d4e6;p=mothur.git diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 31b6bb7..48bf171 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -14,7 +14,6 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option){ try { - globaldata = GlobalData::getInstance(); abort = false; //allow user to run help @@ -22,103 +21,135 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option){ else { //valid paramters for this command - string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"}; + string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", + "name", "group", "alignreport","outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); - parser = new OptionParser(); - parser->parse(option, parameters); delete parser; + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; - ValidParameters* validParameter = new ValidParameters(); - //check to make sure all parameters are valid for command for (it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + + it = parameters.find("alignreport"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["alignreport"] = inputDir + it->second; } + } + } + //check for required parameters - fastafile = validParameter->validFile(parameters, "fasta", true); - if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; } + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not found") { mothurOut("fasta is a required parameter for the screen.seqs command."); mothurOutEndLine(); abort = true; } else if (fastafile == "not open") { abort = true; } - else { globaldata->setFastaFile(fastafile); } - - groupfile = validParameter->validFile(parameters, "group", true); + + groupfile = validParameter.validFile(parameters, "group", true); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } - else { - globaldata->setGroupFile(groupfile); - } - namefile = validParameter->validFile(parameters, "name", true); + namefile = validParameter.validFile(parameters, "name", true); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } - else { - globaldata->setNameFile(namefile); + + alignreport = validParameter.validFile(parameters, "alignreport", true); + if (alignreport == "not open") { abort = true; } + else if (alignreport == "not found") { alignreport = ""; } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it } - //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; - temp = validParameter->validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; } + temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; } convert(temp, startPos); - temp = validParameter->validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; } + temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; } convert(temp, endPos); - temp = validParameter->validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } + temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxAmbig); - temp = validParameter->validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; } + temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxHomoP); - temp = validParameter->validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; } + temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; } convert(temp, minLength); - temp = validParameter->validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; } + temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxLength); - - delete validParameter; } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } //********************************************************************************************************************** void ScreenSeqsCommand::help(){ try { - cout << "The screen.seqs command reads a fastafile and creates ....." << "\n"; - cout << "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group." << "\n"; - cout << "The fasta parameter is required." << "\n"; - cout << "The start parameter .... The default is -1." << "\n"; - cout << "The end parameter .... The default is -1." << "\n"; - cout << "The maxambig parameter .... The default is -1." << "\n"; - cout << "The maxhomop parameter .... The default is -1." << "\n"; - cout << "The minlength parameter .... The default is -1." << "\n"; - cout << "The maxlength parameter .... The default is -1." << "\n"; - cout << "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile." << "\n"; - cout << "The screen.seqs command should be in the following format: " << "\n"; - cout << "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, " << "\n"; - cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) " << "\n"; - cout << "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n"; + mothurOut("The screen.seqs command reads a fastafile and creates .....\n"); + mothurOut("The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group.\n"); + mothurOut("The fasta parameter is required.\n"); + mothurOut("The start parameter .... The default is -1.\n"); + mothurOut("The end parameter .... The default is -1.\n"); + mothurOut("The maxambig parameter .... The default is -1.\n"); + mothurOut("The maxhomop parameter .... The default is -1.\n"); + mothurOut("The minlength parameter .... The default is -1.\n"); + mothurOut("The maxlength parameter .... The default is -1.\n"); + mothurOut("The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n"); + mothurOut("The screen.seqs command should be in the following format: \n"); + mothurOut("screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, \n"); + mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n"); + mothurOut("Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n"); + mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ScreenSeqsCommand", "help"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the ScreenSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } //*************************************************************************************************************** @@ -137,49 +168,47 @@ int ScreenSeqsCommand::execute(){ set badSeqNames; - string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile); - string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile); + string goodSeqFile = outputDir + getRootName(getSimpleName(fastafile)) + "good" + getExtension(fastafile); + string badSeqFile = outputDir + getRootName(getSimpleName(fastafile)) + "bad" + getExtension(fastafile); ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut); ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut); while(!inFASTA.eof()){ Sequence currSeq(inFASTA); - bool goodSeq = 1; // innocent until proven guilty - if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; } - if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; } - if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; } - if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; } - if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; } - if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; } - - if(goodSeq == 1){ - currSeq.printSequence(goodSeqOut); - } - else{ - currSeq.printSequence(badSeqOut); - badSeqNames.insert(currSeq.getName()); + if (currSeq.getName() != "") { + bool goodSeq = 1; // innocent until proven guilty + if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; } + if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; } + if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; } + if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; } + if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; } + if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; } + + if(goodSeq == 1){ + currSeq.printSequence(goodSeqOut); + } + else{ + currSeq.printSequence(badSeqOut); + badSeqNames.insert(currSeq.getName()); + } } gobble(inFASTA); } - if(namefile != ""){ - screenNameGroupFile(badSeqNames); - } - else if(groupfile != ""){ - screenGroupFile(badSeqNames); - } + if(namefile != "" && groupfile != "") { screenNameGroupFile(badSeqNames); } // this screens both names and groups + else if(namefile != "") { screenNameGroupFile(badSeqNames); } + else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the groups + if(alignreport != "") { screenAlignReport(badSeqNames); } + goodSeqOut.close(); + badSeqOut.close(); + inFASTA.close(); return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ScreenSeqsCommand", "execute"); exit(1); } - } //*************************************************************************************************************** @@ -187,13 +216,13 @@ int ScreenSeqsCommand::execute(){ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ ifstream inputNames; - openInputFile(globaldata->getNameFile(), inputNames); + openInputFile(namefile, inputNames); set badSeqGroups; string seqName, seqList, group; set::iterator it; - string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile()); - string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile()); + string goodNameFile = outputDir + getRootName(getSimpleName(namefile)) + "good" + getExtension(namefile); + string badNameFile = outputDir + getRootName(getSimpleName(namefile)) + "bad" + getExtension(namefile); ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut); ofstream badNameOut; openOutputFile(badNameFile, badNameOut); @@ -205,7 +234,7 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ if(it != badSeqNames.end()){ badSeqNames.erase(it); badNameOut << seqName << '\t' << seqList << endl; - if(globaldata->getNameFile() != ""){ + if(namefile != ""){ int start = 0; for(int i=0;i badSeqNames){ goodNameOut.close(); badNameOut.close(); - if(globaldata->getGroupFile() != ""){ + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + mothurOut("Your namefile does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } + + if(groupfile != ""){ ifstream inputGroups; - openInputFile(globaldata->getGroupFile(), inputGroups); + openInputFile(groupfile, inputGroups); - string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile()); - string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile()); + string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile); + string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile); ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut); ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut); @@ -253,6 +290,14 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ inputGroups.close(); goodGroupOut.close(); badGroupOut.close(); + + //we were unable to remove some of the bad sequences + if (badSeqGroups.size() != 0) { + for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) { + mothurOut("Your namefile does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } } } @@ -261,12 +306,12 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ ifstream inputGroups; - openInputFile(globaldata->getGroupFile(), inputGroups); + openInputFile(groupfile, inputGroups); string seqName, group; set::iterator it; - string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile()); - string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile()); + string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile); + string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile); ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut); ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut); @@ -284,6 +329,15 @@ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ } gobble(inputGroups); } + + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } + inputGroups.close(); goodGroupOut.close(); badGroupOut.close(); @@ -292,4 +346,60 @@ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ //*************************************************************************************************************** +void ScreenSeqsCommand::screenAlignReport(set badSeqNames){ + + ifstream inputAlignReport; + openInputFile(alignreport, inputAlignReport); + string seqName, group; + set::iterator it; + + string goodAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "good" + getExtension(alignreport); + string badAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "bad" + getExtension(alignreport); + + ofstream goodAlignReportOut; openOutputFile(goodAlignReportFile, goodAlignReportOut); + ofstream badAlignReportOut; openOutputFile(badAlignReportFile, badAlignReportOut); + + while (!inputAlignReport.eof()) { // need to copy header + char c = inputAlignReport.get(); + goodAlignReportOut << c; + badAlignReportOut << c; + if (c == 10 || c == 13){ break; } + } + + while(!inputAlignReport.eof()){ + inputAlignReport >> seqName; + it = badSeqNames.find(seqName); + string line; + while (!inputAlignReport.eof()) { // need to copy header + char c = inputAlignReport.get(); + line += c; + if (c == 10 || c == 13){ break; } + } + + if(it != badSeqNames.end()){ + badSeqNames.erase(it); + badAlignReportOut << seqName << '\t' << line;; + } + else{ + goodAlignReportOut << seqName << '\t' << line; + } + gobble(inputAlignReport); + } + + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + mothurOut("Your file does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } + + inputAlignReport.close(); + goodAlignReportOut.close(); + badAlignReportOut.close(); + +} + +//*************************************************************************************************************** +