X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=screenseqscommand.cpp;h=4b267b532ae1a19be791c380339b928212477edd;hb=c53c53b0280452e1c3a087fbf807b2e508cd66c4;hp=ed71559fba582e8d5a696621ab943a4851579704;hpb=64eee3a595ae53817f52807d7393b22e74e31f56;p=mothur.git diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index ed71559..4b267b5 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -12,19 +12,99 @@ //*************************************************************************************************************** -ScreenSeqsCommand::ScreenSeqsCommand(){ +ScreenSeqsCommand::ScreenSeqsCommand(string option){ try { - globaldata = GlobalData::getInstance(); - if(globaldata->getFastaFile() == "") { cout << "you must provide a fasta formatted file" << endl; } + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", + "name", "group", "alignreport"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not found") { mothurOut("fasta is a required parameter for the screen.seqs command."); mothurOutEndLine(); abort = true; } + else if (fastafile == "not open") { abort = true; } + + groupfile = validParameter.validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + + alignreport = validParameter.validFile(parameters, "alignreport", true); + if (alignreport == "not open") { abort = true; } + else if (alignreport == "not found") { alignreport = ""; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + string temp; + temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; } + convert(temp, startPos); + + temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; } + convert(temp, endPos); + + temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxAmbig); + + temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxHomoP); + + temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; } + convert(temp, minLength); + + temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; } + convert(temp, maxLength); + } + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +//********************************************************************************************************************** + +void ScreenSeqsCommand::help(){ + try { + mothurOut("The screen.seqs command reads a fastafile and creates .....\n"); + mothurOut("The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group.\n"); + mothurOut("The fasta parameter is required.\n"); + mothurOut("The start parameter .... The default is -1.\n"); + mothurOut("The end parameter .... The default is -1.\n"); + mothurOut("The maxambig parameter .... The default is -1.\n"); + mothurOut("The maxhomop parameter .... The default is -1.\n"); + mothurOut("The minlength parameter .... The default is -1.\n"); + mothurOut("The maxlength parameter .... The default is -1.\n"); + mothurOut("The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n"); + mothurOut("The screen.seqs command should be in the following format: \n"); + mothurOut("screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, \n"); + mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n"); + mothurOut("Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n"); + mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + + } + catch(exception& e) { + errorOut(e, "ScreenSeqsCommand", "help"); exit(1); - } + } } //*************************************************************************************************************** @@ -35,21 +115,16 @@ ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ } int ScreenSeqsCommand::execute(){ try{ - int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength; - convert(globaldata->getStartPos(), startPos); - convert(globaldata->getEndPos(), endPos); - convert(globaldata->getMaxAmbig(), maxAmbig); - convert(globaldata->getMaxHomoPolymer(), maxHomoP); - convert(globaldata->getMinLength(), minLength); - convert(globaldata->getMaxLength(), maxLength); + if (abort == true) { return 0; } + ifstream inFASTA; - openInputFile(globaldata->getFastaFile(), inFASTA); + openInputFile(fastafile, inFASTA); set badSeqNames; - string goodSeqFile = getRootName(globaldata->getFastaFile()) + "good" + getExtension(globaldata->getFastaFile()); - string badSeqFile = getRootName(globaldata->getFastaFile()) + "bad" + getExtension(globaldata->getFastaFile()); + string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile); + string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile); ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut); ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut); @@ -73,24 +148,19 @@ int ScreenSeqsCommand::execute(){ } gobble(inFASTA); } - if(globaldata->getNameFile() != ""){ - screenNameGroupFile(badSeqNames); - } - else if(globaldata->getGroupFile() != ""){ - screenGroupFile(badSeqNames); - } + if(namefile != "" && groupfile != "") { screenNameGroupFile(badSeqNames); } // this screens both names and groups + else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the groups + if(alignreport != "") { screenAlignReport(badSeqNames); } + goodSeqOut.close(); + badSeqOut.close(); + inFASTA.close(); return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + errorOut(e, "ScreenSeqsCommand", "execute"); exit(1); } - } //*************************************************************************************************************** @@ -98,13 +168,13 @@ int ScreenSeqsCommand::execute(){ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ ifstream inputNames; - openInputFile(globaldata->getNameFile(), inputNames); + openInputFile(namefile, inputNames); set badSeqGroups; string seqName, seqList, group; set::iterator it; - string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile()); - string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile()); + string goodNameFile = getRootName(namefile) + "good" + getExtension(namefile); + string badNameFile = getRootName(namefile) + "bad" + getExtension(namefile); ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut); ofstream badNameOut; openOutputFile(badNameFile, badNameOut); @@ -116,7 +186,7 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ if(it != badSeqNames.end()){ badSeqNames.erase(it); badNameOut << seqName << '\t' << seqList << endl; - if(globaldata->getNameFile() != ""){ + if(namefile != ""){ int start = 0; for(int i=0;i badSeqNames){ goodNameOut.close(); badNameOut.close(); - if(globaldata->getGroupFile() != ""){ + if(groupfile != ""){ ifstream inputGroups; - openInputFile(globaldata->getGroupFile(), inputGroups); + openInputFile(groupfile, inputGroups); - string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile()); - string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile()); + string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile); + string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile); ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut); ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut); @@ -172,12 +242,12 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ ifstream inputGroups; - openInputFile(globaldata->getGroupFile(), inputGroups); + openInputFile(groupfile, inputGroups); string seqName, group; set::iterator it; - string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile()); - string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile()); + string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile); + string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile); ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut); ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut); @@ -203,4 +273,51 @@ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ //*************************************************************************************************************** +void ScreenSeqsCommand::screenAlignReport(set badSeqNames){ + + ifstream inputAlignReport; + openInputFile(alignreport, inputAlignReport); + string seqName, group; + set::iterator it; + + string goodAlignReportFile = getRootName(alignreport) + "good" + getExtension(alignreport); + string badAlignReportFile = getRootName(alignreport) + "bad" + getExtension(alignreport); + + ofstream goodAlignReportOut; openOutputFile(goodAlignReportFile, goodAlignReportOut); + ofstream badAlignReportOut; openOutputFile(badAlignReportFile, badAlignReportOut); + + while (!inputAlignReport.eof()) { // need to copy header + char c = inputAlignReport.get(); + goodAlignReportOut << c; + badAlignReportOut << c; + if (c == 10 || c == 13){ break; } + } + + while(!inputAlignReport.eof()){ + inputAlignReport >> seqName; + it = badSeqNames.find(seqName); + string line; + while (!inputAlignReport.eof()) { // need to copy header + char c = inputAlignReport.get(); + line += c; + if (c == 10 || c == 13){ break; } + } + + if(it != badSeqNames.end()){ + badSeqNames.erase(it); + badAlignReportOut << seqName << '\t' << line;; + } + else{ + goodAlignReportOut << seqName << '\t' << line; + } + gobble(inputAlignReport); + } + inputAlignReport.close(); + goodAlignReportOut.close(); + badAlignReportOut.close(); + +} + +//*************************************************************************************************************** +