From 462fca6d24fd15ca035358ff70bcfae52c3281c3 Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 18 Sep 2009 17:38:21 +0000 Subject: [PATCH] adding more error checking for list and group files. outputs missing.name or missing.group if you have files with a different number of sequences in them --- Mothur.xcodeproj/project.pbxproj | 6 ++ groupmap.cpp | 20 +++++ groupmap.h | 1 + readotucommand.cpp | 20 +++-- secondarystructurecommand.cpp | 128 +++++++++++++++++++++++++++++++ secondarystructurecommand.h | 51 ++++++++++++ sharedcommand.cpp | 87 ++++++++++++++++++++- sharedcommand.h | 1 + 8 files changed, 307 insertions(+), 7 deletions(-) create mode 100644 secondarystructurecommand.cpp create mode 100644 secondarystructurecommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index fda0adc..7f74d48 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -162,6 +162,7 @@ A70B53AA0F4CD7AD0064797E /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */; }; A70B53AB0F4CD7AD0064797E /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */; }; A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; }; + A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; }; A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; }; A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; }; EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; }; @@ -515,6 
+516,8 @@ A70B53A70F4CD7AD0064797E /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlabelcommand.h; sourceTree = SOURCE_ROOT; }; A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlinecommand.cpp; sourceTree = SOURCE_ROOT; }; A70B53A90F4CD7AD0064797E /* getlinecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlinecommand.h; sourceTree = SOURCE_ROOT; }; + A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = ""; }; + A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = ""; }; A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; }; A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; }; A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; }; @@ -866,6 +869,8 @@ 37B73CBF1004EB38008C4B41 /* removeseqscommand.cpp */, 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */, 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */, + A70DECD71063D8B40057C03C /* secondarystructurecommand.h */, + A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */, 371B30B30FD7EE67000414CA /* screenseqscommand.h */, 371B30B20FD7EE67000414CA /* screenseqscommand.cpp */, 3799A94F0FD6A58C00E33EDE /* seqsummarycommand.h */, @@ -1179,6 
+1184,7 @@ 372095C2103196D70004D347 /* chimera.cpp in Sources */, A75B887E104C16860083C454 /* ccode.cpp in Sources */, A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */, + A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/groupmap.cpp b/groupmap.cpp index dd6f924..6521128 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -85,3 +85,23 @@ bool GroupMap::isValidGroup(string groupname) { exit(1); } } + +/************************************************************/ +vector GroupMap::getNamesSeqs(){ + try { + + vector names; + + for (it = groupmap.begin(); it != groupmap.end(); it++) { + names.push_back(it->first); + } + + return names; + } + catch(exception& e) { + errorOut(e, "GroupMap", "getNamesSeqs"); + exit(1); + } +} +/************************************************************/ + diff --git a/groupmap.h b/groupmap.h index 54225e2..e52ada5 100644 --- a/groupmap.h +++ b/groupmap.h @@ -27,6 +27,7 @@ public: vector namesOfGroups; map groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays. 
int getNumSeqs() { return groupmap.size(); } + vector getNamesSeqs(); private: ifstream fileHandle; diff --git a/readotucommand.cpp b/readotucommand.cpp index bb866ed..999d8de 100644 --- a/readotucommand.cpp +++ b/readotucommand.cpp @@ -159,14 +159,22 @@ int ReadOtuCommand::execute(){ if (globaldata->getFormat() == "shared") { shared = new SharedCommand(); - shared->execute(); + int okay = shared->execute(); delete shared; + + //problem with shared + if (okay == 1) { + globaldata->setListFile(""); + globaldata->setGroupFile(""); + globaldata->setSharedFile(""); + }else{ - //change format to shared to speed up commands - globaldata->setFormat("sharedfile"); - globaldata->setListFile(""); - globaldata->setGroupFile(""); - globaldata->setSharedFile(getRootName(filename) + "shared"); + //change format to shared to speed up commands + globaldata->setFormat("sharedfile"); + globaldata->setListFile(""); + globaldata->setGroupFile(""); + globaldata->setSharedFile(getRootName(filename) + "shared"); + } } return 0; } diff --git a/secondarystructurecommand.cpp b/secondarystructurecommand.cpp new file mode 100644 index 0000000..48ea154 --- /dev/null +++ b/secondarystructurecommand.cpp @@ -0,0 +1,128 @@ +/* + * secondarystructurecommand.cpp + * Mothur + * + * Created by westcott on 9/18/09. + * Copyright 2009 Schloss Lab. All rights reserved. 
+ * + */ + +#include "secondarystructurecommand.h" +#include "sequence.hpp" + +//********************************************************************************************************************** + +AlignCheckCommand::AlignCheckCommand(string option){ + try { + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fasta","map"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + mapfile = validParameter.validFile(parameters, "map", true); + if (mapfile == "not open") { abort = true; } + else if (mapfile == "not found") { mapfile = ""; mothurOut("You must provide an map file."); mothurOutEndLine(); abort = true; } + + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not open") { abort = true; } + else if (fastafile == "not found") { fastafile = ""; mothurOut("You must provide an fasta file."); mothurOutEndLine(); abort = true; } + + } + + } + catch(exception& e) { + errorOut(e, "AlignCheckCommand", "RemoveSeqsCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void AlignCheckCommand::help(){ + try { + //mothurOut("The remove.seqs command reads an .accnos file and one of the following file types: fasta, name, group or alignreport file.\n"); + //mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n"); + //mothurOut("The remove.seqs command parameters are accnos, fasta, name, group and alignreport. 
You must provide accnos and one of the other parameters.\n"); + //mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n"); + //mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"); + //mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + } + catch(exception& e) { + errorOut(e, "AlignCheckCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +int AlignCheckCommand::execute(){ + try { + + if (abort == true) { return 0; } + + //get secondary structure info. + readMap(); + + + + return 0; + } + + catch(exception& e) { + errorOut(e, "AlignCheckCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +void AlignCheckCommand::readMap(){ + try { + + structMap.resize(1, 0); + ifstream in; + + openInputFile(mapfile, in); + + while(!in.eof()){ + int position; + in >> position; + structMap.push_back(position); + gobble(in); + } + in.close(); + + seqLength = structMap.size(); + + + //check you make sure is structMap[10] = 380 then structMap[380] = 10. + for(int i=0;i structMap; + string mapfile, fastafile; + bool abort; + int seqLength; + + void readMap(); +}; + +/**************************************************************************************************/ +#endif + diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 717d3aa..aba07b2 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -67,6 +67,8 @@ int SharedCommand::execute(){ if (SharedList->getNumSeqs() != groupMap->getNumSeqs()) { mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. 
Please correct."); mothurOutEndLine(); + createMisMatchFile(); + //delete memory for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; @@ -74,7 +76,7 @@ int SharedCommand::execute(){ delete SharedList; globaldata->gSharedList = NULL; - return(0); + return 1; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. @@ -181,6 +183,89 @@ void SharedCommand::printSharedData(vector thislookup) { exit(1); } } +//********************************************************************************************************************** +void SharedCommand::createMisMatchFile() { + try { + ofstream outMisMatch; + string outputMisMatchName = getRootName(globaldata->inputFileName); + + //you have sequences in your list file that are not in your group file + if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { + outputMisMatchName += "missing.group"; + mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine(); + + openOutputFile(outputMisMatchName, outMisMatch); + + //go through list and if group returns "not found" output it + for (int i = 0; i < SharedList->getNumBins(); i++) { + + string names = SharedList->get(i); + + while (names.find_first_of(',') != -1) { + string name = names.substr(0,names.find_first_of(',')); + names = names.substr(names.find_first_of(',')+1, names.length()); + string group = groupMap->getGroup(name); + + if(group == "not found") { outMisMatch << name << endl; } + } + + //get last name + string group = groupMap->getGroup(names); + if(group == "not found") { outMisMatch << names << endl; } + } + + outMisMatch.close(); + + + }else {//you have sequences in your group file that are not in you list file + + outputMisMatchName += "missing.name"; + mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); 
mothurOutEndLine(); + + map namesInList; + + //go through listfile and get names + for (int i = 0; i < SharedList->getNumSeqs(); i++) { + + string names = SharedList->get(i); + + while (names.find_first_of(',') != -1) { + string name = names.substr(0,names.find_first_of(',')); + names = names.substr(names.find_first_of(',')+1, names.length()); + + namesInList[name] = name; + } + + //get last name + namesInList[names] = names; + } + + //get names of sequences in groupfile + vector seqNames = groupMap->getNamesSeqs(); + + map::iterator itMatch; + + openOutputFile(outputMisMatchName, outMisMatch); + + //loop through names in seqNames and if they aren't in namesIn list output them + for (int i = 0; i < seqNames.size(); i++) { + + itMatch = namesInList.find(seqNames[i]); + + if (itMatch == namesInList.end()) { + + outMisMatch << seqNames[i] << endl; + } + } + outMisMatch.close(); + } + + } + catch(exception& e) { + errorOut(e, "SharedCommand", "createMisMatchFile"); + exit(1); + } +} //********************************************************************************************************************** diff --git a/sharedcommand.h b/sharedcommand.h index 3467c7b..f0c2488 100644 --- a/sharedcommand.h +++ b/sharedcommand.h @@ -33,6 +33,7 @@ public: private: void printSharedData(vector); + void createMisMatchFile(); GlobalData* globaldata; ReadOTUFile* read; SharedListVector* SharedList; -- 2.39.2