]> git.donarmstrong.com Git - mothur.git/commitdiff
adding more error checking for list and group files. outputs missing.names or missin...
author westcott <westcott>
Fri, 18 Sep 2009 17:38:21 +0000 (17:38 +0000)
committer westcott <westcott>
Fri, 18 Sep 2009 17:38:21 +0000 (17:38 +0000)
Mothur.xcodeproj/project.pbxproj
groupmap.cpp
groupmap.h
readotucommand.cpp
secondarystructurecommand.cpp [new file with mode: 0644]
secondarystructurecommand.h [new file with mode: 0644]
sharedcommand.cpp
sharedcommand.h

index fda0adcb25e16f7388a8aad0dd67cf8b37373e03..7f74d48b66e3c8251cfedba52695f71b2457e720 100644 (file)
                A70B53AA0F4CD7AD0064797E /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */; };
                A70B53AB0F4CD7AD0064797E /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */; };
                A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
+               A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; };
                A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
                A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
                EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; };
                A70B53A70F4CD7AD0064797E /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlabelcommand.h; sourceTree = SOURCE_ROOT; };
                A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlinecommand.cpp; sourceTree = SOURCE_ROOT; };
                A70B53A90F4CD7AD0064797E /* getlinecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlinecommand.h; sourceTree = SOURCE_ROOT; };
+               A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = "<group>"; };
+               A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = "<group>"; };
                A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; };
                A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; };
                A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
                                37B73CBF1004EB38008C4B41 /* removeseqscommand.cpp */,
                                7E09C5120FDA79C5002ECAE5 /* reversecommand.h */,
                                7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */,
+                               A70DECD71063D8B40057C03C /* secondarystructurecommand.h */,
+                               A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */,
                                371B30B30FD7EE67000414CA /* screenseqscommand.h */,
                                371B30B20FD7EE67000414CA /* screenseqscommand.cpp */,
                                3799A94F0FD6A58C00E33EDE /* seqsummarycommand.h */,
                                372095C2103196D70004D347 /* chimera.cpp in Sources */,
                                A75B887E104C16860083C454 /* ccode.cpp in Sources */,
                                A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */,
+                               A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index dd6f924ccdf661f4c376020e7f5459bc840cf895..6521128016900efe90b8f85b82a808f9f8791709 100644 (file)
@@ -85,3 +85,23 @@ bool GroupMap::isValidGroup(string groupname) {
                exit(1);
        }
 }
+
+/************************************************************/
+vector<string> GroupMap::getNamesSeqs(){
+       try {
+       
+               vector<string> names;
+               
+               for (it = groupmap.begin(); it != groupmap.end(); it++) {
+                       names.push_back(it->first);
+               }
+               
+               return names;
+       }
+       catch(exception& e) {
+               errorOut(e, "GroupMap", "getNamesSeqs");
+               exit(1);
+       }
+}
+/************************************************************/
+
index 54225e22a0ffab29c3ef7081aec8e75257d6d2d7..e52ada56056e14cf8df6f4a2b7e541d7c8b1fcc7 100644 (file)
@@ -27,6 +27,7 @@ public:
        vector<string> namesOfGroups;
        map<string, int> groupIndex;  //groupname, vectorIndex in namesOfGroups. - used by collectdisplays.
        int getNumSeqs()  {  return groupmap.size();  }
+       vector<string> getNamesSeqs();
                        
 private:
        ifstream fileHandle;
index bb866ed9364557765a38ad78cf1bb047ea502fc9..999d8de95ecdda62b411e3d5fab11092d3105636 100644 (file)
@@ -159,14 +159,22 @@ int ReadOtuCommand::execute(){
                if (globaldata->getFormat() == "shared") {
                        
                        shared = new SharedCommand();
-                       shared->execute();
+                       int okay = shared->execute();
                        delete shared;
+                       
+                       //problem with shared
+                       if (okay == 1) {
+                               globaldata->setListFile("");
+                               globaldata->setGroupFile("");
+                               globaldata->setSharedFile("");
+                       }else{
                                
-                       //change format to shared  to speed up commands
-                       globaldata->setFormat("sharedfile");
-                       globaldata->setListFile("");
-                       globaldata->setGroupFile("");
-                       globaldata->setSharedFile(getRootName(filename) + "shared");
+                               //change format to shared  to speed up commands
+                               globaldata->setFormat("sharedfile");
+                               globaldata->setListFile("");
+                               globaldata->setGroupFile("");
+                               globaldata->setSharedFile(getRootName(filename) + "shared");
+                       }
                }
                return 0;
        }
diff --git a/secondarystructurecommand.cpp b/secondarystructurecommand.cpp
new file mode 100644 (file)
index 0000000..48ea154
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ *  secondarystructurecommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 9/18/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "secondarystructurecommand.h"
+#include "sequence.hpp"
+
+//**********************************************************************************************************************
+
+AlignCheckCommand::AlignCheckCommand(string option){
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"fasta","map"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //check for required parameters
+                       mapfile = validParameter.validFile(parameters, "map", true);
+                       if (mapfile == "not open") { abort = true; }
+                       else if (mapfile == "not found") {  mapfile = "";  mothurOut("You must provide an map file."); mothurOutEndLine(); abort = true; }      
+                       
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") {  fastafile = "";  mothurOut("You must provide an fasta file."); mothurOutEndLine(); abort = true;  }       
+                       
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "AlignCheckCommand", "RemoveSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+//Prints the help text for this command.
+//NOTE(review): currently prints nothing. The commented-out lines below are the help text of the
+//remove.seqs command, apparently kept as a template. This command's own parameters are "fasta" and "map".
+void AlignCheckCommand::help(){
+       try {
+               //mothurOut("The remove.seqs command reads an .accnos file and one of the following file types: fasta, name, group or alignreport file.\n");
+               //mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
+               //mothurOut("The remove.seqs command parameters are accnos, fasta, name, group and alignreport.  You must provide accnos and one of the other parameters.\n");
+               //mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
+               //mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
+               //mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+       }
+       catch(exception& e) {
+               errorOut(e, "AlignCheckCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int AlignCheckCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               //get secondary structure info.
+               readMap();
+               
+       
+               
+               return 0;               
+       }
+
+       catch(exception& e) {
+               errorOut(e, "AlignCheckCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+void AlignCheckCommand::readMap(){
+       try {
+                       
+               structMap.resize(1, 0);
+               ifstream in;
+               
+               openInputFile(mapfile, in);
+               
+               while(!in.eof()){
+                       int position;
+                       in >> position;
+                       structMap.push_back(position);  
+                       gobble(in);
+               }
+               in.close();
+               
+               seqLength = structMap.size();
+               
+               
+               //check you make sure is structMap[10] = 380 then structMap[380] = 10.
+               for(int i=0;i<seqLength;i++){
+                       if(structMap[i] != 0){
+                               if(structMap[structMap[i]] != i){
+                                       mothurOut("Your map file contains an error:  line " + toString(i) + " does not match line " + toString(structMap[i]) + "."); mothurOutEndLine();
+                               }
+                       }
+               }
+               
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "AlignCheckCommand", "readFasta");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
diff --git a/secondarystructurecommand.h b/secondarystructurecommand.h
new file mode 100644 (file)
index 0000000..d94cc95
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef SECONDARYSTRUCTURECHECKERCOMMAND_H
+#define SECONDARYSTRUCTURECHECKERCOMMAND_H
+
+/*
+ *  secondarystructurecommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 9/18/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+#include "command.hpp"
+
+/**************************************************************************************************/
+
+struct statData {
+       int pound;
+       int tilde;
+       int dash;
+       int plus;
+       int equal;
+       int loop;
+       int total;
+       statData() : pound(0), loop(0), tilde(0), dash(0), plus(0), equal(0), total(0) {};      
+};
+
+/**************************************************************************************************/
+
+
+class AlignCheckCommand : public Command {
+       
+       public:
+       
+               AlignCheckCommand(string);      
+               ~AlignCheckCommand(){};
+               int execute();
+               void help();    
+               
+       private:
+               vector<int> structMap;
+               string mapfile, fastafile;
+               bool abort;
+               int seqLength;
+               
+               void readMap();
+};
+
+/**************************************************************************************************/
+#endif
+
index 717d3aaba162daad61bfcd053f672a3f5f685382..aba07b2a8d97503913cff85fedde5d8562ca4f5c 100644 (file)
@@ -67,6 +67,8 @@ int SharedCommand::execute(){
                if (SharedList->getNumSeqs() != groupMap->getNumSeqs()) {  
                        mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); 
                        
+                       createMisMatchFile();
+                       
                        //delete memory
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
                                delete it3->second;
@@ -74,7 +76,7 @@ int SharedCommand::execute(){
                        delete SharedList;
                        globaldata->gSharedList = NULL;
                        
-                       return(0)
+                       return 1
                }
                
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
@@ -181,6 +183,89 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                exit(1);
        }
 }
+//**********************************************************************************************************************
+void SharedCommand::createMisMatchFile() {
+       try {
+               ofstream outMisMatch;
+               string outputMisMatchName = getRootName(globaldata->inputFileName);
+               
+               //you have sequences in your list file that are not in your group file
+               if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
+                       outputMisMatchName += "missing.group";
+                       mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
+                       
+                       openOutputFile(outputMisMatchName, outMisMatch);
+                       
+                       //go through list and if group returns "not found" output it
+                       for (int i = 0; i < SharedList->getNumBins(); i++) {
+                       
+                               string names = SharedList->get(i); 
+                               
+                               while (names.find_first_of(',') != -1) { 
+                                       string name = names.substr(0,names.find_first_of(','));
+                                       names = names.substr(names.find_first_of(',')+1, names.length());
+                                       string group = groupMap->getGroup(name);
+                                       
+                                       if(group == "not found") {      outMisMatch << name << endl;  }
+                               }
+                               
+                               //get last name
+                               string group = groupMap->getGroup(names);
+                               if(group == "not found") {      outMisMatch << names << endl;  }                                
+                       }
+                       
+                       outMisMatch.close();
+                       
+               
+               }else {//you have sequences in your group file that are not in you list file
+                       
+                       outputMisMatchName += "missing.name";
+                       mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); mothurOutEndLine();
+                       
+                       map<string, string> namesInList;
+                       
+                       //go through listfile and get names
+                       for (int i = 0; i < SharedList->getNumSeqs(); i++) {
+                               
+                               string names = SharedList->get(i); 
+                               
+                               while (names.find_first_of(',') != -1) { 
+                                       string name = names.substr(0,names.find_first_of(','));
+                                       names = names.substr(names.find_first_of(',')+1, names.length());
+                                       
+                                       namesInList[name] = name;
+                               }
+                               
+                               //get last name
+                               namesInList[names] = names;                             
+                       }
+                       
+                       //get names of sequences in groupfile
+                       vector<string> seqNames = groupMap->getNamesSeqs();
+               
+                       map<string, string>::iterator itMatch;
+                       
+                       openOutputFile(outputMisMatchName, outMisMatch);
+                       
+                       //loop through names in seqNames and if they aren't in namesIn list output them
+                       for (int i = 0; i < seqNames.size(); i++) {
+                               
+                               itMatch = namesInList.find(seqNames[i]);
+                               
+                               if (itMatch == namesInList.end()) {
+                               
+                                       outMisMatch << seqNames[i] << endl; 
+                               }
+                       }               
+                       outMisMatch.close();
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "SharedCommand", "createMisMatchFile");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 
index 3467c7b1d9e6beb1bcd2b5af9a63f9d9bbaf8045..f0c24884f05d36f3ed5981ef0ef75a7c57e17a8a 100644 (file)
@@ -33,6 +33,7 @@ public:
        
 private:
        void printSharedData(vector<SharedRAbundVector*>);
+       void createMisMatchFile();
        GlobalData* globaldata;
        ReadOTUFile* read;
        SharedListVector* SharedList;