git.donarmstrong.com Git - mothur.git/blobdiff - groupmap.cpp
added rename.seqs command.
[mothur.git] / groupmap.cpp
index 0e0be0e8cf2799527e2664bb79b1cdded09e215c..9b8aa3f8abc87ef9ff93520a80b3da9169d406f4 100644 (file)
 
 /************************************************************/
  GroupMap::~GroupMap(){}
-
 /************************************************************/
 int GroupMap::readMap() {
-               string seqName, seqGroup;
+    try {
+        string seqName, seqGroup;
+               int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+    
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+        
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+        
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+            
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
+        }
+               fileHandle.close();
+        
+        if (rest != "") {
+            vector<string> pieces = m->splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
+        }
+        
+               m->setAllGroups(namesOfGroups);
+               return error;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "readMap");
+               exit(1);
+       }
+}
+/************************************************************/
+// Reads the member stream fileHandle (this overload does not open it) in
+// 4096-byte chunks and treats the whitespace-separated tokens as alternating
+// name / group columns of a design file, storing each pair in groupmap.
+// Returns 0 on success; 1 if a duplicate name was seen, if the stream stopped
+// before end-of-file, or if m->control_pressed was set while reading.
+int GroupMap::readDesignMap() {
+    try {
+        string seqName, seqGroup;
                int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+
+        // Loop on good() rather than !eof(): a stream that is in a failed state
+        // without eofbit (never opened, I/O error) never reaches eof and would
+        // otherwise keep this loop spinning forever.
+        while (fileHandle.good()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+
+            fileHandle.read(buffer, 4096);
+            // gcount() is the number of characters the read() above actually stored.
+            // NOTE(review): rest presumably carries a token cut by the chunk boundary - confirm in splitWhiteSpace.
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                // Tokens alternate: first column is the name, second its group.
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+
+        // Leaving the loop without eofbit means the stream failed before end-of-file.
+        if (!fileHandle.eof()) {
+            error = 1;
+            m->mothurOut("[ERROR]: unable to read your design file."); m->mothurOutEndLine();
+        }
+        fileHandle.close();
+
+        // Whatever text is still held in rest is split and processed the same way;
+        // columnOne and seqName carry over, so a pair may straddle the two loops.
+        if (rest != "") {
+            vector<string> pieces = m->splitWhiteSpace(rest);
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
 
-               while(fileHandle){
-                       fileHandle >> seqName;  m->gobble(fileHandle);          //read from first column
-                       fileHandle >> seqGroup;                 //read from second column
-                       
-                       if (m->control_pressed) {  fileHandle.close();  return 1; }
-       
-                       setNamesOfGroups(seqGroup);
-                       
-                       it = groupmap.find(seqName);
-                       
-                       if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
-                       else {
-                               groupmap[seqName] = seqGroup;   //store data in map
-                               seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
-                       }
-                       m->gobble(fileHandle);
-               }
+        }
+
+        m->setAllGroups(namesOfGroups);
+        return error;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GroupMap", "readDesignMap");
+        exit(1);
+    }
+}
+/************************************************************/
+// Records filename in groupFileName, opens it on the member stream fileHandle,
+// resets index to 0, then reads the file in 4096-byte chunks, treating the
+// whitespace-separated tokens as alternating sequence-name / group-name columns.
+// Returns 0 on success; 1 if a duplicate sequence name was seen, if the stream
+// stopped before end-of-file (including a file that could not be opened), or if
+// m->control_pressed was set while reading.
+int GroupMap::readMap(string filename) {
+    try {
+        groupFileName = filename;
+        // The result of openInputFile is not inspected here; a stream that failed
+        // to open is caught by the good()/eof() checks below.
+        m->openInputFile(filename, fileHandle);
+        index = 0;
+        string seqName, seqGroup;
+        int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+
+        // Loop on good() rather than !eof(): a stream that is in a failed state
+        // without eofbit (never opened, I/O error) never reaches eof and would
+        // otherwise keep this loop spinning forever.
+        while (fileHandle.good()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+
+            fileHandle.read(buffer, 4096);
+            // gcount() is the number of characters the read() above actually stored.
+            // NOTE(review): rest presumably carries a token cut by the chunk boundary - confirm in splitWhiteSpace.
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                // Tokens alternate: first column is the sequence name, second its group.
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+
+        // Leaving the loop without eofbit means the stream failed before end-of-file.
+        if (!fileHandle.eof()) {
+            error = 1;
+            m->mothurOut("[ERROR]: unable to read your group file."); m->mothurOutEndLine();
+        }
                fileHandle.close();
+
+        // Whatever text is still held in rest is split and processed the same way;
+        // columnOne and seqName carry over, so a pair may straddle the two loops.
+        if (rest != "") {
+            vector<string> pieces = m->splitWhiteSpace(rest);
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+
+        m->setAllGroups(namesOfGroups);
                return error;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GroupMap", "readMap");
+        exit(1);
+    }
+}
+/************************************************************/
+// Records filename in groupFileName, opens it on the member stream fileHandle,
+// resets index to 0, then reads the file in 4096-byte chunks, treating the
+// whitespace-separated tokens as alternating name / group columns of a design file.
+// Returns 0 on success; 1 if a duplicate name was seen, if the stream stopped
+// before end-of-file (including a file that could not be opened), or if
+// m->control_pressed was set while reading.
+int GroupMap::readDesignMap(string filename) {
+    try {
+        groupFileName = filename;
+        // The result of openInputFile is not inspected here; a stream that failed
+        // to open is caught by the good()/eof() checks below.
+        m->openInputFile(filename, fileHandle);
+        index = 0;
+        string seqName, seqGroup;
+        int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+
+        // Loop on good() rather than !eof(): a stream that is in a failed state
+        // without eofbit (never opened, I/O error) never reaches eof and would
+        // otherwise keep this loop spinning forever.
+        while (fileHandle.good()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+
+            fileHandle.read(buffer, 4096);
+            // gcount() is the number of characters the read() above actually stored.
+            // NOTE(review): rest presumably carries a token cut by the chunk boundary - confirm in splitWhiteSpace.
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                // Tokens alternate: first column is the name, second its group.
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+
+        // Leaving the loop without eofbit means the stream failed before end-of-file.
+        if (!fileHandle.eof()) {
+            error = 1;
+            m->mothurOut("[ERROR]: unable to read your design file."); m->mothurOutEndLine();
+        }
+        fileHandle.close();
+
+        // Whatever text is still held in rest is split and processed the same way;
+        // columnOne and seqName carry over, so a pair may straddle the two loops.
+        if (rest != "") {
+            vector<string> pieces = m->splitWhiteSpace(rest);
+
+            for (size_t i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+                if (pairDone) {
+                    setNamesOfGroups(seqGroup);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
+                    it = groupmap.find(seqName);
+
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false;
+                }
+            }
+        }
+
+        m->setAllGroups(namesOfGroups);
+        return error;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GroupMap", "readDesignMap");
+        exit(1);
+    }
 }
 /************************************************************/
 int GroupMap::getNumGroups() { return namesOfGroups.size();    }
@@ -56,6 +317,10 @@ string GroupMap::getGroup(string sequenceName) {
        if (it != groupmap.end()) { //sequence name was in group file
                return it->second;      
        }else {
+        //look for it in names of groups to see if the user accidentally used the wrong file
+        if (m->inUsersGroups(sequenceName, namesOfGroups)) {
+            m->mothurOut("[WARNING]: Your group or design file contains a group named " + sequenceName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); 
+        }
                return "not found";
        }
 }
@@ -63,7 +328,15 @@ string GroupMap::getGroup(string sequenceName) {
 /************************************************************/
 
 void GroupMap::setGroup(string sequenceName, string groupN) {
-       groupmap[sequenceName] = groupN;
+       setNamesOfGroups(groupN);
+       m->checkName(sequenceName);
+       it = groupmap.find(sequenceName);
+       
+       if (it != groupmap.end()) {  m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+       else {
+               groupmap[sequenceName] = groupN;        //store data in map
+               seqsPerGroup[groupN]++;  //increment number of seqs in that group
+       }
 }
 
 /************************************************************/
@@ -99,6 +372,22 @@ bool GroupMap::isValidGroup(string groupname) {
        }
 }
 /************************************************************/
+// Copies every sequence of g into this map by calling setGroup with the name
+// and the group g reports for it. Stops early if m->control_pressed is set.
+// Returns the number of sequence names g holds (not the number copied).
+int GroupMap::getCopy(GroupMap* g) {
+       try {
+               const vector<string> seqNames = g->getNamesSeqs();
+               const size_t total = seqNames.size();
+
+               size_t idx = 0;
+               while (idx < total) {
+                       if (m->control_pressed) { break; }
+                       const string& seqName = seqNames[idx];
+                       setGroup(seqName, g->getGroup(seqName));
+                       ++idx;
+               }
+
+               return total;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "getCopy");
+               exit(1);
+       }
+}
+/************************************************************/
 int GroupMap::getNumSeqs(string group) {
        try {
                
@@ -116,7 +405,66 @@ int GroupMap::getNumSeqs(string group) {
                exit(1);
        }
 }
-
+/************************************************************/
+// Re-keys the groupmap entry for oldName under newName, keeping its group.
+// If oldName is not present, logs an error and sets m->control_pressed.
+// Returns 0 in every case.
+int GroupMap::renameSeq(string oldName, string newName) {
+       try {
+               map<string, string>::iterator itName = groupmap.find(oldName);
+
+               if (itName == groupmap.end()) {
+                       // End the line like every other message in this file, so later output does not run onto it.
+                       m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file"); m->mothurOutEndLine();
+                       m->control_pressed = true;
+                       return 0;
+               }
+
+               // Copy the group first: erase() invalidates itName.
+               const string group = itName->second;
+               groupmap.erase(itName);
+               // NOTE(review): an existing newName entry is overwritten here and seqsPerGroup
+               // is not adjusted - confirm callers guarantee newName is unused.
+               groupmap[newName] = group;
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "renameSeq");
+               exit(1);
+       }
+}
+/************************************************************/
+// Writes every entry of groupmap to out as "name<TAB>group", one per line,
+// in the map's iteration order. Always returns 0.
+int GroupMap::print(ofstream& out) {
+       try {
+               // '\n' rather than endl: endl would flush the stream after every sequence.
+               for (map<string, string>::iterator itName = groupmap.begin(); itName != groupmap.end(); ++itName) {
+                       out << itName->first << '\t' << itName->second << '\n';
+               }
+               // Flush once so the output has been handed off by the time we return, as it was with endl.
+               out.flush();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "print");
+               exit(1);
+       }
+}
+/************************************************************/
+// Writes "name<TAB>group", one per line, for each groupmap entry whose group
+// m->inUsersGroups accepts against userGroups. Always returns 0.
+int GroupMap::print(ofstream& out, vector<string> userGroups) {
+       try {
+               // '\n' rather than endl: endl would flush the stream after every sequence.
+               for (map<string, string>::iterator itName = groupmap.begin(); itName != groupmap.end(); ++itName) {
+                       if (m->inUsersGroups(itName->second, userGroups)) {
+                               out << itName->first << '\t' << itName->second << '\n';
+                       }
+               }
+               // Flush once so the output has been handed off by the time we return, as it was with endl.
+               out.flush();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "print");
+               exit(1);
+       }
+}
 /************************************************************/
 vector<string> GroupMap::getNamesSeqs(){
        try {
@@ -135,4 +483,25 @@ vector<string> GroupMap::getNamesSeqs(){
        }
 }
 /************************************************************/
+// Returns the names of all sequences whose group m->inUsersGroups accepts
+// against picked, in groupmap's iteration order. Walks the map with the
+// member iterator it, which is left at groupmap.end() afterwards.
+vector<string> GroupMap::getNamesSeqs(vector<string> picked){
+       try {
+               vector<string> selected;
+
+               it = groupmap.begin();
+               while (it != groupmap.end()) {
+                       // keep the sequence only if its group is one of the picked groups
+                       const bool wanted = m->inUsersGroups(it->second, picked);
+                       if (wanted) { selected.push_back(it->first); }
+                       it++;
+               }
+
+               return selected;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "getNamesSeqs");
+               exit(1);
+       }
+}
+
+/************************************************************/