]> git.donarmstrong.com Git - mothur.git/blobdiff - counttable.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / counttable.cpp
index 376bd73487b4031077f7dd26b1c58b648d9ff04d..5307beee8379d8e758b946b3847dfa1be2d5a7b2 100644 (file)
@@ -8,6 +8,57 @@
 
 #include "counttable.h"
 
+/************************************************************/
+int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
+    try {
+        int numGroups = 0;
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it);  hasGroups = true; }
+        numGroups = groups.size();
+        totalGroups.resize(numGroups, 0);
+        
+               //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+        
+        uniques = 0;
+        total = 0;
+        for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
+            
+            if (m->control_pressed) { break; }
+            
+            string seqName = *it;
+            
+            vector<int> groupCounts; groupCounts.resize(numGroups, 0);
+            map<string, string>::iterator itGroup = g.find(seqName);
+            
+            if (itGroup != g.end()) {   
+                groupCounts[indexGroupMap[itGroup->second]] = 1; 
+                totalGroups[indexGroupMap[itGroup->second]]++;
+            }else { m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); }
+            
+            map<string, int>::iterator it2 = indexNameMap.find(seqName);
+            if (it2 == indexNameMap.end()) {
+                if (hasGroups) {  counts.push_back(groupCounts);  }
+                indexNameMap[seqName] = uniques;
+                totals.push_back(1);
+                total++;
+                uniques++;
+            }
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "createTable");
+               exit(1);
+       }
+}
 /************************************************************/
 bool CountTable::testGroups(string file) {
     try {
@@ -26,6 +77,118 @@ bool CountTable::testGroups(string file) {
        }
 }
 /************************************************************/
+int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
+    try {
+        
+        if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
+                                           
+        GroupMap* groupMap;
+        int numGroups = 0;
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        map<int, string> originalGroupIndexes;
+        
+        if (groupfile != "") { 
+            hasGroups = true;
+            groupMap = new GroupMap(groupfile); groupMap->readMap();
+            numGroups = groupMap->getNumGroups();
+            groups = groupMap->getNamesOfGroups();
+            totalGroups.resize(numGroups, 0);
+        }else if(createGroup) {
+            hasGroups = true;
+            numGroups = 1;
+            groups.push_back("Group1");
+            totalGroups.resize(numGroups, 0);
+        }
+               //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+        
+        bool error = false;
+        string name;
+        uniques = 0;
+        total = 0;
+        
+        
+        //open input file
+        ifstream in;
+        m->openInputFile(namefile, in);
+        
+        int total = 0;
+        while (!in.eof()) {
+            if (m->control_pressed) { break; }
+            
+            string firstCol, secondCol;
+            in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
+            
+            vector<string> names;
+            m->splitAtChar(secondCol, names, ',');
+            
+            map<string, int> groupCounts;
+            int thisTotal = 0;
+            if (groupfile != "") {
+                //set to 0
+                for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
+                
+                //get counts for each of the users groups
+                for (int i = 0; i < names.size(); i++) {
+                    string group = groupMap->getGroup(names[i]);
+                    
+                    if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
+                    else {
+                        map<string, int>::iterator it = groupCounts.find(group);
+                        
+                        //if not found, then this sequence is not from a group we care about
+                        if (it != groupCounts.end()) {
+                            it->second++;
+                            thisTotal++;
+                        }
+                    }
+                }
+            }else if (createGroup) {
+                groupCounts["Group1"]=0;
+                for (int i = 0; i < names.size(); i++) {
+                    string group = "Group1";
+                    groupCounts["Group1"]++; thisTotal++;
+                }
+            }else { thisTotal = names.size();  }
+            
+            //if group info, then read it
+            vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
+            for (int i = 0; i < numGroups; i++) {  
+                thisGroupsCount[i] = groupCounts[groups[i]]; 
+                totalGroups[i] += thisGroupsCount[i]; 
+            }
+            
+            map<string, int>::iterator it = indexNameMap.find(firstCol);
+            if (it == indexNameMap.end()) {
+                if (hasGroups) {  counts.push_back(thisGroupsCount);  }
+                indexNameMap[firstCol] = uniques;
+                totals.push_back(thisTotal);
+                total += thisTotal;
+                uniques++;
+            }else {
+                error = true;
+                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); 
+            }
+        }
+        in.close();
+               
+        if (error) { m->control_pressed = true; }
+               if (groupfile != "") { delete groupMap; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "createTable");
+               exit(1);
+       }
+}
+/************************************************************/
 int CountTable::readTable(string file) {
     try {
         filename = file;
@@ -89,6 +252,68 @@ int CountTable::readTable(string file) {
        }
 }
 /************************************************************/
+int CountTable::printTable(string file) {
+    try {
+        ofstream out;
+        m->openOutputFile(file, out); 
+               out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
+        out << endl;
+        
+        for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
+            out << itNames->first << '\t' << totals[itNames->second] << '\t';
+            if (hasGroups) {
+                
+                for (int i = 0; i < groups.size(); i++) {
+                    out << counts[itNames->second][i] << '\t';
+                }
+            }
+            out << endl;
+        }
+        out.close();
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "printTable");
+               exit(1);
+       }
+}
+/************************************************************/
+int CountTable::printHeaders(ofstream& out) {
+    try {
+               out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
+        out << endl;
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "printHeaders");
+               exit(1);
+       }
+}
+/************************************************************/
+int CountTable::printSeq(ofstream& out, string seqName) {
+    try {
+               map<string, int>::iterator it = indexNameMap.find(seqName);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else { 
+            out << it->first << '\t' << totals[it->second] << '\t';
+            if (hasGroups) {
+                for (int i = 0; i < groups.size(); i++) {
+                    out << counts[it->second][i] << '\t';
+                }
+            }
+            out << endl;
+        }
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "printSeq");
+               exit(1);
+       }
+}
+/************************************************************/
 //group counts for a seq
 vector<int> CountTable::getGroupCounts(string seqName) {
     try {
@@ -154,6 +379,123 @@ int CountTable::getGroupCount(string seqName, string groupName) {
                exit(1);
        }
 }
+/************************************************************/
+//set the number of sequences for the seq for the group
+int CountTable::setAbund(string seqName, string groupName, int num) {
+    try {
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(groupName);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                map<string, int>::iterator it2 = indexNameMap.find(seqName);
+                if (it2 == indexNameMap.end()) {
+                    m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                }else { 
+                    int oldCount = counts[it2->second][it->second];
+                    counts[it2->second][it->second] = num;
+                    totalGroups[it->second] += (num - oldCount);
+                    total += (num - oldCount);
+                    totals[it2->second] += (num - oldCount);
+                }
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "set");
+               exit(1);
+       }
+}
+/************************************************************/
+//add group
+int CountTable::addGroup(string groupName) {
+    try {        
+        bool sanity = m->inUsersGroups(groupName, groups);
+        if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true;  return 0; }
+        
+        groups.push_back(groupName);
+        if (!hasGroups) { counts.resize(uniques);  }
+        
+        for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); }
+        totalGroups.push_back(0);
+        indexGroupMap[groupName] = groups.size()-1;
+        map<string, int> originalGroupMap = indexGroupMap;
+        
+        //important to play well with others, :)
+        sort(groups.begin(), groups.end());
+        
+        //fix indexGroupMap && totalGroups
+        vector<int> newTotals; newTotals.resize(groups.size(), 0);
+        for (int i = 0; i < groups.size(); i++) {  
+            indexGroupMap[groups[i]] = i;  
+            //find original spot of group[i]
+            int index = originalGroupMap[groups[i]];
+            newTotals[i] = totalGroups[index];
+        }
+        totalGroups = newTotals;
+        
+        //fix counts vectors
+        for (int i = 0; i < counts.size(); i++) {
+            vector<int> newCounts; newCounts.resize(groups.size(), 0);
+            for (int j = 0; j < groups.size(); j++) {  
+                //find original spot of group[i]
+                int index = originalGroupMap[groups[j]];
+                newCounts[j] = counts[i][index];
+            }
+            counts[i] = newCounts;
+        }
+        hasGroups = true;
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "addGroup");
+               exit(1);
+       }
+}
+/************************************************************/
+//vector of groups for the seq
+vector<string> CountTable::getGroups(string seqName) {
+    try {
+        vector<string> thisGroups;
+        if (hasGroups) {
+            vector<int> thisCounts = getGroupCounts(seqName);
+            for (int i = 0; i < thisCounts.size(); i++) {  
+                if (thisCounts[i] != 0) {  thisGroups.push_back(groups[i]); }
+            } 
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return thisGroups;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getGroups");
+               exit(1);
+       }
+}
+/************************************************************/
+//total number of seqs represented by seq
+int CountTable::renameSeq(string oldSeqName, string newSeqName) {
+    try {
+        
+        map<string, int>::iterator it = indexNameMap.find(oldSeqName);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else {  
+            int index = it->second;
+            indexNameMap.erase(it);
+            indexNameMap[newSeqName] = index;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "renameSeq");
+               exit(1);
+       }
+}
+
 /************************************************************/
 //total number of seqs represented by seq
 int CountTable::getNumSeqs(string seqName) {
@@ -213,6 +555,30 @@ int CountTable::push_back(string seqName) {
        }
 }
 /************************************************************/
+//remove sequence
+int CountTable::remove(string seqName) {
+    try {
+        map<string, int>::iterator it = indexNameMap.find(seqName);
+        if (it == indexNameMap.end()) {
+            uniques--;
+            if (hasGroups){ //remove this sequences counts from group totals
+                for (int i = 0; i < totalGroups.size(); i++) {  totalGroups[i] -= counts[it->second][i];  counts[it->second][i] = 0; }
+            }
+            int thisTotal = totals[it->second]; totals[it->second] = 0;
+            total -= thisTotal;
+            indexNameMap.erase(it);
+        }else {
+            m->mothurOut("[ERROR]: Your count table contains does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "push_back");
+               exit(1);
+       }
+}
+/************************************************************/
 //add seqeunce without group info
 int CountTable::push_back(string seqName, int thisTotal) {
     try {
@@ -243,6 +609,7 @@ int CountTable::push_back(string seqName, vector<int> groupCounts) {
             if ((hasGroups) && (groupCounts.size() != getNumGroups())) {  m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
             int thisTotal = 0;
             for (int i = 0; i < getNumGroups(); i++) {   totalGroups[i] += groupCounts[i];  thisTotal += groupCounts[i]; }
+            if (hasGroups) {  counts.push_back(groupCounts);  }
             indexNameMap[seqName] = uniques;
             totals.push_back(thisTotal);
             total+= thisTotal;
@@ -293,7 +660,30 @@ vector<string> CountTable::getNamesOfSeqs() {
        }
 }
 /************************************************************/
-//returns names of seqs
+//returns the names of all unique sequences in file
+vector<string> CountTable::getNamesOfSeqs(string group) {
+    try {
+        vector<string> names;
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(group);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
+                    if (counts[it2->second][it->second] != 0) {  names.push_back(it2->first); }
+                }
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getNamesOfSeqs");
+               exit(1);
+       }
+}
+/************************************************************/
+//merges counts of seq1 and seq2, saving in seq1
 int CountTable::mergeCounts(string seq1, string seq2) {
     try {
         map<string, int>::iterator it = indexNameMap.find(seq1);
@@ -305,17 +695,12 @@ int CountTable::mergeCounts(string seq1, string seq2) {
                 m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
             }else { 
                 //merge data
-                for (int i = 0; i < groups.size(); i++) {
-                    counts[it->second][i] += counts[it2->second][i];
-                    counts[it2->second][i] = 0;
-                }
+                for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; }
                 totals[it->second] += totals[it2->second];
-                totals[it2->second] = 0;
                 uniques--;
                 indexNameMap.erase(it2); 
             }
         }
-        
         return 0;
     }
        catch(exception& e) {
@@ -323,6 +708,25 @@ int CountTable::mergeCounts(string seq1, string seq2) {
                exit(1);
        }
 }
+/************************************************************/
+int CountTable::copy(CountTable* ct) {
+    try {
+        vector<string> thisGroups = ct->getNamesOfGroups();
+        for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
+        vector<string> names = ct->getNamesOfSeqs();
+                                                               
+        for (int i = 0; i < names.size(); i++) {
+            vector<int> thisCounts = ct->getGroupCounts(names[i]);
+            push_back(names[i], thisCounts);
+        }
+                                                               
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "copy");
+               exit(1);
+       }
+}
 
 /************************************************************/