]> git.donarmstrong.com Git - mothur.git/commitdiff
added countable class to read and store count file. added count parameter to make...
authorSarah Westcott <mothur.westcott@gmail.com>
Thu, 28 Jun 2012 13:37:34 +0000 (09:37 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Thu, 28 Jun 2012 13:37:34 +0000 (09:37 -0400)
Mothur.xcodeproj/project.pbxproj
counttable.cpp [new file with mode: 0644]
counttable.h [new file with mode: 0644]
mothurout.h
sharedcommand.cpp
sharedcommand.h
sharedlistvector.cpp
sharedlistvector.h

index 6db66c6184bc3798ee4d8ba2fc7247a9feb6acb4..b6d221f3ba93e62c67ca7c2fe3f239bf5813758e 100644 (file)
@@ -26,6 +26,7 @@
                A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
                A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
                A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
+               A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
                A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
                A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
                A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
                A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
                A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
                A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
+               A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
+               A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = "<group>"; };
                A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
                A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
                A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
                                A7E9B66312D37EC400DA6239 /* blastalign.hpp */,
                                A7E9B66412D37EC400DA6239 /* blastdb.cpp */,
                                A7E9B66512D37EC400DA6239 /* blastdb.hpp */,
+                               A74D59A6159A1E3600043046 /* counttable.h */,
+                               A74D59A3159A1E2000043046 /* counttable.cpp */,
+                               A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
                                A7E9B6BD12D37EC400DA6239 /* database.cpp */,
                                A7E9B6BE12D37EC400DA6239 /* database.hpp */,
                                A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
-                               A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
                                A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
                                A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
                                A7E9B6DF12D37EC400DA6239 /* fastamap.h */,
                                A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */,
                                A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
                                A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */,
+                               A74D59A4159A1E2000043046 /* counttable.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/counttable.cpp b/counttable.cpp
new file mode 100644 (file)
index 0000000..c4e2732
--- /dev/null
@@ -0,0 +1,177 @@
+//
+//  counttable.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 6/26/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "counttable.h"
+
+
+/************************************************************/
+//Reads the count table stored in file and replaces anything a previous call loaded.
+//  file: path of the count table. The first line is the header (name column, total column,
+//        then one column per group); every following line holds a sequence name, its total
+//        and, when the header has group columns, one count per group.
+//Group columns are stored in sorted group-name order, not in file order.
+//A duplicate sequence name or a row that cannot be parsed is reported through m->mothurOut
+//and sets m->control_pressed. Always returns 0; on an exception the error is logged and
+//the program exits.
+int CountTable::readTable(string file) {
+    try {
+        filename = file;
+        ifstream in;
+        m->openInputFile(filename, in);
+        
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+        
+        //start from a clean slate so a second readTable call cannot mix in stale data
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        totals.clear();
+        hasGroups = false;
+        uniques = 0;
+        total = 0;
+        
+        //columns 0 and 1 are the sequence name and its total, anything after that is a group
+        int numGroups = 0;
+        if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2;  }
+        for (int i = 0; i < numGroups; i++) {  groups.push_back(columnHeaders[i+2]);  }
+        totalGroups.resize(numGroups, 0);
+        
+        //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < numGroups; i++) {  indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+        
+        //file column -> position in the sorted groups vector, looked up once instead of once per row
+        vector<int> sortedIndex(numGroups, 0);
+        for (int i = 0; i < numGroups; i++) {  sortedIndex[i] = indexGroupMap[columnHeaders[i+2]];  }
+        
+        bool error = false;
+        string name;
+        int thisTotal = 0;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            //test each extraction directly: a failed read never sets eof, so relying on
+            //in.eof() alone would keep this loop spinning on a malformed row
+            bool rowOk = true;
+            if (!(in >> name >> thisTotal)) { rowOk = false; }
+            else { m->gobble(in); }
+            
+            //if group info, then read it
+            vector<int> groupCounts(numGroups, 0);
+            for (int i = 0; rowOk && (i < numGroups); i++) {
+                if (!(in >> groupCounts[sortedIndex[i]])) { rowOk = false; }
+                else { m->gobble(in); }
+            }
+            
+            if (!rowOk) {
+                error = true;
+                m->mothurOut("[ERROR]: Your count table is not formatted as expected near sequence " + name + ". Please correct."); m->mothurOutEndLine();
+                break;
+            }
+            
+            map<string, int>::iterator it = indexNameMap.find(name);
+            if (it == indexNameMap.end()) {
+                if (hasGroups) {
+                    //only accepted rows contribute, so totalGroups always agrees with counts
+                    for (int i = 0; i < numGroups; i++) {  totalGroups[i] += groupCounts[i];  }
+                    counts.push_back(groupCounts);
+                }
+                indexNameMap[name] = uniques;
+                totals.push_back(thisTotal);
+                total += thisTotal;
+                uniques++;
+            }else {
+                error = true;
+                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); 
+            }
+        }
+        in.close();
+        
+        if (error) { m->control_pressed = true; }
+        
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "readTable");
+        exit(1);
+    }
+}
+/************************************************************/
+//Per-group counts stored for seqName, in the same order as the groups vector.
+//Returns an empty vector, prints an error and sets m->control_pressed when the table
+//has no group info or seqName is not in it.
+vector<int> CountTable::getGroupCounts(string seqName) {
+    try {
+        vector<int> noCounts;
+        
+        if (!hasGroups) {
+            m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true;
+            return noCounts;
+        }
+        
+        map<string, int>::iterator seqPos = indexNameMap.find(seqName);
+        if (seqPos == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return noCounts;
+        }
+        
+        return counts[seqPos->second];
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getGroupCounts");
+        exit(1);
+    }
+}
+/************************************************************/
+//Total number of sequences recorded for groupName across the whole table.
+//Returns 0, prints an error and sets m->control_pressed when the table has no group
+//info or groupName is not one of its groups.
+int CountTable::getGroupCount(string groupName) {
+    try {
+        if (!hasGroups) {
+            m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true;
+            return 0;
+        }
+        
+        map<string, int>::iterator groupPos = indexGroupMap.find(groupName);
+        if (groupPos != indexGroupMap.end()) {
+            return totalGroups[groupPos->second];
+        }
+        
+        m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getGroupCount");
+        exit(1);
+    }
+}
+/************************************************************/
+//Number of sequences that seqName contributes to groupName.
+//The group is checked before the sequence, so only the first failing lookup is reported.
+//Returns 0, prints an error and sets m->control_pressed when the table has no group
+//info, groupName is unknown or seqName is unknown.
+int CountTable::getGroupCount(string seqName, string groupName) {
+    try {
+        if (!hasGroups) {
+            m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true;
+            return 0;
+        }
+        
+        map<string, int>::iterator groupPos = indexGroupMap.find(groupName);
+        if (groupPos == indexGroupMap.end()) {
+            m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+        
+        map<string, int>::iterator seqPos = indexNameMap.find(seqName);
+        if (seqPos == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            return 0;
+        }
+        
+        return counts[seqPos->second][groupPos->second];
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getGroupCount");
+        exit(1);
+    }
+}
+/************************************************************/
+//Total number of sequences that the representative sequence seqName stands for.
+//Returns 0, prints an error and sets m->control_pressed when seqName is not in the table.
+int CountTable::getNumSeqs(string seqName) {
+    try {
+        map<string, int>::iterator seqPos = indexNameMap.find(seqName);
+        if (seqPos != indexNameMap.end()) {
+            return totals[seqPos->second];
+        }
+        
+        m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getNumSeqs");
+        exit(1);
+    }
+}
+/************************************************************/
+//Names of every sequence in the table, in the key order of indexNameMap.
+vector<string> CountTable::getNamesOfSeqs() {
+    try {
+        vector<string> seqNames;
+        
+        map<string, int>::iterator pos = indexNameMap.begin();
+        while (pos != indexNameMap.end()) {
+            seqNames.push_back(pos->first);
+            pos++;
+        }
+        
+        return seqNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CountTable", "getNamesOfSeqs");
+        exit(1);
+    }
+}
+/************************************************************/
+
+
diff --git a/counttable.h b/counttable.h
new file mode 100644 (file)
index 0000000..8c97019
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef Mothur_counttable_h
+#define Mothur_counttable_h
+
+
+//
+//  counttable.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 6/26/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+//This class is designed to read a count table file and store its data.
+//count table files look like:
+
+/*
+ Representative_Sequence       total   F003D000        F003D002        F003D004        F003D006        F003D008        F003D142        F003D144        F003D146        F003D148        F003D150        MOCK.GQY1XT001  
+ GQY1XT001C296C        6051    409     985     923     937     342     707     458     439     387     464     0       
+ GQY1XT001A3TJI        4801    396     170     413     442     306     769     581     576     497     651     0       
+ GQY1XT001CS2B8        3018    263     226     328     460     361     336     248     290     187     319     0       
+ GQY1XT001CD9IB        2736    239     177     256     405     306     286     263     248     164     392     0       
+ or if no group info was used to create it
+ Representative_Sequence       total   
+ GQY1XT001C296C        6051
+ GQY1XT001A3TJI        4801
+ GQY1XT001CS2B8        3018
+ GQY1XT001CD9IB        2736
+ GQY1XT001ARCB1        2183
+ GQY1XT001CNF2P        2796
+ GQY1XT001CJMDA        1667
+ GQY1XT001CBVJB        3758
+ */
+
+
+#include "mothurout.h"
+
+//Holds the contents of a count table file: for every representative sequence the total
+//number of sequences it stands for and, when the file has group columns, how many of
+//those belong to each group. Fill it with readTable, then query it with the getters.
+class CountTable {
+    
+    public:
+    
+        //every counter starts at 0 so the getters are well defined before readTable is called
+        CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; }
+        ~CountTable() {}
+    
+        int readTable(string); //reads the count table file passed in, always returns 0
+    
+        bool hasGroupInfo() { return hasGroups; }
+        int getNumGroups() { return groups.size(); }
+        vector<string> getNamesOfGroups() {  return groups;   }  //returns group names, if no group info vector is blank.
+    
+        vector<int> getGroupCounts(string);  //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getNamesOfGroups function.
+        int getGroupCount(string, string); //returns number of seqs for that group for that seq (seq name first, group name second)
+        int getGroupCount(string); // returns total seqs for that group
+        int getNumSeqs(string); //returns total seqs for that seq
+        int getNumSeqs() { return total; } //return total number of seqs
+        int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs
+        //NOTE(review): counttable.cpp as added alongside this header has no definition for
+        //getGroupIndex, so calling it will not link until one is provided.
+        int getGroupIndex(string); //returns index in getGroupCounts vector of specific group
+        vector<string> getNamesOfSeqs();
+    
+    private:
+        string filename;                 //file passed to the last readTable call
+        MothurOut* m;
+        bool hasGroups;                  //true when the file header had group columns
+        int total, uniques;              //sum of all seq totals, number of distinct seq names
+        vector<string> groups;           //group names, sorted
+        vector< vector<int> > counts;    //counts[seq index][group index], only filled when hasGroups
+        vector<int> totals;              //totals[seq index]
+        vector<int> totalGroups;         //totalGroups[group index]
+        map<string, int> indexNameMap;   //seq name -> seq index
+        map<string, int> indexGroupMap;  //group name -> group index
+    
+};
+
+#endif
index 98565dc547d1e81dc03d88c9a526983727c1b124..ac47d79f7d92b113f58dbe421fdbaad892b2b175 100644 (file)
@@ -68,7 +68,7 @@ class MothurOut {
                //map<string, string> names;
                vector<string> binLabelsInFile;
                vector<string> currentBinLabels;
-               string saveNextLabel, argv, sharedHeaderMode;
+               string saveNextLabel, argv, sharedHeaderMode, groupMode;
                bool printedHeaders, commandInputsConvertError;
                
                //functions from mothur.h
@@ -179,7 +179,7 @@ class MothurOut {
                
                void setListFile(string f)                      { listfile = getFullPathName(f);                        }
                void setTreeFile(string f)                      { treefile = getFullPathName(f);                        }
-               void setGroupFile(string f)                     { groupfile = getFullPathName(f);                       }               
+               void setGroupFile(string f)                     { groupfile = getFullPathName(f);       groupMode = "group";            }               
                void setPhylipFile(string f)            { phylipfile = getFullPathName(f);                      }
                void setColumnFile(string f)            { columnfile = getFullPathName(f);                      }
                void setNameFile(string f)                      { namefile = getFullPathName(f);                        }       
@@ -198,7 +198,7 @@ class MothurOut {
                void setTaxonomyFile(string f)          { taxonomyfile = getFullPathName(f);            }
                void setFlowFile(string f)                      { flowfile = getFullPathName(f);                        }
         void setBiomFile(string f)                     { biomfile = getFullPathName(f);                        }
-        void setCountTableFile(string f)       { counttablefile = getFullPathName(f);          }
+        void setCountTableFile(string f)       { counttablefile = getFullPathName(f);  groupMode = "count";    }
         void setProcessors(string p)           { processors = p; mothurOut("\nUsing " + toString(p) + " processors.\n");       }
                
                void printCurrentFiles();
@@ -241,6 +241,7 @@ class MothurOut {
             mothurCalling = false;
             debug = false;
                        sharedHeaderMode = "";
+            groupMode = "group";
                }
                ~MothurOut();
 
index f3330f7672e1fbaef8300f70b88c625ac9857b1c..1150e53735481aea3377ec81f78c0d2544f324e6 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "sharedcommand.h"
 #include "sharedutilities.h"
+#include "counttable.h"
 
 //********************************************************************************************************************
 //sorts lowest to highest
@@ -20,7 +21,8 @@ vector<string> SharedCommand::setParameters(){
        try {
         CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
                CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
+        CommandParameter pcount("count", "InputTypes", "", "", "", "GroupCount", "",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup",false,false); parameters.push_back(pgroup);
                //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
@@ -41,7 +43,8 @@ string SharedCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
-               helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
+               helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
+        helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
                helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
                helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
                //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
@@ -137,13 +140,13 @@ SharedCommand::SharedCommand(string option)  {
                                         if (path == "") {      parameters["group"] = inputDir + it->second;            }
                                 }
                         
-                                /*it = parameters.find("ordergroup");
+                                it = parameters.find("count");
                                 //user has given a template file
                                 if(it != parameters.end()){ 
                                         path = m->hasPath(it->second);
                                         //if the user has not given a path then, add inputdir. else leave path alone.
-                                        if (path == "") {      parameters["ordergroup"] = inputDir + it->second;               }
-                                }*/
+                                        if (path == "") {      parameters["count"] = inputDir + it->second;            }
+                                }
                  
                  it = parameters.find("biom");
                                 //user has given a template file
@@ -181,6 +184,11 @@ SharedCommand::SharedCommand(string option)  {
                         if (groupfile == "not open") { groupfile = ""; abort = true; } 
                         else if (groupfile == "not found") { groupfile = ""; }
                         else {  m->setGroupFile(groupfile); }
+            
+             countfile = validParameter.validFile(parameters, "count", true);
+             if (countfile == "not open") { countfile = ""; abort = true; }    
+             else if (countfile == "not found") { countfile = ""; }
+             else {  m->setCountTableFile(countfile); }
                         
             if ((biomfile == "") && (listfile == "")) { 
                                //is there are current file available for either of these?
@@ -199,12 +207,16 @@ SharedCommand::SharedCommand(string option)  {
                        else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
                        
                        if (listfile != "") {
-                               if (groupfile == "") { 
+                               if ((groupfile == "") && (countfile == "")) { 
                                        groupfile = m->getGroupFile(); 
                                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                                               countfile = m->getCountTableFile(); 
+                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
                                }
                        }
@@ -301,9 +313,7 @@ int SharedCommand::createSharedFromBiom(string filename) {
         
         ifstream in;
         m->openInputFile(biomfile, in);
-        
-        m->getline(in); m->gobble(in);  //grab first '{'
-        
+         
         string matrixFormat = "";
         int numRows = 0;
         int numCols = 0;
@@ -311,83 +321,159 @@ int SharedCommand::createSharedFromBiom(string filename) {
         int shapeNumCols = 0;
         vector<string> otuNames;
         vector<string> groupNames;
-        while (!in.eof()) {
-            
+        map<string, string> fileLines;
+        vector<string> names;
+        int countOpenBrace = 0;
+        int countClosedBrace = 0;
+        int openParen = -1; //account for opening brace
+        int closeParen = 0;
+        bool ignoreCommas = false;
+        bool atComma = false;
+        string line = "";
+        string matrixElementType = "";
+        
+        while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
             if (m->control_pressed) { break; }
             
-            string line = m->getline(in); m->gobble(in);
+            char c = in.get(); m->gobble(in);
             
-            string tag = getTag(line);
+            if (c == '[')               { countOpenBrace++;     }
+            else if (c == ']')          { countClosedBrace++;   }
+            else if (c == '{')          { openParen++;          }
+            else if (c == '}')          { closeParen++;         }
+            else if ((!ignoreCommas) && (c == ','))          { atComma = true;       }  
             
-            if (tag == "type") {
-                //check to make sure this is an OTU table
-                string type = getTag(line);
-                if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
-            }else if (tag == "matrix_type") {
-                //get type and check type
-                matrixFormat = getTag(line);
-                if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
-            }else if (tag == "matrix_element_type") {
-                //get type and check type
-                string matrixElementType = getTag(line);
-                if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
-            }else if (tag == "rows") {
-                //read otu names
-                otuNames = readRows(line, in, numRows);  
-            }else if (tag == "columns") {
-                //read sample names
-                groupNames = readRows(line, in, numCols); 
-                
-                //if users selected groups, then remove the groups not wanted.
-                SharedUtil util;
-                vector<string> Groups = m->getGroups();
-                vector<string> allGroups = groupNames;
-                util.setGroups(Groups, allGroups);
-                m->setGroups(Groups);
-                
-                //fill filehandles with neccessary ofstreams
-                int i;
-                ofstream* temp;
-                for (i=0; i<Groups.size(); i++) {
-                    temp = new ofstream;
-                    filehandles[Groups[i]] = temp;
-                }
-                
-                //set fileroot
-                fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
-                
-                //clears file before we start to write to it below
-                for (int i=0; i<Groups.size(); i++) {
-                    m->mothurRemove((fileroot + Groups[i] + ".rabund"));
-                    outputNames.push_back((fileroot + Groups[i] + ".rabund"));
-                    outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
-                }
-
-            }else if (tag == "shape") {
-                getDims(line, shapeNumRows, shapeNumCols);
-                
-                //check shape
-                if (shapeNumCols != numCols) {
-                    m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
-                }
-                
-                if (shapeNumRows != numRows) {
-                    m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
+            if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true;  }
+            else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false;  }
+            if (atComma && !ignoreCommas) { 
+                if (fileLines.size() == 0) { //clip first {
+                    line = line.substr(1);
                 }
-            }else if (tag == "data") {
-                m->currentBinLabels = otuNames;
+                string tag = getTag(line);
+                fileLines[tag] = line;
+                line = "";
+                atComma = false;
+                ignoreCommas = false;
                 
-                //read data
-                vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
-
-                m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                lookup[0]->printHeaders(out); 
-                printSharedData(lookup, out);
-            }
+            }else {  line += c;  }
+            
+        }
+        if (line != "") {
+            line = line.substr(0, line.length()-1);
+            string tag = getTag(line);
+            fileLines[tag] = line;
         }
         in.close();
         
-                
+        map<string, string>::iterator it;
+        it = fileLines.find("type");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
+        else {
+            string thisLine = it->second;
+            string type = getTag(thisLine);
+            if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true;  }
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("matrix_type");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
+        else {
+            string thisLine = it->second;
+            matrixFormat = getTag(thisLine);
+            if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("matrix_element_type");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
+        else {
+            string thisLine = it->second;
+            matrixElementType = getTag(thisLine);
+            if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
+            if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("rows");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
+        else {
+            string thisLine = it->second;
+            otuNames = readRows(thisLine, numRows);  
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("columns");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
+        else {
+            string thisLine = it->second;
+            //read sample names
+            groupNames = readRows(thisLine, numCols); 
+            
+            //if users selected groups, then remove the groups not wanted.
+            SharedUtil util;
+            vector<string> Groups = m->getGroups();
+            vector<string> allGroups = groupNames;
+            util.setGroups(Groups, allGroups);
+            m->setGroups(Groups);
+            
+            //fill filehandles with necessary ofstreams
+            int i;
+            ofstream* temp;
+            for (i=0; i<Groups.size(); i++) {
+                temp = new ofstream;
+                filehandles[Groups[i]] = temp;
+            }
+            
+            //set fileroot
+            fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
+            
+            //clears file before we start to write to it below
+            for (int i=0; i<Groups.size(); i++) {
+                m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+                outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+                outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+            }
+        }
+        
+        if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+        it = fileLines.find("shape");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
+        else {
+            string thisLine = it->second;
+            getDims(thisLine, shapeNumRows, shapeNumCols);
+            
+            //check shape
+            if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
+            
+            if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
+        }
+        
+        if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("data");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
+        else {
+            string thisLine = it->second;
+            m->currentBinLabels = otuNames;
+            
+            //read data
+            vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
+            
+            m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            lookup[0]->printHeaders(out); 
+            printSharedData(lookup, out);
+        }
+        
+        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } 
+        out.close();
+        
+        if (m->control_pressed) {  m->mothurRemove(filename); return 0; }
+
         return 0;
     }
        catch(exception& e) {
@@ -396,7 +482,7 @@ int SharedCommand::createSharedFromBiom(string filename) {
        }
 }
 //**********************************************************************************************************************
-vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
+vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
        try {
         
         vector<SharedRAbundVector*> lookup; 
@@ -427,7 +513,9 @@ vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string
                 else if ((line[i] == ']') && (inBrackets)) { 
                     inBrackets = false; 
                     int temp;
-                    m->mothurConvert(num, temp);
+                    float temp2;
+                    if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
+                    else { m->mothurConvert(num, temp); }
                     nums.push_back(temp);
                     num = "";
                     
@@ -463,58 +551,6 @@ vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string
             }
         }
         
-        //same as above just reading from file.
-        while (!in.eof()) {
-            
-            char c = in.get(); m->gobble(in);
-            
-            if (m->control_pressed) { return lookup; }
-            
-            //look for opening [ to indicate data is starting
-            if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get();  if (in.eof()) { break; } }
-            else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
-              
-            if (dataStart) {
-                if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get();  if (in.eof()) { break; }  }
-                else if ((c == ']') && (inBrackets)) { 
-                    inBrackets = false; 
-                    int temp;
-                    m->mothurConvert(num, temp);
-                    nums.push_back(temp);
-                    num = "";
-                    
-                    //save info to vectors
-                    if (matrixFormat == "dense") {
-                        
-                        //sanity check
-                        if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data.  OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
-                        
-                        //set abundances for this otu
-                        //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
-                        for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
-                        
-                        otuCount++;
-                    }else {
-                        //sanity check
-                        if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
-                        
-                        //nums contains [otuNum, sampleNum, abundance]
-                        lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
-                    }
-                    nums.clear();
-                }
-                
-                if (inBrackets) {
-                    if (c == ',') {
-                        int temp;
-                        m->mothurConvert(num, temp);
-                        nums.push_back(temp);
-                        num = "";
-                    }else { if (!isspace(c)) { num += c; }  }
-                }
-            }
-        }
-        
         SharedUtil util;
         
                bool remove = false;
@@ -628,7 +664,7 @@ int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
        }
 }
 //**********************************************************************************************************************
-vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
+vector<string> SharedCommand::readRows(string line, int& numRows) {
        try {
         /*"rows":[
          {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
@@ -679,48 +715,6 @@ vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows)
             }
         }
         
-        //keep reading
-        if (!end) {
-            while (!in.eof()) {
-                
-                if (m->control_pressed) { break; }
-                
-                char c = in.get(); m->gobble(in);
-                
-                if (c == '[')               { countOpenBrace++;     }
-                else if (c == ']')          { countClosedBrace++;   }
-                else if (c == '{')          { openParen++;          }
-                else if (c == '}')          { closeParen++;         }
-                else if (openParen != 0)    { nextRow += c;         }  //you are reading the row info
-                
-                
-                //you have reached the end of the rows info
-                if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
-                if ((openParen == closeParen) && (closeParen != 0)) { //process row 
-                    numRows++;
-                    vector<string> items;
-                    m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
-                    string part = items[0]; items.clear();
-                    m->splitAtChar(part, items, ':'); //split part we want containing the ids
-                    string name = items[1];
-                    
-                    //remove "" if needed
-                    int pos = name.find("\"");
-                    if (pos != string::npos) {
-                        string newName = "";
-                        for (int k = 0; k < name.length(); k++) {
-                            if (name[k] != '\"') { newName += name[k]; }
-                        }
-                        name = newName;
-                    }
-                    names.push_back(name);
-                    nextRow = "";
-                    openParen = 0;
-                    closeParen = 0;
-                }  
-            }
-        }
-        
         return names;
     }
        catch(exception& e) {
@@ -729,7 +723,7 @@ vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows)
        }
 }
 //**********************************************************************************************************************
-//designed for things like "type": "OTU table", returns map type -> OTU table
+//designed for things like "type": "OTU table", returns the tag's value (here: OTU table)
 string SharedCommand::getTag(string& line) {
        try {
         bool inQuotes = false;
@@ -762,18 +756,29 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         ofstream out;
         m->openOutputFile(filename, out);
         
-        GroupMap* groupMap = new GroupMap(groupfile);
+        GroupMap* groupMap = NULL;
+        CountTable* countTable = NULL;
+        if (groupfile != "") {
+            groupMap = new GroupMap(groupfile);
+        
+            int groupError = groupMap->readMap();
+            if (groupError == 1) { delete groupMap; return 0; }
+            vector<string> allGroups = groupMap->getNamesOfGroups();
+            m->setAllGroups(allGroups);
+        }else{
+            countTable = new CountTable();
+            countTable->readTable(countfile);
+        }
         
-        int groupError = groupMap->readMap();
-        if (groupError == 1) { delete groupMap; return 0; }
-        vector<string> allGroups = groupMap->getNamesOfGroups();
-        m->setAllGroups(allGroups);
+        if (m->control_pressed) { return 0; }
         
         pickedGroups = false;
         
         //if hte user has not specified any groups then use them all
         if (Groups.size() == 0) {
-            Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
+            if (groupfile != "") { Groups = groupMap->getNamesOfGroups();  }
+            else {  Groups = countTable->getNamesOfGroups();  }
+            m->setGroups(Groups);
         }else { pickedGroups = true; }
         
         //fill filehandles with neccessary ofstreams
@@ -805,7 +810,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         vector<SharedRAbundVector*> lookup; 
         
         if (m->control_pressed) { 
-            delete SharedList; delete groupMap; 
+            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
             for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
             out.close(); m->mothurRemove(filename); 
             for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));             }
@@ -813,31 +818,27 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         }
         
         //sanity check
-        vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
-        int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
+        vector<string> namesSeqs;
+        int numGroupNames = 0;
+        if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
+        else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
+        int error = ListGroupSameSeqs(namesSeqs, SharedList);
         
-        if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
-            m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
+        if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {  //if the user has not specified any groups and their files don't match exit with error
+            m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
             
-            out.close();
-            m->mothurRemove(filename); //remove blank shared file you made
-            
-            createMisMatchFile(SharedList, groupMap);
+            out.close(); m->mothurRemove(filename); //remove blank shared file you made
             
             //delete memory
-            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
-                delete it3->second;
-            }
-            
-            delete SharedList; delete groupMap; 
-            
+            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
             return 0; 
         }
         
         if (error == 1) { m->control_pressed = true; }
         
         //if user has specified groups make new groupfile for them
-        if (pickedGroups) { //make new group file
+        if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
             string groups = "";
             if (m->getNumGroups() < 4) {
                 for (int i = 0; i < m->getNumGroups(); i++) {
@@ -868,7 +869,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         
         while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
             if (m->control_pressed) { 
-                delete SharedList; delete groupMap;
+                delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
                 out.close(); m->mothurRemove(filename); 
                 for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));         }
@@ -885,7 +886,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
                 }
                 
                 if (m->control_pressed) { 
-                    delete SharedList; delete groupMap; 
+                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                     for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                     for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
                     out.close(); m->mothurRemove(filename); 
@@ -915,7 +916,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
                 
                 
                 if (m->control_pressed) { 
-                    delete SharedList; delete groupMap; 
+                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                     for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                     for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
                     out.close(); m->mothurRemove(filename); 
@@ -962,7 +963,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             }
             
             if (m->control_pressed) { 
-                delete groupMap;
+                if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
                 out.close(); m->mothurRemove(filename); 
                 for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));         }
@@ -981,7 +982,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             delete it3->second;
         }
         
-        delete groupMap;
+        if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                
         if (m->control_pressed) { 
             m->mothurRemove(filename); 
@@ -1061,104 +1062,6 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofst
        }
 }
 //**********************************************************************************************************************
-int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
-       try {
-               ofstream outMisMatch;
-               string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
-               
-               //you have sequences in your list file that are not in your group file
-               if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
-                       outputMisMatchName += "missing.group";
-                       m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-                       
-                       m->openOutputFile(outputMisMatchName, outMisMatch);
-                       
-                       set<string> listNames;
-                       set<string>::iterator itList;
-                       
-                       //go through list and if group returns "not found" output it
-                       for (int i = 0; i < SharedList->getNumBins(); i++) {
-                               if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; } 
-                       
-                               string names = SharedList->get(i); 
-                               
-                vector<string> binNames;
-                m->splitAtComma(names, binNames);
-                
-                               for (int j = 0; j < binNames.size(); j++) { 
-                                       string name = binNames[j];
-                                       string group = groupMap->getGroup(name);
-                                       
-                                       if(group == "not found") {      outMisMatch << name << endl;  }
-                                       
-                                       itList = listNames.find(name);
-                                       if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-                                       else { listNames.insert(name); }
-                               }
-                       }
-                       
-                       outMisMatch.close();
-                       
-               
-               }else {//you have sequences in your group file that are not in you list file
-                       
-                       outputMisMatchName += "missing.name";
-                       m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-                       
-                       map<string, string> namesInList;
-                       map<string, string>::iterator itList;
-                       
-                       //go through listfile and get names
-                       for (int i = 0; i < SharedList->getNumBins(); i++) {
-                               if (m->control_pressed) {  return 0; } 
-
-                               
-                               string names = SharedList->get(i); 
-               
-                               vector<string> binNames;
-                m->splitAtComma(names, binNames);
-                
-                               for (int j = 0; j < binNames.size(); j++) { 
-
-                                       string name = binNames[j];
-                                       
-                                       itList = namesInList.find(name);
-                                       if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-
-                                       namesInList[name] = name;
-                                       
-                               }
-                       }
-                       
-                       //get names of sequences in groupfile
-                       vector<string> seqNames = groupMap->getNamesSeqs();
-               
-                       map<string, string>::iterator itMatch;
-                       
-                       m->openOutputFile(outputMisMatchName, outMisMatch);
-                       
-                       //loop through names in seqNames and if they aren't in namesIn list output them
-                       for (int i = 0; i < seqNames.size(); i++) {
-                               if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; } 
-                               
-                               itMatch = namesInList.find(seqNames[i]);
-                               
-                               if (itMatch == namesInList.end()) {
-                               
-                                       outMisMatch << seqNames[i] << endl; 
-                               }
-                       }               
-                       outMisMatch.close();
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SharedCommand", "createMisMatchFile");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
        try {
                int error = 0; 
index 1100c3d30afc344119538e89e7c40a8dfa839578..9d9c194dbbaab4ff7a079058c1c43421937659fe 100644 (file)
@@ -40,7 +40,6 @@ public:
        
 private:
        void printSharedData(vector<SharedRAbundVector*>, ofstream&);
-       int createMisMatchFile(SharedListVector*, GroupMap*);
        int readOrderFile();
        bool isValidGroup(string, vector<string>);
        int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
@@ -48,13 +47,13 @@ private:
     int createSharedFromListGroup(string);
     int createSharedFromBiom(string);
     string getTag(string&);
-    vector<string> readRows(string, ifstream&, int&); 
+    vector<string> readRows(string, int&); 
     int getDims(string, int&, int&);
-    vector<SharedRAbundVector*> readData(string, string, ifstream&, vector<string>&, int);
+    vector<SharedRAbundVector*> readData(string, string, string, vector<string>&, int);
        
        vector<string> Groups, outputNames, order;
        set<string> labels;
-       string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile;
+       string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile, countfile;
        bool firsttime, pickedGroups, abort, allLines;
        map<string, ofstream*> filehandles;
        map<string, ofstream*>::iterator it3;
index 6dfcb97f0aff013d15c1c641e93a24b0d7b229a3..2cecb5de49d2aca77c33f21b77d584d6c15f893e 100644 (file)
 
 /***********************************************************************/
 
-SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
 
 /***********************************************************************/
 
-SharedListVector::SharedListVector(int n):     DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector(int n):     DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
 
 /***********************************************************************/
 SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
        try {
                //set up groupmap for later.
-               groupmap = new GroupMap(m->getGroupFile());
-               groupmap->readMap(); 
+        if (m->groupMode == "group") {
+            groupmap = new GroupMap(m->getGroupFile());
+            groupmap->readMap(); 
+        }else {
+            countTable = new CountTable();
+            countTable->readTable(m->getCountTableFile());
+        }
 
                int hold;
                string inputData;
@@ -188,27 +193,34 @@ SAbundVector SharedListVector::getSAbundVector(){
 /***********************************************************************/
 SharedOrderVector* SharedListVector::getSharedOrderVector(){
        try {
-               string groupName, names, name;
-       
                SharedOrderVector* order = new SharedOrderVector();
                order->setLabel(label);
        
                for(int i=0;i<numBins;i++){
                        int binSize = m->getNumNames(get(i));   //find number of individual in given bin        
-                       names = get(i);
-                       while (names.find_first_of(',') != -1) { 
-                               name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               groupName = groupmap->getGroup(name);
-                               
-                               if(groupName == "not found") {  m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
+                       string names = get(i);
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+            if (m->groupMode != "group") {
+                binSize = 0;
+                for (int j = 0; j < binNames.size(); j++) {  binSize += countTable->getNumSeqs(binNames[i]);  }
+            }
+                       for (int j = 0; j < binNames.size(); j++) { 
+                if (m->control_pressed) { return order; }
+                if (m->groupMode == "group") {
+                    string groupName = groupmap->getGroup(binNames[i]);
+                    if(groupName == "not found") {     m->mothurOut("Error: Sequence '" + binNames[i] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
                                
-                               order->push_back(i, binSize, groupName);  //i represents what bin you are in
+                    order->push_back(i, binSize, groupName);  //i represents what bin you are in
+                }else {
+                    vector<int> groupAbundances = countTable->getGroupCounts(binNames[i]);
+                    vector<string> groupNames = countTable->getNamesOfGroups();
+                    for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true.
+                        if (m->control_pressed) { return order; }
+                        for (int l = 0; l < groupAbundances[k]; l++) {  order->push_back(i, binSize, groupNames[k]);  }
+                    }
+                }
                        }
-                       //get last name
-                       groupName = groupmap->getGroup(names);
-                       if(groupName == "not found") {  m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                       order->push_back(i, binSize, groupName);
                }
 
                random_shuffle(order->begin(), order->end());
@@ -225,25 +237,23 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){
 SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
        try {
                SharedRAbundVector rav(data.size());
-               string group, names, name;
                
                for(int i=0;i<numBins;i++){
-                       names = get(i);  
-                       while (names.find_first_of(',') != -1) { 
-                               name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               group = groupmap->getGroup(name);
-                               if(group == "not found") {      m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                               if (group == groupName) { //this name is in the group you want the vector for.
-                                       rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
-                               }
-                       }
-                       
-                       //get last name
-                       groupName = groupmap->getGroup(names);
-                       if(groupName == "not found") {  m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                       if (group == groupName) { //this name is in the group you want the vector for.
-                                       rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
+                       string names = get(i);
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+            for (int j = 0; j < binNames.size(); j++) { 
+                               if (m->control_pressed) { return rav; }
+                if (m->groupMode == "group") {
+                    string group = groupmap->getGroup(binNames[j]);
+                    if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
+                    if (group == groupName) { //this name is in the group you want the vector for.
+                        rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
+                    }
+                }else {
+                    int count = countTable->getGroupCount(binNames[j], groupName);
+                    rav.set(i, rav.getAbundance(i) + count, groupName);
+                }
                        }
                }
                
@@ -264,11 +274,13 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                SharedUtil* util;
                util = new SharedUtil();
                vector<SharedRAbundVector*> lookup;  //contains just the groups the user selected
+        vector<SharedRAbundVector*> lookupDelete;
                map<string, SharedRAbundVector*> finder;  //contains all groups in groupmap
-               string group, names, name;
                
                vector<string> Groups = m->getGroups();
-               vector<string> allGroups = groupmap->getNamesOfGroups();
+        vector<string> allGroups;
+               if (m->groupMode == "group") {  allGroups = groupmap->getNamesOfGroups();  }
+        else {  allGroups = countTable->getNamesOfGroups();  }
                util->setGroups(Groups, allGroups);
                m->setGroups(Groups);
                delete util;
@@ -280,47 +292,31 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                        finder[allGroups[i]]->setGroup(allGroups[i]);
                        if (m->inUsersGroups(allGroups[i], m->getGroups())) {  //if this group is in user groups
                                lookup.push_back(finder[allGroups[i]]);
-                       }
+                       }else {
+                lookupDelete.push_back(finder[allGroups[i]]);
+            }
                }
        
                //fill vectors
                for(int i=0;i<numBins;i++){
-                       names = get(i);  
-                       int nameLength = names.size();
-                       string seqName = "";
-                       
-                       for(int j=0;j<nameLength;j++){
-                               if(names[j] == ','){
-                                       group = groupmap->getGroup(seqName);
-                                       if(group == "not found") {      m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                                       finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-                                       
-                                       seqName = "";
-                               }
-                               else{
-                                       seqName += names[j];
-                               }
+                       string names = get(i);  
+                       vector<string> binNames;
+            m->splitAtComma(names, binNames);
+            for (int j = 0; j < binNames.size(); j++) { 
+                if (m->groupMode == "group") {
+                    string group = groupmap->getGroup(binNames[j]);
+                    if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
+                    finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in      
+                }else{
+                    vector<int> counts = countTable->getGroupCounts(binNames[j]);
+                    for (int k = 0; k < allGroups.size(); k++) {
+                        finder[allGroups[k]]->set(i, finder[allGroups[k]]->getAbundance(i) + counts[k], allGroups[k]);
+                    }
+                }
                        }
-                       group = groupmap->getGroup(seqName);
-                       if(group == "not found") {      m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                       finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-                       
-                       
-                       
-//                     while (names.find_first_of(',') != -1) { 
-//                             name = names.substr(0,names.find_first_of(','));
-//                             names = names.substr(names.find_first_of(',')+1, names.length());
-//                             group = groupmap->getGroup(name);
-//                             if(group == "not found") {      m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-//                             finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-//                     }
-                       
-                       //get last name
-//                     group = groupmap->getGroup(names);
-//                     if(group == "not found") {      m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-//                     finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-                       
                }
+        
+        for (int j = 0; j < lookupDelete.size(); j++) {  delete lookupDelete[j];  }
 
                return lookup;
        }
@@ -355,7 +351,14 @@ OrderVector SharedListVector::getOrderVector(map<string,int>* orderMap = NULL){
                        OrderVector ov;
                
                        for(int i=0;i<data.size();i++){
-                               int binSize = m->getNumNames(data[i]);          
+                string names = data[i];
+                vector<string> binNames;
+                m->splitAtComma(names, binNames);
+                               int binSize = binNames.size();  
+                if (m->groupMode != "group") { //count-file mode: each name stands for getNumSeqs(name) sequences, so the bin size is the sum of those counts
+                    binSize = 0;
+                    for (int j = 0; j < binNames.size(); j++) {  binSize += countTable->getNumSeqs(binNames[j]);  } //index with j (name within this bin); i is the OTU index into data
+                }
                                for(int j=0;j<binSize;j++){
                                        ov.push_back(i);
                                }
@@ -372,31 +375,15 @@ OrderVector SharedListVector::getOrderVector(map<string,int>* orderMap = NULL){
                
                        for(int i=0;i<data.size();i++){
                                string listOTU = data[i];
-                               int length = listOTU.size();
-                               
-                               string seqName="";
-                       
-                               for(int j=0;j<length;j++){
-                               
-                                       if(listOTU[j] != ','){
-                                               seqName += listOTU[j];
-                                       }
-                                       else{
-                                               if(orderMap->count(seqName) == 0){
-                                                       m->mothurOut(seqName + " not found, check *.names file\n");
-                                                       exit(1);
-                                               }
-                                       
-                                               ov.set((*orderMap)[seqName], i);
-                                               seqName = "";
-                                       }                                               
-                               }
-                       
-                               if(orderMap->count(seqName) == 0){
-                                       m->mothurOut(seqName + " not found, check *.names file\n");
-                                       exit(1);
+                               vector<string> binNames;
+                m->splitAtComma(listOTU, binNames);
+                for (int j = 0; j < binNames.size(); j++) { 
+                    if(orderMap->count(binNames[j]) == 0){
+                        m->mothurOut(binNames[j] + " not found, check *.names file\n");
+                        exit(1);
+                    }
+                    ov.set((*orderMap)[binNames[j]], i);
                                }
-                               ov.set((*orderMap)[seqName], i);        
                        }
                
                        ov.setLabel(label);
index 56ea802a1b1bca511454f83ddcb9e8f29bd5ed27..81779257346d4b25dbecf092a6c730162d7d4991 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "datavector.hpp"
 #include "groupmap.h"
+#include "counttable.h"
 #include "sharedrabundvector.h"
 #include "sharedsabundvector.h"
 
@@ -32,8 +33,8 @@ public:
        SharedListVector();
        SharedListVector(int);
        SharedListVector(ifstream&);
-       SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; };
-       ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } };
+       SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; };
+       ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } };
        
        int getNumBins()                                                        {       return numBins;         }
        int getNumSeqs()                                                        {       return numSeqs;         }
@@ -58,6 +59,7 @@ public:
 private:
        vector<string> data;  //data[i] is a list of names of sequences in the ith OTU.
        GroupMap* groupmap;
+    CountTable* countTable;
        int maxRank;
        int numBins;
        int numSeqs;