]> git.donarmstrong.com Git - mothur.git/commitdiff
Merge branch 'master' of https://github.com/mothur/mothur.git into development
authorKathryn Iverson <kd.iverson@gmail.com>
Tue, 3 Jul 2012 16:40:45 +0000 (12:40 -0400)
committerKathryn Iverson <kd.iverson@gmail.com>
Tue, 3 Jul 2012 16:40:45 +0000 (12:40 -0400)
30 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
countseqscommand.cpp
countseqscommand.h
counttable.cpp [new file with mode: 0644]
counttable.h [new file with mode: 0644]
createdatabasecommand.cpp
createdatabasecommand.h
distancecommand.cpp
getcoremicrobiomecommand.cpp
getcurrentcommand.cpp
getotulabelscommand.cpp
makecontigscommand.cpp
makefile
mothurout.cpp
mothurout.h
nameassignment.cpp
optionparser.cpp
pairwiseseqscommand.cpp
rarefactsharedcommand.cpp
rarefactsharedcommand.h
removeotulabelscommand.cpp
sharedcommand.cpp
sharedcommand.h
sharedlistvector.cpp
sharedlistvector.h
shhhercommand.cpp
unifracunweightedcommand.cpp
unifracweightedcommand.cpp
venncommand.cpp

index 6db66c6184bc3798ee4d8ba2fc7247a9feb6acb4..13f12e072b616b5337cd5bece8f3c68c44ca6a3d 100644 (file)
@@ -26,6 +26,7 @@
                A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
                A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
                A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
+               A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
                A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
                A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
                A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
                A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
                A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
                A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
+               A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
+               A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = "<group>"; };
                A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
                A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
                A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
                                A7E9B66312D37EC400DA6239 /* blastalign.hpp */,
                                A7E9B66412D37EC400DA6239 /* blastdb.cpp */,
                                A7E9B66512D37EC400DA6239 /* blastdb.hpp */,
+                               A74D59A6159A1E3600043046 /* counttable.h */,
+                               A74D59A3159A1E2000043046 /* counttable.cpp */,
+                               A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
                                A7E9B6BD12D37EC400DA6239 /* database.cpp */,
                                A7E9B6BE12D37EC400DA6239 /* database.hpp */,
                                A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
-                               A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
                                A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
                                A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
                                A7E9B6DF12D37EC400DA6239 /* fastamap.h */,
                                A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */,
                                A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
                                A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */,
+                               A74D59A4159A1E2000043046 /* counttable.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.25.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"5/01/2012\\\"\"",
+                                       "VERSION=\"\\\"1.26.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
                                );
                                "GCC_VERSION[arch=*]" = "";
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                GCC_MODEL_TUNING = "";
                                GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.25.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"4/30/2012\\\"\"",
+                                       "VERSION=\"\\\"1.26.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
                                );
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                GCC_WARN_ABOUT_RETURN_TYPE = YES;
index 54ff4a12d120ddcc482d84876ac53a04f6e3a04a..02af6767b00b724459c054839b789798f4b295f7 100644 (file)
@@ -289,6 +289,7 @@ CommandFactory::CommandFactory(){
     commands["remove.otulabels"]    = "remove.otulabels";
     commands["make.contigs"]        = "make.contigs";
     commands["load.logfile"]        = "load.logfile";
+    commands["make.table"]          = "make.table";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -482,7 +483,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "make.shared")                   {       command = new SharedCommand(optionString);                                      }
                else if(commandName == "get.commandinfo")               {       command = new GetCommandInfoCommand(optionString);                      }
                else if(commandName == "deunique.tree")                 {       command = new DeuniqueTreeCommand(optionString);                        }
-               else if(commandName == "count.seqs")                    {       command = new CountSeqsCommand(optionString);                           }
+               else if((commandName == "count.seqs") || (commandName == "make.table"))                 {       command = new CountSeqsCommand(optionString);                           }
                else if(commandName == "count.groups")                  {       command = new CountGroupsCommand(optionString);                         }
                else if(commandName == "clear.memory")                  {       command = new ClearMemoryCommand(optionString);                         }
                else if(commandName == "summary.tax")                   {       command = new SummaryTaxCommand(optionString);                          }
@@ -636,7 +637,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "make.shared")                   {       pipecommand = new SharedCommand(optionString);                                  }
                else if(commandName == "get.commandinfo")               {       pipecommand = new GetCommandInfoCommand(optionString);                  }
                else if(commandName == "deunique.tree")                 {       pipecommand = new DeuniqueTreeCommand(optionString);                    }
-               else if(commandName == "count.seqs")                    {       pipecommand = new CountSeqsCommand(optionString);                               }
+               else if((commandName == "count.seqs") || (commandName == "make.table"))                 {       pipecommand = new CountSeqsCommand(optionString);                               }
                else if(commandName == "count.groups")                  {       pipecommand = new CountGroupsCommand(optionString);                             }
                else if(commandName == "clear.memory")                  {       pipecommand = new ClearMemoryCommand(optionString);                             }
                else if(commandName == "summary.tax")                   {       pipecommand = new SummaryTaxCommand(optionString);                              }
@@ -776,7 +777,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "make.shared")                   {       shellcommand = new SharedCommand();                                     }
                else if(commandName == "get.commandinfo")               {       shellcommand = new GetCommandInfoCommand();                     }
                else if(commandName == "deunique.tree")                 {       shellcommand = new DeuniqueTreeCommand();                       }
-               else if(commandName == "count.seqs")                    {       shellcommand = new CountSeqsCommand();                          }
+               else if((commandName == "count.seqs") || (commandName == "make.table"))                 {       shellcommand = new CountSeqsCommand();                          }
                else if(commandName == "count.groups")                  {       shellcommand = new CountGroupsCommand();                        }
                else if(commandName == "clear.memory")                  {       shellcommand = new ClearMemoryCommand();                        }
                else if(commandName == "summary.tax")                   {       shellcommand = new SummaryTaxCommand();                         }
index 8a18fc755e66a04d6013ecbf7dd088821088501a..210dd9641a1736f8b0ea509f96b210dbbe3bd2e1 100644 (file)
@@ -16,6 +16,7 @@ vector<string> CountSeqsCommand::setParameters(){
        try {
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
                CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -33,11 +34,12 @@ vector<string> CountSeqsCommand::setParameters(){
 string CountSeqsCommand::getHelpString(){      
        try {
                string helpString = "";
-               helpString += "The count.seqs command reads a name file and outputs a .seq.count file.  You may also provide a group file to get the counts broken down by group.\n";
+               helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count.table file.  You may also provide a group file to get the counts broken down by group.\n";
                helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
+        helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
                helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
                helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n";
-               helpString += "Example count.seqs(name=amazon.names).\n";
+               helpString += "Example count.seqs(name=amazon.names) or make.table(name=amazon.names).\n";
                helpString += "Note: No spaces between parameter labels (i.e. name), '=' and parameters (i.e.yourNameFile).\n";
                return helpString;
        }
@@ -56,7 +58,7 @@ string CountSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
         it = outputTypes.find(type);
         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
         else {
-            if (type == "summary") {  outputFileName =  "seq.count"; }
+            if (type == "counttable") {  outputFileName =  "count.table"; }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -72,7 +74,7 @@ CountSeqsCommand::CountSeqsCommand(){
                abort = true; calledHelp = true; 
                setParameters();
                vector<string> tempOutNames;
-               outputTypes["summary"] = tempOutNames;
+               outputTypes["counttable"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "CountSeqsCommand", "CountSeqsCommand");
@@ -104,7 +106,7 @@ CountSeqsCommand::CountSeqsCommand(string option)  {
                        
                        //initialize outputTypes
                        vector<string> tempOutNames;
-                       outputTypes["summary"] = tempOutNames;
+                       outputTypes["counttable"] = tempOutNames;
                        
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -146,6 +148,9 @@ CountSeqsCommand::CountSeqsCommand(string option)  {
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = "all"; }
                        m->splitAtDash(groups, Groups);
+            
+            string temp = validParameter.validFile(parameters, "large", false);                if (temp == "not found") {      temp = "F";     }
+                       large = m->isTrue(temp);
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(namefile);               }
@@ -165,12 +170,45 @@ int CountSeqsCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               ofstream out;
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("summary");
-               m->openOutputFile(outputFileName, out); outputTypes["summary"].push_back(outputFileName);
-               out << "Representative_Sequence\ttotal\t";
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("counttable");
+               
+        int total = 0;
+        if (!large) { total = processSmall(outputFileName); }
+        else { total = processLarge(outputFileName);  }
+                               
+               if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+               
+        //set rabund file as new current rabundfile
+               itTypes = outputTypes.find("counttable");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { string current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
+        
+        m->mothurOutEndLine();
+               m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               m->mothurOutEndLine();
                
-               GroupMap* groupMap;
+               return 0;               
+       }
+       
+       catch(exception& e) {
+               m->errorOut(e, "CountSeqsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int CountSeqsCommand::processSmall(string outputFileName){
+       try {
+        ofstream out;
+        m->openOutputFile(outputFileName, out); outputTypes["counttable"].push_back(outputFileName);
+        outputNames.push_back(outputFileName); outputTypes["counttable"].push_back(outputFileName);
+               out << "Representative_Sequence\ttotal\t";
+        
+        GroupMap* groupMap;
                if (groupfile != "") { 
                        groupMap = new GroupMap(groupfile); groupMap->readMap(); 
                        
@@ -199,7 +237,7 @@ int CountSeqsCommand::execute(){
                        if (m->control_pressed) { break; }
                        
                        string firstCol, secondCol;
-                       in >> firstCol >> secondCol; m->gobble(in);
+                       in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
                        
                        vector<string> names;
                        m->splitAtChar(secondCol, names, ',');
@@ -240,24 +278,241 @@ int CountSeqsCommand::execute(){
                        total += names.size();
                }
                in.close();
+        out.close();
                
                if (groupfile != "") { delete groupMap; }
+
+        return total;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountSeqsCommand", "processSmall");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int CountSeqsCommand::processLarge(string outputFileName){
+       try {
+        set<string> namesOfGroups;
+        map<string, int> initial;
+        for (set<string>::iterator it = namesOfGroups.begin(); it != namesOfGroups.end(); it++) { initial[(*it)] = 0;  }
+        ofstream out;
+        m->openOutputFile(outputFileName, out); 
+        outputNames.push_back(outputFileName); outputTypes["counttable"].push_back(outputFileName);
+               out << "Representative_Sequence\ttotal\t";
+        if (groupfile == "") { out << endl; }
+        
+        map<string, unsigned long long> namesToIndex;
+        string outfile = m->getRootName(groupfile) + "sorted.groups.temp";
+        string outName = m->getRootName(namefile) + "sorted.name.temp";
+        map<int, string> indexToName;
+        map<int, string> indexToGroup;
+        if (groupfile != "") { 
+            time_t estart = time(NULL);
+            //convert name file to redundant -> unique.  set unique name equal to index so we can use vectors, save name for later.
+            string newNameFile = m->getRootName(namefile) + ".name.temp";
+            string newGroupFile = m->getRootName(groupfile) + ".group.temp";
+            indexToName = processNameFile(newNameFile);
+            indexToGroup = getGroupNames(newGroupFile, namesOfGroups);
+            
+            //sort file by first column so the names of sequences will be easier to find
+            //use the unix sort 
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                string command = "sort -n " + newGroupFile + " -o " + outfile;
+                system(command.c_str());
+                command = "sort -n " + newNameFile + " -o " + outName;
+                system(command.c_str());
+            #else //sort using windows sort
+                string command = "sort " + newGroupFile + " /O " + outfile;
+                system(command.c_str());
+                command = "sort " + newNameFile + " /O " + outName;
+                system(command.c_str());
+            #endif
+            m->mothurRemove(newNameFile);
+            m->mothurRemove(newGroupFile);
+            
+            m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort and index the group and name files. "); m->mothurOutEndLine();
+        }else { outName = namefile; }
+         
+        time_t estart = time(NULL);
+        //open input file
+               ifstream in;
+               m->openInputFile(outName, in);
+        
+        //open input file
+               ifstream in2;
                
-               if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+               int total = 0;
+        vector< vector<int> > nameMapCount;
+        if (groupfile != "") {
+            m->openInputFile(outfile, in2);
+            nameMapCount.resize(indexToName.size());
+            for (int i = 0; i < nameMapCount.size(); i++) {
+                nameMapCount[i].resize(indexToGroup.size(), 0);
+            }
+        }
+        
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+                       
+                       string firstCol;
+                       in >> firstCol;  m->gobble(in);
+                       
+                       if (groupfile != "") {
+                int uniqueIndex;
+                in >> uniqueIndex; m->gobble(in);
+                
+                string name; int groupIndex;
+                in2 >> name >> groupIndex; m->gobble(in2);
+                
+                if (name != firstCol) { m->mothurOut("[ERROR]: found " + name + " in your groupfile, but " + firstCol + " was in your namefile, please correct.\n"); m->control_pressed = true; }
+                
+                nameMapCount[uniqueIndex][groupIndex]++;
+                total++;
+            }else { 
+                string secondCol;
+                in >> secondCol; m->gobble(in);
+                int num = m->getNumNames(secondCol);
+                out << firstCol << '\t' << num << endl;
+                total += num;
+            }
+               }
+               in.close();
+        
+        if (groupfile != "") {
+            m->mothurRemove(outfile);
+            m->mothurRemove(outName);
+            in2.close();
+            for (map<int, string>::iterator it = indexToGroup.begin(); it != indexToGroup.end(); it++) { out << it->second << '\t';  }
+            out << endl;
+            for (int i = 0; i < nameMapCount.size(); i++) {
+                string totalsLine = "";
+                int seqTotal = 0;
+                for (int j = 0; j < nameMapCount[i].size(); j++) {
+                    seqTotal += nameMapCount[i][j];
+                    totalsLine += toString(nameMapCount[i][j]) + '\t';
+                }
+                out << indexToName[i] << '\t' << seqTotal << '\t' << totalsLine << endl;
+            }
+        }
+        
+        out.close();
+        
+        m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to create the count table file. "); m->mothurOutEndLine();
+        
+        return total;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountSeqsCommand", "processLarge");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+map<int, string> CountSeqsCommand::processNameFile(string name) {
+       try {
+        map<int, string> indexToNames;
+        
+        ofstream out;
+        m->openOutputFile(name, out);
+        
+        //open input file
+               ifstream in;
+               m->openInputFile(namefile, in);
+        
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        int count = 0;
+        
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+                       
+            in.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    //parse names into vector
+                    vector<string> theseNames;
+                    m->splitAtComma(secondCol, theseNames);
+                    for (int i = 0; i < theseNames.size(); i++) {  out << theseNames[i] << '\t' << count << endl;  }
+                    indexToNames[count] = firstCol;
+                    pairDone = false; 
+                    count++;
+                }
+            }
+               }
+               in.close();
+        out.close();
                
-        m->mothurOutEndLine();
-               m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Name: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine();    
-               m->mothurOutEndLine();
+        return indexToNames;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountSeqsCommand", "processNameFile");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& namesOfGroups) {
+       try {
+        map<int, string> indexToGroups;
+        map<string, int> groupIndex;
+        map<string, int>::iterator it;
+        
+        ofstream out;
+        m->openOutputFile(filename, out);
+        
+        //open input file
+               ifstream in;
+               m->openInputFile(groupfile, in);
+        
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        int count = 0;
+        
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+                       
+            in.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    it = groupIndex.find(secondCol);
+                    if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
+                        groupIndex[secondCol] = count;
+                        count++;
+                    }
+                    out << firstCol << '\t' << groupIndex[secondCol] << endl; 
+                    namesOfGroups.insert(secondCol);
+                    pairDone = false; 
+                }
+            }
+               }
+               in.close();
+        out.close();
                
-               return 0;               
+        for (it = groupIndex.begin(); it != groupIndex.end(); it++) {  indexToGroups[it->second] = it->first;  }
+        
+        return indexToGroups;
        }
-       
        catch(exception& e) {
-               m->errorOut(e, "CountSeqsCommand", "execute");
+               m->errorOut(e, "CountSeqsCommand", "getGroupNames");
                exit(1);
        }
 }
 //**********************************************************************************************************************
+
+
+
index 54982c11939e82fae8de341f74c1aa3a01e4f609..555d6a74fa36fa034b6d0aafeb5f07bd0c4e7bbc 100644 (file)
@@ -34,8 +34,14 @@ public:
        
 private:
        string namefile, groupfile, outputDir, groups;
-       bool abort;
-       vector<string> Groups;
+       bool abort, large;
+       vector<string> Groups, outputNames;
+    
+    int processSmall(string);
+    int processLarge(string);
+    map<int, string> processNameFile(string);
+    map<int, string> getGroupNames(string, set<string>&);
+    
 };
 
 #endif
diff --git a/counttable.cpp b/counttable.cpp
new file mode 100644 (file)
index 0000000..7f08509
--- /dev/null
@@ -0,0 +1,209 @@
+//
+//  counttable.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 6/26/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "counttable.h"
+
+
+/************************************************************/
+int CountTable::readTable(string file) {
+    try {
+        filename = file;
+        ifstream in;
+        m->openInputFile(filename, in);
+        
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+        
+        int numGroups = 0;
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        map<int, string> originalGroupIndexes;
+        if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2;  }
+        for (int i = 2; i < columnHeaders.size(); i++) {  groups.push_back(columnHeaders[i]);  originalGroupIndexes[i-2] = columnHeaders[i]; totalGroups.push_back(0); }
+        //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+        
+        bool error = false;
+        string name;
+        int thisTotal;
+        uniques = 0;
+        total = 0;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            in >> name >> thisTotal; m->gobble(in);
+            
+            //if group info, then read it
+            vector<int> groupCounts; groupCounts.resize(numGroups, 0);
+            for (int i = 0; i < numGroups; i++) {  int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex];  }
+            
+            map<string, int>::iterator it = indexNameMap.find(name);
+            if (it == indexNameMap.end()) {
+                if (hasGroups) {  counts.push_back(groupCounts);  }
+                indexNameMap[name] = uniques;
+                totals.push_back(thisTotal);
+                total += thisTotal;
+                uniques++;
+            }else {
+                error = true;
+                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); 
+            }
+        }
+        in.close();
+        
+        if (error) { m->control_pressed = true; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "readTable");
+               exit(1);
+       }
+}
+/************************************************************/
+//group counts for a seq
+vector<int> CountTable::getGroupCounts(string seqName) {
+    try {
+        vector<int> temp;
+        if (hasGroups) {
+            map<string, int>::iterator it = indexNameMap.find(seqName);
+            if (it == indexNameMap.end()) {
+                m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                temp = counts[it->second];
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+        
+        return temp;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getGroupCounts");
+               exit(1);
+       }
+}
+/************************************************************/
+//total number of sequences for the group
+int CountTable::getGroupCount(string groupName) {
+    try {
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(groupName);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                return totalGroups[it->second];
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getGroupCount");
+               exit(1);
+       }
+}
+/************************************************************/
+//total number of sequences for the seq for the group
+int CountTable::getGroupCount(string seqName, string groupName) {
+    try {
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(groupName);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                map<string, int>::iterator it2 = indexNameMap.find(seqName);
+                if (it2 == indexNameMap.end()) {
+                    m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                }else { 
+                    return counts[it2->second][it->second];
+                }
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getGroupCount");
+               exit(1);
+       }
+}
+/************************************************************/
+//total number of seqs represented by seq
+int CountTable::getNumSeqs(string seqName) {
+    try {
+                
+        map<string, int>::iterator it = indexNameMap.find(seqName);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else { 
+            return totals[it->second];
+        }
+
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getNumSeqs");
+               exit(1);
+       }
+}
+/************************************************************/
+//returns names of seqs
+vector<string> CountTable::getNamesOfSeqs() {
+    try {
+        vector<string> names;
+        for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+            names.push_back(it->first);
+        }
+                
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getNamesOfSeqs");
+               exit(1);
+       }
+}
+/************************************************************/
+//returns names of seqs
+int CountTable::mergeCounts(string seq1, string seq2) {
+    try {
+        map<string, int>::iterator it = indexNameMap.find(seq1);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else { 
+            map<string, int>::iterator it2 = indexNameMap.find(seq2);
+            if (it2 == indexNameMap.end()) {
+                m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                //merge data
+                for (int i = 0; i < groups.size(); i++) {
+                    counts[it->second][i] += counts[it2->second][i];
+                    counts[it2->second][i] = 0;
+                }
+                totals[it->second] += totals[it2->second];
+                totals[it2->second] = 0;
+                uniques--;
+                indexNameMap.erase(it2); 
+            }
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getNamesOfSeqs");
+               exit(1);
+       }
+}
+
+/************************************************************/
+
+
diff --git a/counttable.h b/counttable.h
new file mode 100644 (file)
index 0000000..ffc0816
--- /dev/null
@@ -0,0 +1,78 @@
+#ifndef Mothur_counttable_h
+#define Mothur_counttable_h
+
+
+//
+//  counttable.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 6/26/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+//This class is designed to read a count table file and store its data.
+//count table files look like:
+
+/*
+ Representative_Sequence       total   F003D000        F003D002        F003D004        F003D006        F003D008        F003D142        F003D144        F003D146        F003D148        F003D150        MOCK.GQY1XT001  
+ GQY1XT001C296C        6051    409     985     923     937     342     707     458     439     387     464     0       
+ GQY1XT001A3TJI        4801    396     170     413     442     306     769     581     576     497     651     0       
+ GQY1XT001CS2B8        3018    263     226     328     460     361     336     248     290     187     319     0       
+ GQY1XT001CD9IB        2736    239     177     256     405     306     286     263     248     164     392     0       
+ or if no group info was used to create it
+ Representative_Sequence       total   
+ GQY1XT001C296C        6051
+ GQY1XT001A3TJI        4801
+ GQY1XT001CS2B8        3018
+ GQY1XT001CD9IB        2736
+ GQY1XT001ARCB1        2183
+ GQY1XT001CNF2P        2796
+ GQY1XT001CJMDA        1667
+ GQY1XT001CBVJB        3758
+ */
+
+
+#include "mothurout.h"
+
+class CountTable {
+    
+    public:
+    
+        CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; }
+        ~CountTable() {}
+    
+        int readTable(string);
+    
+        bool hasGroupInfo() { return hasGroups; }
+        int getNumGroups() { return groups.size(); }
+        vector<string> getNamesOfGroups() {  return groups;   }  //returns group names, if no group info vector is blank.
+    
+        vector<int> getGroupCounts(string);  //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getGroups function.
+        int getGroupCount(string, string); //returns number of seqs for that group for that seq
+        int getGroupCount(string); // returns total seqs for that group
+        int getNumSeqs(string); //returns total seqs for that seq
+        int getNumSeqs() { return total; } //return total number of seqs
+        int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs
+        int getGroupIndex(string); //returns index in getGroupCounts vector of specific group
+        vector<string> getNamesOfSeqs();
+        int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
+    
+    private:
+        string filename;
+        MothurOut* m;
+        bool hasGroups;
+        int total, uniques;
+        vector<string> groups;
+        vector< vector<int> > counts;
+        vector<int> totals;
+        vector<int> totalGroups;
+        map<string, int> indexNameMap;
+        map<string, int> indexGroupMap;
+    
+};
+
+#endif
index febd762592acdca2df39b7d8d076f0020dfa444d..5919f0c005888fba82da66d10c8dd657ab5c366b 100644 (file)
@@ -15,7 +15,8 @@ vector<string> CreateDatabaseCommand::setParameters(){
                CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
                CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
+               CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(plist);
+        CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(pshared);
                CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -34,8 +35,8 @@ vector<string> CreateDatabaseCommand::setParameters(){
 string CreateDatabaseCommand::getHelpString(){ 
        try {
                string helpString = "";
-               helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
-               helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
+               helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
+               helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
         helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
         helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
@@ -159,6 +160,14 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
                        }
             
                        
@@ -167,14 +176,33 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
                        
                        //check for required parameters
                        listfile = validParameter.validFile(parameters, "list", true);
-                       if (listfile == "not found") {                          
-                               //if there is a current list file, use it
+                       if (listfile == "not found") {  listfile = "";                  }
+                       else if (listfile == "not open") { listfile = ""; abort = true; }       
+                       else { m->setListFile(listfile); }
+            
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not found") {        sharedfile = "";                        }
+                       else if (sharedfile == "not open") { sharedfile = ""; abort = true; }   
+                       else { m->setSharedFile(sharedfile); }
+            
+            if ((sharedfile == "") && (listfile == "")) { 
+                               //is there are current file available for either of these?
+                               //give priority to list, then shared
                                listfile = m->getListFile(); 
                                if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+                               else { 
+                                       sharedfile = m->getSharedFile(); 
+                                       if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine(); 
+                                               abort = true;
+                                       }
+                               }
                        }
-                       else if (listfile == "not open") { abort = true; }      
-                       else { m->setListFile(listfile); }
+                       else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; }
+            
+            if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } }
+            else { if (outputDir == "") { outputDir = m->hasPath(listfile); } }
                        
                        contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
                        if (contaxonomyfile == "not found") {  //if there is a current list file, use it
@@ -219,7 +247,8 @@ int CreateDatabaseCommand::execute(){
         //taxonomies holds the taxonomy info for each Otu
         //classifyOtuSizes holds the size info of each Otu to help with error checking
         vector<string> taxonomies;
-        vector<int> classifyOtuSizes = readTax(taxonomies);
+        vector<string> otuLabels;
+        vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
         
         if (m->control_pressed) { return 0; }
         
@@ -230,7 +259,7 @@ int CreateDatabaseCommand::execute(){
         
         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
         map<string, string> repNames;
-        int numUniqueNamesFile = m->readNames(repnamesfile, repNames);
+        int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
         
         //are there the same number of otus in the fasta and name files
         if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
@@ -251,86 +280,130 @@ int CreateDatabaseCommand::execute(){
         
         if (m->control_pressed) { return 0; }
         
-        //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
-        ListVector* list = getList();
-        
-        if (m->control_pressed) { delete list; return 0; }
         
-        GroupMap* groupmap = NULL;
-        if (groupfile != "") {
-                       groupmap = new GroupMap(groupfile);
-                       groupmap->readMap();
-               }
-        
-        if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
-        
-        if (outputDir == "") { outputDir += m->hasPath(listfile); }
-        string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("database");
+        string outputFileName = "";
+        if (listfile != "") { outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("database"); }
+        else { outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("database"); }
         outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
         
         ofstream out;
         m->openOutputFile(outputFileName, out);
         
         string header = "OTUNumber\tAbundance\t";
-        if (groupfile != "") { 
-            header = "OTUNumber\t";
-            for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
-        }
-        header += "repSeqName\trepSeq\tOTUConTaxonomy";
-        out << header << endl;
+
         
-        for (int i = 0; i < list->getNumBins(); i++) {
+        if (listfile != "") {
+            //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
+            ListVector* list = getList();
             
-            if (m->control_pressed) { break; }
+            if (otuLabels.size() != list->getNumBins()) { 
+                m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true;  }
             
-            out << (i+1) << '\t';
+            if (m->control_pressed) { delete list; return 0; }
             
-            vector<string> binNames;
-            string bin = list->get(i);
-            
-            map<string, string>::iterator it = repNames.find(bin);
-            if (it == repNames.end()) {
-                m->mothurOut("[ERROR: OTU " + toString(i+1) + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+            GroupMap* groupmap = NULL;
+            if (groupfile != "") {
+                groupmap = new GroupMap(groupfile);
+                groupmap->readMap();
             }
             
-            m->splitAtComma(bin, binNames);
+            if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
             
-            //sanity check
-            if (binNames.size() != classifyOtuSizes[i]) {
-                 m->mothurOut("[ERROR: OTU " + toString(i+1) + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+            if (groupfile != "") { 
+                header = "OTUNumber\t";
+                for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
             }
+            header += "repSeqName\trepSeq\tOTUConTaxonomy";
+            out << header << endl;
             
-            //output abundances
-            if (groupfile != "") {
-                string groupAbunds = "";
-                map<string, int> counts;
-                //initialize counts to 0
-                for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+            for (int i = 0; i < list->getNumBins(); i++) {
+                
+                if (m->control_pressed) { break; }
+                
+                out << otuLabels[i] << '\t';
+                
+                vector<string> binNames;
+                string bin = list->get(i);
                 
-                //find abundances by group
-                bool error = false;
-                for (int j = 0; j < binNames.size(); j++) {
-                    string group = groupmap->getGroup(binNames[j]);
-                    if (group == "not found") {
-                        m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
-                        error = true;
-                    }else { counts[group]++; }
+                map<string, string>::iterator it = repNames.find(bin);
+                if (it == repNames.end()) {
+                    m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
                 }
                 
-                //output counts
-                for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t';  }
+                m->splitAtComma(bin, binNames);
                 
-                if (error) { m->control_pressed = true; }
-            }else { out << binNames.size() << '\t'; }
+                //sanity check
+                if (binNames.size() != classifyOtuSizes[i]) {
+                    m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                }
+                
+                //output abundances
+                if (groupfile != "") {
+                    string groupAbunds = "";
+                    map<string, int> counts;
+                    //initialize counts to 0
+                    for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+                    
+                    //find abundances by group
+                    bool error = false;
+                    for (int j = 0; j < binNames.size(); j++) {
+                        string group = groupmap->getGroup(binNames[j]);
+                        if (group == "not found") {
+                            m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
+                            error = true;
+                        }else { counts[group]++; }
+                    }
+                    
+                    //output counts
+                    for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t';  }
+                    
+                    if (error) { m->control_pressed = true; }
+                }else { out << binNames.size() << '\t'; }
+                
+                //output repSeq
+                out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+            }
+            
+            
+            delete list;
+            if (groupfile != "") { delete groupmap; }
+           
+        }else {
+            vector<SharedRAbundVector*> lookup = getShared();
+            
+            header = "OTUNumber\t";
+            for (int i = 0; i < lookup.size(); i++) { header += lookup[i]->getGroup() + '\t'; }
+            header += "repSeqName\trepSeq\tOTUConTaxonomy";
+            out << header << endl;
             
-            //output repSeq
-            out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+            for (int h = 0; h < lookup[0]->getNumBins(); h++) {
+                
+                if (m->control_pressed) { break; }
+                
+                int index = findIndex(otuLabels, m->currentBinLabels[h]);
+                if (index == -1) {  m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+                
+                if (m->control_pressed) { break; }
+                
+                out << otuLabels[index] << '\t';
+                
+                int totalAbund = 0;
+                for (int i = 0; i < lookup.size(); i++) { 
+                    int abund = lookup[i]->getAbundance(h);
+                    totalAbund += abund; 
+                    out << abund << '\t';
+                }
+                
+                //sanity check
+                if (totalAbund != classifyOtuSizes[index]) {
+                    m->mothurOut("[ERROR: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                }
+                
+                //output repSeq
+                out << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
+            }
         }
         out.close();
-        
-        delete list;
-        if (groupfile != "") { delete groupmap; }
-        
         if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
         
         m->mothurOutEndLine();
@@ -347,7 +420,21 @@ int CreateDatabaseCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies){
+int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
+       try {
+        int index = -1;
+        for (int i = 0; i < otuLabels.size(); i++) {
+            if (otuLabels[i] == label) { index = i; break; }
+        }
+               return index;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "findIndex");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
        try {
                
         vector<int> sizes; 
@@ -369,6 +456,7 @@ vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies){
             
             sizes.push_back(size);
             taxonomies.push_back(tax);
+            otuLabels.push_back(otu);
         }
         in.close();
         
@@ -493,5 +581,81 @@ ListVector* CreateDatabaseCommand::getList(){
        }
 }
 //**********************************************************************************************************************
+vector<SharedRAbundVector*> CreateDatabaseCommand::getShared(){
+       try {
+               InputData input(sharedfile, "sharedfile");
+               vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+               
+               if (label == "") { label = lastLabel; return lookup; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookup[0] != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  return lookup;  }
+                       
+                       if(labels.count(lookup[0]->getLabel()) == 1){
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
+                               
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                               lookup = input.getSharedRAbundVectors(lastLabel);
+                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();                      
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                       lookup = input.getSharedRAbundVectors();
+               }
+               
+               
+               if (m->control_pressed) { return lookup;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                       lookup = input.getSharedRAbundVectors(lastLabel);
+               }       
+        
+        return lookup;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "getList");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
 
 
index 85e23675b2b770d476dfff45b901da03f6581546..741feba082958ae8e4f2ec73f7a188255208b069 100644 (file)
@@ -34,13 +34,15 @@ public:
 private:
        
        bool abort;
-       string listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
+       string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
        
        vector<string> outputNames;
                
        vector<int> readFasta(vector<Sequence>&);
-    vector<int> readTax(vector<string>&);
+    vector<int> readTax(vector<string>&, vector<string>&);
        ListVector* getList();
+    vector<SharedRAbundVector*> getShared();
+    int findIndex(vector<string>&, string);
        
 };
 
index 99e89bafecdf8c2d5740aeb12446d04b8d9e4fdd..16407d59b9819183edb70748d0b3a0ca1ff9acda 100644 (file)
@@ -15,7 +15,7 @@ vector<string> DistanceCommand::setParameters(){
                CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn);
                CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
+               CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
                CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
                CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
                CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
@@ -210,6 +210,7 @@ DistanceCommand::DistanceCommand(string option) {
                        convert(temp, compress);
 
                        output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
+            if (output == "phylip") { output = "lt";  }
                        
                        if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
                        
index 63af3abb3a595cff133a2caac91dea1aa1e420c1..3cc77be4ab73ad4f8d17635aa5b2ace242b22ec8 100644 (file)
@@ -308,7 +308,7 @@ int GetCoreMicroBiomeCommand::execute(){
 int GetCoreMicroBiomeCommand::createTable(vector<SharedRAbundFloatVector*>& lookup){
        try {
         
-        string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + getOutputFileNameTag("coremicrobiome");
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("coremicrobiome");
         outputNames.push_back(outputFileName);  outputTypes["coremicrobiome"].push_back(outputFileName);
                ofstream out;
                m->openOutputFile(outputFileName, out);
index ca832314d66dbe371e314db292ab6f441d2ee351..c3b5f0af58ab6ef0627a6a42784c76c2813184bf 100644 (file)
@@ -140,6 +140,8 @@ int GetCurrentCommand::execute(){
                                        m->setFlowFile("");
                 }else if (types[i] == "biom") {
                                        m->setBiomFile("");
+                }else if (types[i] == "counttable") {
+                                       m->setCountTableFile("");
                                }else if (types[i] == "processors") {
                                        m->setProcessors("1");
                                }else if (types[i] == "all") {
index a281f9f8a23f98d36f7a4b303bdf7804d945c4c7..7dc0b414f40e3db4b3daa0469589ba30dc5fe781 100644 (file)
@@ -33,9 +33,9 @@ string GetOtuLabelsCommand::getHelpString(){
                string helpString = "";
                helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
                helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
-               helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
-        helpString += "The otucorr parameter is input the results of the otu.association command.\n";
-        helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+               helpString += "The constaxonomy parameter is used to input the results of the classify.otu command.\n";
+        helpString += "The otucorr parameter is used to input the results of the otu.association command.\n";
+        helpString += "The corraxes parameter is used to input the results of the corr.axes command.\n";
                helpString += "The get.otulabels commmand should be in the following format: \n";
                helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
                return helpString;
index ed2f05d5559584101fc77c55c09b2af3d078729d..691d706ab00efa8823412753b72f08e5198873b1 100644 (file)
@@ -38,8 +38,8 @@ string MakeContigsCommand::getHelpString(){
                string helpString = "";
                helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
                helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
-               helpString += "The ffastq and rfastq parameter is required.\n";
-               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
+               helpString += "The ffastq and rfastq parameters are required.\n";
+               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
                helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
                helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
                helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
@@ -212,7 +212,7 @@ int MakeContigsCommand::execute(){
         
         string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("fasta");
         string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("qfile");
-        string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatches");
+        string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
         outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
         outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
index 6f8a42343be48654dc77ef2e021caf601a02ff2e..32ede6e5fa70a2ab42f4aa6ebcc0382b32dc8d76 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"5/14/2012\""
-VERSION = "\"1.25.1\""
+RELEASE_DATE = "\"7/9/2012\""
+VERSION = "\"1.26.0\""
 FORTAN_COMPILER = gfortran
 FORTRAN_FLAGS = 
 
index eacad376e1e91a519feff3c8e57727662042599b..14840c16e162bc599c242e289e3c5899fb396de4 100644 (file)
@@ -43,6 +43,7 @@ set<string> MothurOut::getCurrentTypes()  {
         types.insert("tree");
         types.insert("flow");
         types.insert("biom");
+        types.insert("counttable");
         types.insert("processors");
 
                return types;
@@ -78,6 +79,7 @@ void MothurOut::printCurrentFiles()  {
                if (treefile != "")                     {  mothurOut("tree=" + treefile); mothurOutEndLine();                           }
                if (flowfile != "")                     {  mothurOut("flow=" + flowfile); mothurOutEndLine();                           }
         if (biomfile != "")                    {  mothurOut("biom=" + biomfile); mothurOutEndLine();                           }
+        if (counttablefile != "")      {  mothurOut("counttable=" + counttablefile); mothurOutEndLine();       }
                if (processors != "1")          {  mothurOut("processors=" + processors); mothurOutEndLine();           }
                
        }
@@ -112,6 +114,7 @@ bool MothurOut::hasCurrentFiles()  {
                if (treefile != "")                     {  return true;                 }
                if (flowfile != "")                     {  return true;                 }
         if (biomfile != "")                    {  return true;                 }
+        if (counttablefile != "")      {  return true;                 }
                if (processors != "1")          {  return true;                 }
                
                return hasCurrent;
@@ -147,6 +150,7 @@ void MothurOut::clearCurrentFiles()  {
                taxonomyfile = "";      
                flowfile = "";
         biomfile = "";
+        counttablefile = "";
                processors = "1";
        }
        catch(exception& e) {
@@ -1291,16 +1295,6 @@ vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& n
                        positions.push_back(0);
                
                        while(!in.eof()){
-                               //unsigned long long lastpos = in.tellg();
-                               //input = getline(in); 
-                               //if (input.length() != 0) {
-                                       //unsigned long long pos = in.tellg(); 
-                                       //if (pos != -1) { positions.push_back(pos - input.length() - 1);       }
-                                       //else {  positions.push_back(lastpos);  }
-                               //}
-                               //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
-                               
-                               
                                //getline counting reads
                                char d = in.get(); count++;
                                while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof()))    {
@@ -1503,7 +1497,7 @@ vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size)
         for (int i = 0; i < size; i++) {
             if (!isspace(buffer[i]))  { rest += buffer[i];  }
             else {
-                pieces.push_back(rest);  rest = "";
+                if (rest != "") { pieces.push_back(rest);  rest = ""; }
                 while (i < size) {  //gobble white space
                     if (isspace(buffer[i])) { i++; }
                     else { rest = buffer[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
@@ -1527,7 +1521,7 @@ vector<string> MothurOut::splitWhiteSpace(string input){
         for (int i = 0; i < input.length(); i++) {
             if (!isspace(input[i]))  { rest += input[i];  }
             else {
-                pieces.push_back(rest);  rest = "";
+                if (rest != "") { pieces.push_back(rest);  rest = ""; }
                 while (i < input.length()) {  //gobble white space
                     if (isspace(input[i])) { i++; }
                     else { rest = input[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
@@ -1631,6 +1625,46 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
        }
 }
 /**********************************************************************************************************************/
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) { 
+       try {
+               
+               //open input file
+               ifstream in;
+               openInputFile(namefile, in);
+        
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
+               while (!in.eof()) {
+                       if (control_pressed) { break; }
+                       
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    nameMap[secondCol] = firstCol;
+                    pairDone = false; 
+                }
+            }
+               }
+               in.close();
+               
+               return nameMap.size();
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readNames");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) { 
        try {
                nameMap.clear(); nameCount.clear();
index b2c638a167d1ebf507106cc300231f0f693471b0..ac47d79f7d92b113f58dbe421fdbaad892b2b175 100644 (file)
@@ -68,7 +68,7 @@ class MothurOut {
                //map<string, string> names;
                vector<string> binLabelsInFile;
                vector<string> currentBinLabels;
-               string saveNextLabel, argv, sharedHeaderMode;
+               string saveNextLabel, argv, sharedHeaderMode, groupMode;
                bool printedHeaders, commandInputsConvertError;
                
                //functions from mothur.h
@@ -106,6 +106,7 @@ class MothurOut {
         int readNames(string, map<string, string>&, map<string, int>&);
                int readNames(string, map<string, string>&);
         int readNames(string, map<string, string>&, bool);
+        int readNames(string, map<string, string>&, int);
                int readNames(string, map<string, vector<string> >&);
                int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
                int mothurRemove(string);
@@ -173,11 +174,12 @@ class MothurOut {
                string getTaxonomyFile()        { return taxonomyfile;          }
                string getFlowFile()            { return flowfile;                      }
         string getBiomFile()           { return biomfile;                      }
+        string getCountTableFile()     { return counttablefile;        }
                string getProcessors()          { return processors;            }
                
                void setListFile(string f)                      { listfile = getFullPathName(f);                        }
                void setTreeFile(string f)                      { treefile = getFullPathName(f);                        }
-               void setGroupFile(string f)                     { groupfile = getFullPathName(f);                       }               
+               void setGroupFile(string f)                     { groupfile = getFullPathName(f);       groupMode = "group";            }               
                void setPhylipFile(string f)            { phylipfile = getFullPathName(f);                      }
                void setColumnFile(string f)            { columnfile = getFullPathName(f);                      }
                void setNameFile(string f)                      { namefile = getFullPathName(f);                        }       
@@ -196,6 +198,7 @@ class MothurOut {
                void setTaxonomyFile(string f)          { taxonomyfile = getFullPathName(f);            }
                void setFlowFile(string f)                      { flowfile = getFullPathName(f);                        }
         void setBiomFile(string f)                     { biomfile = getFullPathName(f);                        }
+        void setCountTableFile(string f)       { counttablefile = getFullPathName(f);  groupMode = "count";    }
         void setProcessors(string p)           { processors = p; mothurOut("\nUsing " + toString(p) + " processors.\n");       }
                
                void printCurrentFiles();
@@ -231,12 +234,14 @@ class MothurOut {
                        processors = "1";
                        flowfile = "";
             biomfile = "";
+            counttablefile = "";
                        gui = false;
                        printedHeaders = false;
                        commandInputsConvertError = false;
             mothurCalling = false;
             debug = false;
                        sharedHeaderMode = "";
+            groupMode = "group";
                }
                ~MothurOut();
 
@@ -245,7 +250,7 @@ class MothurOut {
                string releaseDate, version;
        
                string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
-               string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
+               string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile, counttablefile;
 
                vector<string> Groups;
                vector<string> namesOfGroups;
index 3d99551cc6a2a5f01932329c34c339a6fa5d9450..126c4e0c594b6745ec6a7c4fcd5228c717d67677 100644 (file)
@@ -22,7 +22,7 @@ void NameAssignment::readMap(){
                int rowIndex = 0;
                
                while(fileHandle){
-                       fileHandle >> firstCol;                         //read from first column
+                       fileHandle >> firstCol; m->gobble(fileHandle);                  //read from first column
                        fileHandle >> secondCol;                        //read from second column
                                                
                        itData = (*this).find(firstCol);
index 2d13cd0cc5d2e1706d424c92440cd97c426b8fa8..e3850e52c56f629a6e5395203f31cc1299a6193b 100644 (file)
@@ -124,6 +124,8 @@ map<string, string> OptionParser::getParameters() {
                         it->second = m->getTaxonomyFile();
                     }else if (it->first == "biom") {
                         it->second = m->getBiomFile();
+                    }else if (it->first == "counttable") {
+                            it->second = m->getCountTableFile();
                     }else {
                         m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine();
                     }
index 1fa96e3e9d4c6cf8ce9a4e99af30d01c5562a009..f69faff477aee954801f0d04bb3bce67f785d1e5 100644 (file)
@@ -19,7 +19,7 @@ vector<string> PairwiseSeqsCommand::setParameters(){
                CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
                CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
+               CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
                CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
                CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
                CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
@@ -249,6 +249,7 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
                        align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
                        
                        output = validParameter.validFile(parameters, "output", false);         if(output == "not found"){      output = "column"; }
+            if (output=="phylip") { output = "lt"; }
                        if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
                        
                        calc = validParameter.validFile(parameters, "calc", false);                     
index a312df07ec1508d1d257bc443ac895467be61525..6451041461a21255ff929feba717a0fd02fd96fa 100644 (file)
@@ -11,6 +11,7 @@
 #include "sharedsobs.h"
 #include "sharednseqs.h"
 #include "sharedutilities.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> RareFactSharedCommand::setParameters(){ 
@@ -21,6 +22,8 @@ vector<string> RareFactSharedCommand::setParameters(){
                CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
+        CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(psubsampleiters);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
                CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
         CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
@@ -50,6 +53,8 @@ string RareFactSharedCommand::getHelpString(){
                helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
                helpString += "Example rarefaction.shared(label=unique-0.01-0.03,  iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
                helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
+        helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
                helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
                helpString += validCalculator.printCalc("sharedrarefaction");
                helpString += "The label parameter is used to analyze specific labels in your input.\n";
@@ -219,6 +224,18 @@ RareFactSharedCommand::RareFactSharedCommand(string option)  {
             
             temp = validParameter.validFile(parameters, "groupmode", false);           if (temp == "not found") { temp = "T"; }
                        groupMode = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "subsampleiters", false);                      if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters); 
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {  
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 1; }
                                
                }
 
@@ -301,7 +318,35 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
                 }
             }
         }
-               
+
+        /******************************************************/
+        if (subsample) { 
+            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+                subsampleSize = subset[0]->getNumSeqs();
+                for (int i = 1; i < subset.size(); i++) {
+                    int thisSize = subset[i]->getNumSeqs();
+                    
+                    if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
+                }
+            }else {
+                newGroups.clear();
+                vector<SharedRAbundVector*> temp;
+                for (int i = 0; i < subset.size(); i++) {
+                    if (subset[i]->getNumSeqs() < subsampleSize) { 
+                        m->mothurOut(subset[i]->getGroup() + " contains " + toString(subset[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                        delete subset[i];
+                    }else { 
+                        newGroups.push_back(subset[i]->getGroup()); 
+                        temp.push_back(subset[i]);
+                    }
+                } 
+                subset = temp;
+            }
+            
+            if (subset.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+        }
+        /******************************************************/
+                       
                ValidCalculators validCalculator;
                for (int i=0; i<Estimators.size(); i++) {
                        if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
@@ -347,6 +392,8 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
                                rCurve->getSharedCurve(freq, nIters);
                                delete rCurve;
                 
+                if (subsample) { subsampleLookup(subset, fileNameRoot);  }
+                    
                                processedLabels.insert(subset[0]->getLabel());
                                userLabels.erase(subset[0]->getLabel());
                        }
@@ -372,6 +419,8 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
                 rCurve->getSharedCurve(freq, nIters);
                 delete rCurve;
                 
+                if (subsample) { subsampleLookup(subset, fileNameRoot);  }
+                
                 processedLabels.insert(subset[0]->getLabel());
                 userLabels.erase(subset[0]->getLabel());
                 
@@ -444,6 +493,9 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
                        rCurve = new Rarefact(subset, rDisplays);
                        rCurve->getSharedCurve(freq, nIters);
                        delete rCurve;
+            
+            if (subsample) { subsampleLookup(subset, fileNameRoot);  }
+                
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                }
                
@@ -458,6 +510,158 @@ int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
        }
 }
 //**********************************************************************************************************************
+int RareFactSharedCommand::subsampleLookup(vector<SharedRAbundVector*>& thisLookup, string fileNameRoot) {
+       try {
+        
+        map<string, vector<string> > filenames;
+        for (int thisIter = 0; thisIter < iters; thisIter++) {
+            
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+            
+             //we want the summary results for the whole dataset, then the subsampling
+            SubSample sample;
+            vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+            //make copy of lookup so we don't get access violations
+            vector<SharedRAbundVector*> newLookup;
+            for (int k = 0; k < thisItersLookup.size(); k++) {
+                SharedRAbundVector* temp = new SharedRAbundVector();
+                temp->setLabel(thisItersLookup[k]->getLabel());
+                temp->setGroup(thisItersLookup[k]->getGroup());
+                newLookup.push_back(temp);
+            }
+                
+            //for each bin
+            for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+            tempLabels = sample.getSample(newLookup, subsampleSize);
+            thisItersLookup = newLookup;
+            
+            
+            Rarefact* rCurve;
+            vector<Display*> rDisplays;
+            
+            string thisfileNameRoot = fileNameRoot + toString(thisIter);
+            
+            ValidCalculators validCalculator;
+            for (int i=0; i<Estimators.size(); i++) {
+                if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
+                    if (Estimators[i] == "sharedobserved") { 
+                        rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
+                        filenames["sharedrarefaction"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"));
+                    }else if (Estimators[i] == "sharednseqs") { 
+                        rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
+                        filenames["sharedr_nseqs"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
+                    }
+                }
+            }
+            
+            rCurve = new Rarefact(thisItersLookup, rDisplays);
+                       rCurve->getSharedCurve(freq, nIters);
+                       delete rCurve;
+            
+            //clean up memory
+            for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+            thisItersLookup.clear();
+            for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }
+        }
+        
+        //create std and ave outputs
+        vector< vector< vector< double > > > results; //iter -> numSampled -> data
+        for (map<string, vector<string> >::iterator it = filenames.begin(); it != filenames.end(); it++) {
+            vector<string> thisTypesFiles = it->second;
+            vector<string> columnHeaders;
+            for (int i = 0; i < thisTypesFiles.size(); i++) {
+                ifstream in;
+                m->openInputFile(thisTypesFiles[i], in);
+                
+                string headers = m->getline(in); m->gobble(in);
+                columnHeaders = m->splitWhiteSpace(headers);
+                int numCols = columnHeaders.size();
+                
+                vector<vector<double> > thisFilesLines;
+                while (!in.eof()) {
+                    if (m->control_pressed) { break; }
+                    vector<double> data; data.resize(numCols, 0);
+                    //read numSampled line
+                    for (int j = 0; j < numCols; j++) { in >> data[j]; m->gobble(in); }
+                    thisFilesLines.push_back(data);
+                }
+                in.close();
+                results.push_back(thisFilesLines);
+                m->mothurRemove(thisTypesFiles[i]);
+            }
+            
+            if (!m->control_pressed) {
+                //process results
+                string outputFile = fileNameRoot + "ave-std." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag(it->first);
+                ofstream out;
+                m->openOutputFile(outputFile, out);
+                outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile);
+                
+                out << columnHeaders[0] << '\t' << "method\t";
+                for (int i = 1; i < columnHeaders.size(); i++) { out << columnHeaders[i] << '\t'; }
+                out << endl;
+            
+                vector< vector<double> > aveResults; aveResults.resize(results[0].size());
+                for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); }
+                
+                for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+                    for (int i = 0; i < aveResults.size(); i++) {  //initialize sums to zero.
+                        aveResults[i][0] = results[thisIter][i][0];
+                        for (int j = 1; j < aveResults[i].size(); j++) {
+                            aveResults[i][j] += results[thisIter][i][j];
+                        }
+                    }
+                }
+                
+                for (int i = 0; i < aveResults.size(); i++) {  //finds average.
+                    for (int j = 1; j < aveResults[i].size(); j++) {
+                        aveResults[i][j] /= (float) iters;
+                    }
+                }
+                
+                //standard deviation
+                vector< vector<double> > stdResults; stdResults.resize(results[0].size());
+                for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); }
+                
+                for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+                    for (int i = 0; i < stdResults.size(); i++) {  
+                        stdResults[i][0] = aveResults[i][0];
+                        for (int j = 1; j < stdResults[i].size(); j++) {
+                            stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j]));
+                        }
+                    }
+                }
+                
+                for (int i = 0; i < stdResults.size(); i++) {  //finds average.
+                    out << aveResults[i][0] << '\t' << "ave\t";
+                    for (int j = 1; j < aveResults[i].size(); j++) { out << aveResults[i][j] << '\t'; }
+                    out << endl;
+                    out << stdResults[i][0] << '\t' << "std\t";
+                    for (int j = 1; j < stdResults[i].size(); j++) {
+                        stdResults[i][j] /= (float) iters;
+                        stdResults[i][j] = sqrt(stdResults[i][j]);
+                        out << stdResults[i][j] << '\t';
+                    }
+                    out << endl;
+                }
+                out.close();
+            }
+        }
+        
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "RareFactSharedCommand", "subsample");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
        try {
                
index a5b45468d7059ad2c9e8877376c0154102d05316..af73e132001d1e80bf239362b5541cd08c99d85e 100644 (file)
@@ -37,18 +37,19 @@ public:
 private:
        
        vector<SharedRAbundVector*> lookup;
-       int nIters;
+       int nIters, subsampleSize, iters;
        string format;
        float freq;
        
      map<int, string> file2Group; //index in outputNames[i] -> group
-       bool abort, allLines, jumble, groupMode;
+       bool abort, allLines, jumble, groupMode, subsample;
        set<string> labels; //holds labels to be used
        string label, calc, groups, outputDir, sharedfile, designfile;
        vector<string>  Estimators, Groups, outputNames, Sets;
     
     int process(GroupMap&, string);
     vector<string> createGroupFile(vector<string>&);
+    int subsampleLookup(vector<SharedRAbundVector*>&, string);
 
 };
 
index 01d8529dded5e9c7e17bd02724158156566dece7..a05e08154b10c6c0123a5f3f93847db520ee7959 100644 (file)
@@ -148,7 +148,7 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option)  {
             }
             
             vector<string> tempOutNames;
-            outputTypes["contaxonomy"] = tempOutNames; 
+            outputTypes["constaxonomy"] = tempOutNames; 
             outputTypes["otucorr"] = tempOutNames;
             outputTypes["corraxes"] = tempOutNames;
             
index f3330f7672e1fbaef8300f70b88c625ac9857b1c..1150e53735481aea3377ec81f78c0d2544f324e6 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "sharedcommand.h"
 #include "sharedutilities.h"
+#include "counttable.h"
 
 //********************************************************************************************************************
 //sorts lowest to highest
@@ -20,7 +21,8 @@ vector<string> SharedCommand::setParameters(){
        try {
         CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
                CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
+        CommandParameter pcount("count", "InputTypes", "", "", "", "GroupCount", "",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup",false,false); parameters.push_back(pgroup);
                //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
@@ -41,7 +43,8 @@ string SharedCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
-               helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
+               helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
+        helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
                helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
                helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
                //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
@@ -137,13 +140,13 @@ SharedCommand::SharedCommand(string option)  {
                                         if (path == "") {      parameters["group"] = inputDir + it->second;            }
                                 }
                         
-                                /*it = parameters.find("ordergroup");
+                                it = parameters.find("count");
                                 //user has given a template file
                                 if(it != parameters.end()){ 
                                         path = m->hasPath(it->second);
                                         //if the user has not given a path then, add inputdir. else leave path alone.
-                                        if (path == "") {      parameters["ordergroup"] = inputDir + it->second;               }
-                                }*/
+                                        if (path == "") {      parameters["count"] = inputDir + it->second;            }
+                                }
                  
                  it = parameters.find("biom");
                                 //user has given a template file
@@ -181,6 +184,11 @@ SharedCommand::SharedCommand(string option)  {
                         if (groupfile == "not open") { groupfile = ""; abort = true; } 
                         else if (groupfile == "not found") { groupfile = ""; }
                         else {  m->setGroupFile(groupfile); }
+            
+             countfile = validParameter.validFile(parameters, "count", true);
+             if (countfile == "not open") { countfile = ""; abort = true; }    
+             else if (countfile == "not found") { countfile = ""; }
+             else {  m->setCountTableFile(countfile); }
                         
             if ((biomfile == "") && (listfile == "")) { 
                                //is there are current file available for either of these?
@@ -199,12 +207,16 @@ SharedCommand::SharedCommand(string option)  {
                        else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
                        
                        if (listfile != "") {
-                               if (groupfile == "") { 
+                               if ((groupfile == "") && (countfile == "")) { 
                                        groupfile = m->getGroupFile(); 
                                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                                               countfile = m->getCountTableFile(); 
+                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
                                }
                        }
@@ -301,9 +313,7 @@ int SharedCommand::createSharedFromBiom(string filename) {
         
         ifstream in;
         m->openInputFile(biomfile, in);
-        
-        m->getline(in); m->gobble(in);  //grab first '{'
-        
+         
         string matrixFormat = "";
         int numRows = 0;
         int numCols = 0;
@@ -311,83 +321,159 @@ int SharedCommand::createSharedFromBiom(string filename) {
         int shapeNumCols = 0;
         vector<string> otuNames;
         vector<string> groupNames;
-        while (!in.eof()) {
-            
+        map<string, string> fileLines;
+        vector<string> names;
+        int countOpenBrace = 0;
+        int countClosedBrace = 0;
+        int openParen = -1; //account for opening brace
+        int closeParen = 0;
+        bool ignoreCommas = false;
+        bool atComma = false;
+        string line = "";
+        string matrixElementType = "";
+        
+        while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
             if (m->control_pressed) { break; }
             
-            string line = m->getline(in); m->gobble(in);
+            char c = in.get(); m->gobble(in);
             
-            string tag = getTag(line);
+            if (c == '[')               { countOpenBrace++;     }
+            else if (c == ']')          { countClosedBrace++;   }
+            else if (c == '{')          { openParen++;          }
+            else if (c == '}')          { closeParen++;         }
+            else if ((!ignoreCommas) && (c == ','))          { atComma = true;       }  
             
-            if (tag == "type") {
-                //check to make sure this is an OTU table
-                string type = getTag(line);
-                if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
-            }else if (tag == "matrix_type") {
-                //get type and check type
-                matrixFormat = getTag(line);
-                if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
-            }else if (tag == "matrix_element_type") {
-                //get type and check type
-                string matrixElementType = getTag(line);
-                if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
-            }else if (tag == "rows") {
-                //read otu names
-                otuNames = readRows(line, in, numRows);  
-            }else if (tag == "columns") {
-                //read sample names
-                groupNames = readRows(line, in, numCols); 
-                
-                //if users selected groups, then remove the groups not wanted.
-                SharedUtil util;
-                vector<string> Groups = m->getGroups();
-                vector<string> allGroups = groupNames;
-                util.setGroups(Groups, allGroups);
-                m->setGroups(Groups);
-                
-                //fill filehandles with neccessary ofstreams
-                int i;
-                ofstream* temp;
-                for (i=0; i<Groups.size(); i++) {
-                    temp = new ofstream;
-                    filehandles[Groups[i]] = temp;
-                }
-                
-                //set fileroot
-                fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
-                
-                //clears file before we start to write to it below
-                for (int i=0; i<Groups.size(); i++) {
-                    m->mothurRemove((fileroot + Groups[i] + ".rabund"));
-                    outputNames.push_back((fileroot + Groups[i] + ".rabund"));
-                    outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
-                }
-
-            }else if (tag == "shape") {
-                getDims(line, shapeNumRows, shapeNumCols);
-                
-                //check shape
-                if (shapeNumCols != numCols) {
-                    m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
-                }
-                
-                if (shapeNumRows != numRows) {
-                    m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
+            if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true;  }
+            else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false;  }
+            if (atComma && !ignoreCommas) { 
+                if (fileLines.size() == 0) { //clip first {
+                    line = line.substr(1);
                 }
-            }else if (tag == "data") {
-                m->currentBinLabels = otuNames;
+                string tag = getTag(line);
+                fileLines[tag] = line;
+                line = "";
+                atComma = false;
+                ignoreCommas = false;
                 
-                //read data
-                vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
-
-                m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                lookup[0]->printHeaders(out); 
-                printSharedData(lookup, out);
-            }
+            }else {  line += c;  }
+            
+        }
+        if (line != "") {
+            line = line.substr(0, line.length()-1);
+            string tag = getTag(line);
+            fileLines[tag] = line;
         }
         in.close();
         
-                
+        map<string, string>::iterator it;
+        it = fileLines.find("type");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
+        else {
+            string thisLine = it->second;
+            string type = getTag(thisLine);
+            if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true;  }
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("matrix_type");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
+        else {
+            string thisLine = it->second;
+            matrixFormat = getTag(thisLine);
+            if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("matrix_element_type");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
+        else {
+            string thisLine = it->second;
+            matrixElementType = getTag(thisLine);
+            if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
+            if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("rows");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
+        else {
+            string thisLine = it->second;
+            otuNames = readRows(thisLine, numRows);  
+        }
+        
+        if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("columns");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
+        else {
+            string thisLine = it->second;
+            //read sample names
+            groupNames = readRows(thisLine, numCols); 
+            
+            //if users selected groups, then remove the groups not wanted.
+            SharedUtil util;
+            vector<string> Groups = m->getGroups();
+            vector<string> allGroups = groupNames;
+            util.setGroups(Groups, allGroups);
+            m->setGroups(Groups);
+            
+            //fill filehandles with neccessary ofstreams
+            int i;
+            ofstream* temp;
+            for (i=0; i<Groups.size(); i++) {
+                temp = new ofstream;
+                filehandles[Groups[i]] = temp;
+            }
+            
+            //set fileroot
+            fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
+            
+            //clears file before we start to write to it below
+            for (int i=0; i<Groups.size(); i++) {
+                m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+                outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+                outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+            }
+        }
+        
+        if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+        it = fileLines.find("shape");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
+        else {
+            string thisLine = it->second;
+            getDims(thisLine, shapeNumRows, shapeNumCols);
+            
+            //check shape
+            if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
+            
+            if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
+        }
+        
+        if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+        
+        it = fileLines.find("data");
+        if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
+        else {
+            string thisLine = it->second;
+            m->currentBinLabels = otuNames;
+            
+            //read data
+            vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
+            
+            m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            lookup[0]->printHeaders(out); 
+            printSharedData(lookup, out);
+        }
+        
+        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } 
+        out.close();
+        
+        if (m->control_pressed) {  m->mothurRemove(filename); return 0; }
+
         return 0;
     }
        catch(exception& e) {
@@ -396,7 +482,7 @@ int SharedCommand::createSharedFromBiom(string filename) {
        }
 }
 //**********************************************************************************************************************
-vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
+vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
        try {
         
         vector<SharedRAbundVector*> lookup; 
@@ -427,7 +513,9 @@ vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string
                 else if ((line[i] == ']') && (inBrackets)) { 
                     inBrackets = false; 
                     int temp;
-                    m->mothurConvert(num, temp);
+                    float temp2;
+                    if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
+                    else { m->mothurConvert(num, temp); }
                     nums.push_back(temp);
                     num = "";
                     
@@ -463,58 +551,6 @@ vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string
             }
         }
         
-        //same as above just reading from file.
-        while (!in.eof()) {
-            
-            char c = in.get(); m->gobble(in);
-            
-            if (m->control_pressed) { return lookup; }
-            
-            //look for opening [ to indicate data is starting
-            if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get();  if (in.eof()) { break; } }
-            else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
-              
-            if (dataStart) {
-                if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get();  if (in.eof()) { break; }  }
-                else if ((c == ']') && (inBrackets)) { 
-                    inBrackets = false; 
-                    int temp;
-                    m->mothurConvert(num, temp);
-                    nums.push_back(temp);
-                    num = "";
-                    
-                    //save info to vectors
-                    if (matrixFormat == "dense") {
-                        
-                        //sanity check
-                        if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data.  OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
-                        
-                        //set abundances for this otu
-                        //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
-                        for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
-                        
-                        otuCount++;
-                    }else {
-                        //sanity check
-                        if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
-                        
-                        //nums contains [otuNum, sampleNum, abundance]
-                        lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
-                    }
-                    nums.clear();
-                }
-                
-                if (inBrackets) {
-                    if (c == ',') {
-                        int temp;
-                        m->mothurConvert(num, temp);
-                        nums.push_back(temp);
-                        num = "";
-                    }else { if (!isspace(c)) { num += c; }  }
-                }
-            }
-        }
-        
         SharedUtil util;
         
                bool remove = false;
@@ -628,7 +664,7 @@ int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
        }
 }
 //**********************************************************************************************************************
-vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
+vector<string> SharedCommand::readRows(string line, int& numRows) {
        try {
         /*"rows":[
          {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
@@ -679,48 +715,6 @@ vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows)
             }
         }
         
-        //keep reading
-        if (!end) {
-            while (!in.eof()) {
-                
-                if (m->control_pressed) { break; }
-                
-                char c = in.get(); m->gobble(in);
-                
-                if (c == '[')               { countOpenBrace++;     }
-                else if (c == ']')          { countClosedBrace++;   }
-                else if (c == '{')          { openParen++;          }
-                else if (c == '}')          { closeParen++;         }
-                else if (openParen != 0)    { nextRow += c;         }  //you are reading the row info
-                
-                
-                //you have reached the end of the rows info
-                if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
-                if ((openParen == closeParen) && (closeParen != 0)) { //process row 
-                    numRows++;
-                    vector<string> items;
-                    m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
-                    string part = items[0]; items.clear();
-                    m->splitAtChar(part, items, ':'); //split part we want containing the ids
-                    string name = items[1];
-                    
-                    //remove "" if needed
-                    int pos = name.find("\"");
-                    if (pos != string::npos) {
-                        string newName = "";
-                        for (int k = 0; k < name.length(); k++) {
-                            if (name[k] != '\"') { newName += name[k]; }
-                        }
-                        name = newName;
-                    }
-                    names.push_back(name);
-                    nextRow = "";
-                    openParen = 0;
-                    closeParen = 0;
-                }  
-            }
-        }
-        
         return names;
     }
        catch(exception& e) {
@@ -729,7 +723,7 @@ vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows)
        }
 }
 //**********************************************************************************************************************
-//designed for things like "type": "OTU table", returns map type -> OTU table
+//designed for things like "type": "OTU table", returns type 
 string SharedCommand::getTag(string& line) {
        try {
         bool inQuotes = false;
@@ -762,18 +756,29 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         ofstream out;
         m->openOutputFile(filename, out);
         
-        GroupMap* groupMap = new GroupMap(groupfile);
+        GroupMap* groupMap = NULL;
+        CountTable* countTable = NULL;
+        if (groupfile != "") {
+            groupMap = new GroupMap(groupfile);
+        
+            int groupError = groupMap->readMap();
+            if (groupError == 1) { delete groupMap; return 0; }
+            vector<string> allGroups = groupMap->getNamesOfGroups();
+            m->setAllGroups(allGroups);
+        }else{
+            countTable = new CountTable();
+            countTable->readTable(countfile);
+        }
         
-        int groupError = groupMap->readMap();
-        if (groupError == 1) { delete groupMap; return 0; }
-        vector<string> allGroups = groupMap->getNamesOfGroups();
-        m->setAllGroups(allGroups);
+        if (m->control_pressed) { return 0; }
         
         pickedGroups = false;
         
         //if hte user has not specified any groups then use them all
         if (Groups.size() == 0) {
-            Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
+            if (groupfile != "") { Groups = groupMap->getNamesOfGroups();  }
+            else {  Groups = countTable->getNamesOfGroups();  }
+            m->setGroups(Groups);
         }else { pickedGroups = true; }
         
         //fill filehandles with neccessary ofstreams
@@ -805,7 +810,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         vector<SharedRAbundVector*> lookup; 
         
         if (m->control_pressed) { 
-            delete SharedList; delete groupMap; 
+            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
             for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
             out.close(); m->mothurRemove(filename); 
             for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));             }
@@ -813,31 +818,27 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         }
         
         //sanity check
-        vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
-        int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
+        vector<string> namesSeqs;
+        int numGroupNames = 0;
+        if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
+        else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
+        int error = ListGroupSameSeqs(namesSeqs, SharedList);
         
-        if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) {  //if the user has not specified any groups and their files don't match exit with error
-            m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
+        if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {  //if the user has not specified any groups and their files don't match exit with error
+            m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); 
             
-            out.close();
-            m->mothurRemove(filename); //remove blank shared file you made
-            
-            createMisMatchFile(SharedList, groupMap);
+            out.close(); m->mothurRemove(filename); //remove blank shared file you made
             
             //delete memory
-            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
-                delete it3->second;
-            }
-            
-            delete SharedList; delete groupMap; 
-            
+            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+            delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
             return 0; 
         }
         
         if (error == 1) { m->control_pressed = true; }
         
         //if user has specified groups make new groupfile for them
-        if (pickedGroups) { //make new group file
+        if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
             string groups = "";
             if (m->getNumGroups() < 4) {
                 for (int i = 0; i < m->getNumGroups(); i++) {
@@ -868,7 +869,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         
         while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
             if (m->control_pressed) { 
-                delete SharedList; delete groupMap;
+                delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
                 out.close(); m->mothurRemove(filename); 
                 for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));         }
@@ -885,7 +886,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
                 }
                 
                 if (m->control_pressed) { 
-                    delete SharedList; delete groupMap; 
+                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                     for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                     for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
                     out.close(); m->mothurRemove(filename); 
@@ -915,7 +916,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
                 
                 
                 if (m->control_pressed) { 
-                    delete SharedList; delete groupMap; 
+                    delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                     for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                     for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
                     out.close(); m->mothurRemove(filename); 
@@ -962,7 +963,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             }
             
             if (m->control_pressed) { 
-                delete groupMap;
+                if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
                 out.close(); m->mothurRemove(filename); 
                 for (int i=0; i<Groups.size(); i++) {  m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));         }
@@ -981,7 +982,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             delete it3->second;
         }
         
-        delete groupMap;
+        if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                
         if (m->control_pressed) { 
             m->mothurRemove(filename); 
@@ -1061,104 +1062,6 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofst
        }
 }
 //**********************************************************************************************************************
-int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
-       try {
-               ofstream outMisMatch;
-               string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
-               
-               //you have sequences in your list file that are not in your group file
-               if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) { 
-                       outputMisMatchName += "missing.group";
-                       m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-                       
-                       m->openOutputFile(outputMisMatchName, outMisMatch);
-                       
-                       set<string> listNames;
-                       set<string>::iterator itList;
-                       
-                       //go through list and if group returns "not found" output it
-                       for (int i = 0; i < SharedList->getNumBins(); i++) {
-                               if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; } 
-                       
-                               string names = SharedList->get(i); 
-                               
-                vector<string> binNames;
-                m->splitAtComma(names, binNames);
-                
-                               for (int j = 0; j < binNames.size(); j++) { 
-                                       string name = binNames[j];
-                                       string group = groupMap->getGroup(name);
-                                       
-                                       if(group == "not found") {      outMisMatch << name << endl;  }
-                                       
-                                       itList = listNames.find(name);
-                                       if (itList != listNames.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-                                       else { listNames.insert(name); }
-                               }
-                       }
-                       
-                       outMisMatch.close();
-                       
-               
-               }else {//you have sequences in your group file that are not in you list file
-                       
-                       outputMisMatchName += "missing.name";
-                       m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-                       
-                       map<string, string> namesInList;
-                       map<string, string>::iterator itList;
-                       
-                       //go through listfile and get names
-                       for (int i = 0; i < SharedList->getNumBins(); i++) {
-                               if (m->control_pressed) {  return 0; } 
-
-                               
-                               string names = SharedList->get(i); 
-               
-                               vector<string> binNames;
-                m->splitAtComma(names, binNames);
-                
-                               for (int j = 0; j < binNames.size(); j++) { 
-
-                                       string name = binNames[j];
-                                       
-                                       itList = namesInList.find(name);
-                                       if (itList != namesInList.end()) {  m->mothurOut(name + " is in your list file more than once.  Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-
-                                       namesInList[name] = name;
-                                       
-                               }
-                       }
-                       
-                       //get names of sequences in groupfile
-                       vector<string> seqNames = groupMap->getNamesSeqs();
-               
-                       map<string, string>::iterator itMatch;
-                       
-                       m->openOutputFile(outputMisMatchName, outMisMatch);
-                       
-                       //loop through names in seqNames and if they aren't in namesIn list output them
-                       for (int i = 0; i < seqNames.size(); i++) {
-                               if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; } 
-                               
-                               itMatch = namesInList.find(seqNames[i]);
-                               
-                               if (itMatch == namesInList.end()) {
-                               
-                                       outMisMatch << seqNames[i] << endl; 
-                               }
-                       }               
-                       outMisMatch.close();
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SharedCommand", "createMisMatchFile");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
        try {
                int error = 0; 
index 1100c3d30afc344119538e89e7c40a8dfa839578..9d9c194dbbaab4ff7a079058c1c43421937659fe 100644 (file)
@@ -40,7 +40,6 @@ public:
        
 private:
        void printSharedData(vector<SharedRAbundVector*>, ofstream&);
-       int createMisMatchFile(SharedListVector*, GroupMap*);
        int readOrderFile();
        bool isValidGroup(string, vector<string>);
        int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
@@ -48,13 +47,13 @@ private:
     int createSharedFromListGroup(string);
     int createSharedFromBiom(string);
     string getTag(string&);
-    vector<string> readRows(string, ifstream&, int&); 
+    vector<string> readRows(string, int&); 
     int getDims(string, int&, int&);
-    vector<SharedRAbundVector*> readData(string, string, ifstream&, vector<string>&, int);
+    vector<SharedRAbundVector*> readData(string, string, string, vector<string>&, int);
        
        vector<string> Groups, outputNames, order;
        set<string> labels;
-       string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile;
+       string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile, countfile;
        bool firsttime, pickedGroups, abort, allLines;
        map<string, ofstream*> filehandles;
        map<string, ofstream*>::iterator it3;
index 6dfcb97f0aff013d15c1c641e93a24b0d7b229a3..2cecb5de49d2aca77c33f21b77d584d6c15f893e 100644 (file)
 
 /***********************************************************************/
 
-SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
 
 /***********************************************************************/
 
-SharedListVector::SharedListVector(int n):     DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector(int n):     DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
 
 /***********************************************************************/
 SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
        try {
                //set up groupmap for later.
-               groupmap = new GroupMap(m->getGroupFile());
-               groupmap->readMap(); 
+        if (m->groupMode == "group") {
+            groupmap = new GroupMap(m->getGroupFile());
+            groupmap->readMap(); 
+        }else {
+            countTable = new CountTable();
+            countTable->readTable(m->getCountTableFile());
+        }
 
                int hold;
                string inputData;
@@ -188,27 +193,34 @@ SAbundVector SharedListVector::getSAbundVector(){
 /***********************************************************************/
 SharedOrderVector* SharedListVector::getSharedOrderVector(){
        try {
-               string groupName, names, name;
-       
                SharedOrderVector* order = new SharedOrderVector();
                order->setLabel(label);
        
                for(int i=0;i<numBins;i++){
                        int binSize = m->getNumNames(get(i));   //find number of individual in given bin        
-                       names = get(i);
-                       while (names.find_first_of(',') != -1) { 
-                               name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               groupName = groupmap->getGroup(name);
-                               
-                               if(groupName == "not found") {  m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
+                       string names = get(i);
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+            if (m->groupMode != "group") {
+                binSize = 0;
+                for (int j = 0; j < binNames.size(); j++) {  binSize += countTable->getNumSeqs(binNames[i]);  }
+            }
+                       for (int j = 0; j < binNames.size(); j++) { 
+                if (m->control_pressed) { return order; }
+                if (m->groupMode == "group") {
+                    string groupName = groupmap->getGroup(binNames[i]);
+                    if(groupName == "not found") {     m->mothurOut("Error: Sequence '" + binNames[i] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
                                
-                               order->push_back(i, binSize, groupName);  //i represents what bin you are in
+                    order->push_back(i, binSize, groupName);  //i represents what bin you are in
+                }else {
+                    vector<int> groupAbundances = countTable->getGroupCounts(binNames[i]);
+                    vector<string> groupNames = countTable->getNamesOfGroups();
+                    for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true.
+                        if (m->control_pressed) { return order; }
+                        for (int l = 0; l < groupAbundances[k]; l++) {  order->push_back(i, binSize, groupNames[k]);  }
+                    }
+                }
                        }
-                       //get last name
-                       groupName = groupmap->getGroup(names);
-                       if(groupName == "not found") {  m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                       order->push_back(i, binSize, groupName);
                }
 
                random_shuffle(order->begin(), order->end());
@@ -225,25 +237,23 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){
 SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
        try {
                SharedRAbundVector rav(data.size());
-               string group, names, name;
                
                for(int i=0;i<numBins;i++){
-                       names = get(i);  
-                       while (names.find_first_of(',') != -1) { 
-                               name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                               group = groupmap->getGroup(name);
-                               if(group == "not found") {      m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                               if (group == groupName) { //this name is in the group you want the vector for.
-                                       rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
-                               }
-                       }
-                       
-                       //get last name
-                       groupName = groupmap->getGroup(names);
-                       if(groupName == "not found") {  m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                       if (group == groupName) { //this name is in the group you want the vector for.
-                                       rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
+                       string names = get(i);
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+            for (int j = 0; j < binNames.size(); j++) { 
+                               if (m->control_pressed) { return rav; }
+                if (m->groupMode == "group") {
+                    string group = groupmap->getGroup(binNames[j]);
+                    if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
+                    if (group == groupName) { //this name is in the group you want the vector for.
+                        rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
+                    }
+                }else {
+                    int count = countTable->getGroupCount(binNames[j], groupName);
+                    rav.set(i, rav.getAbundance(i) + count, groupName);
+                }
                        }
                }
                
@@ -264,11 +274,13 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                SharedUtil* util;
                util = new SharedUtil();
                vector<SharedRAbundVector*> lookup;  //contains just the groups the user selected
+        vector<SharedRAbundVector*> lookupDelete;
                map<string, SharedRAbundVector*> finder;  //contains all groups in groupmap
-               string group, names, name;
                
                vector<string> Groups = m->getGroups();
-               vector<string> allGroups = groupmap->getNamesOfGroups();
+        vector<string> allGroups;
+               if (m->groupMode == "group") {  allGroups = groupmap->getNamesOfGroups();  }
+        else {  allGroups = countTable->getNamesOfGroups();  }
                util->setGroups(Groups, allGroups);
                m->setGroups(Groups);
                delete util;
@@ -280,47 +292,31 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                        finder[allGroups[i]]->setGroup(allGroups[i]);
                        if (m->inUsersGroups(allGroups[i], m->getGroups())) {  //if this group is in user groups
                                lookup.push_back(finder[allGroups[i]]);
-                       }
+                       }else {
+                lookupDelete.push_back(finder[allGroups[i]]);
+            }
                }
        
                //fill vectors
                for(int i=0;i<numBins;i++){
-                       names = get(i);  
-                       int nameLength = names.size();
-                       string seqName = "";
-                       
-                       for(int j=0;j<nameLength;j++){
-                               if(names[j] == ','){
-                                       group = groupmap->getGroup(seqName);
-                                       if(group == "not found") {      m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                                       finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-                                       
-                                       seqName = "";
-                               }
-                               else{
-                                       seqName += names[j];
-                               }
+                       string names = get(i);  
+                       vector<string> binNames;
+            m->splitAtComma(names, binNames);
+            for (int j = 0; j < binNames.size(); j++) { 
+                if (m->groupMode == "group") {
+                    string group = groupmap->getGroup(binNames[j]);
+                    if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
+                    finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in      
+                }else{
+                    vector<int> counts = countTable->getGroupCounts(binNames[j]);
+                    for (int k = 0; k < allGroups.size(); k++) {
+                        finder[allGroups[k]]->set(i, finder[allGroups[k]]->getAbundance(i) + counts[k], allGroups[k]);
+                    }
+                }
                        }
-                       group = groupmap->getGroup(seqName);
-                       if(group == "not found") {      m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-                       finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-                       
-                       
-                       
-//                     while (names.find_first_of(',') != -1) { 
-//                             name = names.substr(0,names.find_first_of(','));
-//                             names = names.substr(names.find_first_of(',')+1, names.length());
-//                             group = groupmap->getGroup(name);
-//                             if(group == "not found") {      m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-//                             finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-//                     }
-                       
-                       //get last name
-//                     group = groupmap->getGroup(names);
-//                     if(group == "not found") {      m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
-//                     finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in
-                       
                }
+        
+        for (int j = 0; j < lookupDelete.size(); j++) {  delete lookupDelete[j];  }
 
                return lookup;
        }
@@ -355,7 +351,14 @@ OrderVector SharedListVector::getOrderVector(map<string,int>* orderMap = NULL){
                        OrderVector ov;
                
                        for(int i=0;i<data.size();i++){
-                               int binSize = m->getNumNames(data[i]);          
+                string names = data[i];
+                vector<string> binNames;
+                m->splitAtComma(names, binNames);
+                               int binSize = binNames.size();  
+                if (m->groupMode != "group") {
+                    binSize = 0;
+                    for (int j = 0; j < binNames.size(); j++) {  binSize += countTable->getNumSeqs(binNames[i]);  }
+                }
                                for(int j=0;j<binSize;j++){
                                        ov.push_back(i);
                                }
@@ -372,31 +375,15 @@ OrderVector SharedListVector::getOrderVector(map<string,int>* orderMap = NULL){
                
                        for(int i=0;i<data.size();i++){
                                string listOTU = data[i];
-                               int length = listOTU.size();
-                               
-                               string seqName="";
-                       
-                               for(int j=0;j<length;j++){
-                               
-                                       if(listOTU[j] != ','){
-                                               seqName += listOTU[j];
-                                       }
-                                       else{
-                                               if(orderMap->count(seqName) == 0){
-                                                       m->mothurOut(seqName + " not found, check *.names file\n");
-                                                       exit(1);
-                                               }
-                                       
-                                               ov.set((*orderMap)[seqName], i);
-                                               seqName = "";
-                                       }                                               
-                               }
-                       
-                               if(orderMap->count(seqName) == 0){
-                                       m->mothurOut(seqName + " not found, check *.names file\n");
-                                       exit(1);
+                               vector<string> binNames;
+                m->splitAtComma(listOTU, binNames);
+                for (int j = 0; j < binNames.size(); j++) { 
+                    if(orderMap->count(binNames[j]) == 0){
+                        m->mothurOut(binNames[j] + " not found, check *.names file\n");
+                        exit(1);
+                    }
+                    ov.set((*orderMap)[binNames[j]], i);
                                }
-                               ov.set((*orderMap)[seqName], i);        
                        }
                
                        ov.setLabel(label);
index 56ea802a1b1bca511454f83ddcb9e8f29bd5ed27..81779257346d4b25dbecf092a6c730162d7d4991 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "datavector.hpp"
 #include "groupmap.h"
+#include "counttable.h"
 #include "sharedrabundvector.h"
 #include "sharedsabundvector.h"
 
@@ -32,8 +33,8 @@ public:
        SharedListVector();
        SharedListVector(int);
        SharedListVector(ifstream&);
-       SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; };
-       ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } };
+       SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; };
+       ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } };
        
        int getNumBins()                                                        {       return numBins;         }
        int getNumSeqs()                                                        {       return numSeqs;         }
@@ -58,6 +59,7 @@ public:
 private:
        vector<string> data;  //data[i] is a list of names of sequences in the ith OTU.
        GroupMap* groupmap;
+    CountTable* countTable;
        int maxRank;
        int numBins;
        int numSeqs;
index 97629b600031c06c063f405175dd9fa369ae7c93..115e930969323ba4655e0b5ba5c2a8e1166a1397 100644 (file)
@@ -1839,14 +1839,17 @@ void ShhherCommand::writeGroups(){
     try {
         string thisOutputDir = outputDir;
         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-        string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
-        string groupFileName = fileRoot + getOutputFileNameTag("group");
+        string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+        int pos = fileRoot.find_first_of('.');
+        string fileGroup = fileRoot;
+        if (pos != string::npos) {  fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1)));  }
+        string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
         ofstream groupFile;
         m->openOutputFile(groupFileName, groupFile);
         
         for(int i=0;i<numSeqs;i++){
             if (m->control_pressed) { break; }
-            groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+            groupFile << seqNameVector[i] << '\t' << fileGroup << endl;
         }
         groupFile.close();
         outputNames.push_back(groupFileName);
@@ -2319,15 +2322,18 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
                 string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
                 string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
                 string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("counts");
-                string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
-                string groupFileName = fileRoot + getOutputFileNameTag("group");
+                string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+                int pos = fileRoot.find_first_of('.');
+                string fileGroup = fileRoot;
+                if (pos != string::npos) {  fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1)));  }
+                string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
 
                 
                 writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
                 writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
                 writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                             if (m->control_pressed) { break; }
                 writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                 if (m->control_pressed) { break; }
-                writeGroups(groupFileName, fileRoot, numSeqs, seqNameVector);                                          if (m->control_pressed) { break; }
+                writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector);                                         if (m->control_pressed) { break; }
                 
                 if (large) {
                     if (g > 0) {
index 0650ea0e8e42e564213d285183e48594f54fc722..0749cb79ff41e03c55ca2a9defeb1b70e44645d8 100644 (file)
@@ -22,7 +22,7 @@ vector<string> UnifracUnweightedCommand::setParameters(){
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
-               CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+               CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
@@ -204,6 +204,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "distance", false);                  
                        if (temp == "not found") { phylip = false; outputForm = ""; }
                        else{
+                if (temp=="phylip") { temp = "lt"; }
                                if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
                                else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
                        }
index 13d10fd5f4d6c690777fcc4730f06fea8832341f..d1e883382890081495dac72238cd05b3c5ad644e 100644 (file)
@@ -24,7 +24,7 @@ vector<string> UnifracWeightedCommand::setParameters(){
         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
         CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
-               CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+               CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
                CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -204,6 +204,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        string temp = validParameter.validFile(parameters, "distance", false);                  
                        if (temp == "not found") { phylip = false; outputForm = ""; }
                        else{
+                if (temp=="phylip") { temp = "lt"; }
                                if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
                                else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
                        }
index 18bb635010bc07a1787e2847ae84c4eaf923e8cf..c918500270a537ac909326f3edc10ad068dc3ac5 100644 (file)
@@ -250,8 +250,7 @@ int VennCommand::execute(){
                                        }else if (Estimators[i] == "chao") { 
                                                vennCalculators.push_back(new Chao1());
                                        }else if (Estimators[i] == "ace") {
-                                               if(abund < 5)
-                                                       abund = 10;
+                                               if(abund < 5) { abund = 10; }
                                                vennCalculators.push_back(new Ace(abund));
                                        }
                                }