]> git.donarmstrong.com Git - mothur.git/blobdiff - mothurout.cpp
added primer.design command. fixed bug with linux unifrac subsampling, metastats...
[mothur.git] / mothurout.cpp
index 64f0bc88713d2c3f56cfb6f98cc1cbc387ee7966..54cdd33bbe72cfecb241ed0d24d537bf4654af06 100644 (file)
@@ -43,6 +43,7 @@ set<string> MothurOut::getCurrentTypes()  {
         types.insert("tree");
         types.insert("flow");
         types.insert("biom");
+        types.insert("count");
         types.insert("processors");
 
                return types;
@@ -78,6 +79,7 @@ void MothurOut::printCurrentFiles()  {
                if (treefile != "")                     {  mothurOut("tree=" + treefile); mothurOutEndLine();                           }
                if (flowfile != "")                     {  mothurOut("flow=" + flowfile); mothurOutEndLine();                           }
         if (biomfile != "")                    {  mothurOut("biom=" + biomfile); mothurOutEndLine();                           }
+        if (counttablefile != "")      {  mothurOut("count=" + counttablefile); mothurOutEndLine();    }
                if (processors != "1")          {  mothurOut("processors=" + processors); mothurOutEndLine();           }
                
        }
@@ -112,6 +114,7 @@ bool MothurOut::hasCurrentFiles()  {
                if (treefile != "")                     {  return true;                 }
                if (flowfile != "")                     {  return true;                 }
         if (biomfile != "")                    {  return true;                 }
+        if (counttablefile != "")      {  return true;                 }
                if (processors != "1")          {  return true;                 }
                
                return hasCurrent;
@@ -147,6 +150,7 @@ void MothurOut::clearCurrentFiles()  {
                taxonomyfile = "";      
                flowfile = "";
         biomfile = "";
+        counttablefile = "";
                processors = "1";
        }
        catch(exception& e) {
@@ -935,7 +939,7 @@ string MothurOut::getFullPathName(string fileName){
                                }
                        
                                for (int i = index; i >= 0; i--) {
-                                       newFileName = dirs[i] +  "\\" + newFileName;            
+                                       newFileName = dirs[i] +  "\\\\" + newFileName;          
                                }
                                
                                return newFileName;
@@ -1048,6 +1052,9 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
 
 int MothurOut::renameFile(string oldName, string newName){
        try {
+        
+        if (oldName == newName) { return 0; }
+        
                ifstream inTest;
                int exist = openInputFile(newName, inTest, "");
                inTest.close();
@@ -1176,7 +1183,7 @@ string MothurOut::sortFile(string distFile, string outputDir){
 
                        string firstName, secondName;
                        float dist;
-                       while (input) {
+                       while (!input.eof()) {
                                input >> firstName >> secondName >> dist;
                                output << dist << '\t' << firstName << '\t' << secondName << endl;
                                gobble(input);
@@ -1192,16 +1199,17 @@ string MothurOut::sortFile(string distFile, string outputDir){
                
                        //read in sorted file and put distance at end again
                        ifstream input2;
+            ofstream output2;
                        openInputFile(tempOutfile, input2);
-                       openOutputFile(outfile, output);
+                       openOutputFile(outfile, output2);
                
-                       while (input2) {
+            while (!input2.eof()) {
                                input2 >> dist >> firstName >> secondName;
-                               output << firstName << '\t' << secondName << '\t' << dist << endl;
+                               output2 << firstName << '\t' << secondName << '\t' << dist << endl;
                                gobble(input2);
                        }
                        input2.close();
-                       output.close();
+                       output2.close();
                
                        //remove temp files
                        mothurRemove(tempDistFile);
@@ -1291,16 +1299,6 @@ vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& n
                        positions.push_back(0);
                
                        while(!in.eof()){
-                               //unsigned long long lastpos = in.tellg();
-                               //input = getline(in); 
-                               //if (input.length() != 0) {
-                                       //unsigned long long pos = in.tellg(); 
-                                       //if (pos != -1) { positions.push_back(pos - input.length() - 1);       }
-                                       //else {  positions.push_back(lastpos);  }
-                               //}
-                               //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
-                               
-                               
                                //getline counting reads
                                char d = in.get(); count++;
                                while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof()))    {
@@ -1503,7 +1501,7 @@ vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size)
         for (int i = 0; i < size; i++) {
             if (!isspace(buffer[i]))  { rest += buffer[i];  }
             else {
-                pieces.push_back(rest);  rest = "";
+                if (rest != "") { pieces.push_back(rest);  rest = ""; }
                 while (i < size) {  //gobble white space
                     if (isspace(buffer[i])) { i++; }
                     else { rest = buffer[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
@@ -1527,7 +1525,7 @@ vector<string> MothurOut::splitWhiteSpace(string input){
         for (int i = 0; i < input.length(); i++) {
             if (!isspace(input[i]))  { rest += input[i];  }
             else {
-                pieces.push_back(rest);  rest = "";
+                if (rest != "") { pieces.push_back(rest);  rest = ""; }
                 while (i < input.length()) {  //gobble white space
                     if (isspace(input[i])) { i++; }
                     else { rest = input[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
@@ -1544,10 +1542,49 @@ vector<string> MothurOut::splitWhiteSpace(string input){
                exit(1);
        }
 }
+/***********************************************************************/
+vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
+       try {
+        vector<string> pieces;
+        string rest = "";
+        
+        int pos = input.find('\'');
+        int pos2 = input.find('\"');
+        
+        if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
+        else {
+            for (int i = 0; i < input.length(); i++) {
+                if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
+                    rest += input[i];
+                    for (int j = i+1; j < input.length(); j++) {
+                        if ((input[j] == '\'') || (input[j] == '\"')) {  //then quit
+                            rest += input[j];
+                            i = j+1;
+                            j+=input.length();
+                        }else { rest += input[j]; }
+                    }
+                }else if (!isspace(input[i]))  { rest += input[i];  }
+                else {
+                    if (rest != "") { pieces.push_back(rest);  rest = ""; }
+                    while (i < input.length()) {  //gobble white space
+                        if (isspace(input[i])) { i++; }
+                        else { rest = input[i];  break; } //cout << "next piece buffer = " << nextPiece << endl;
+                    } 
+                }
+            }
+            
+            if (rest != "") { pieces.push_back(rest); }
+        }
+        return pieces;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "splitWhiteSpace");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
        try {
-               
         //open input file
                ifstream in;
                openInputFile(namefile, in);
@@ -1569,15 +1606,53 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
                     //are there confidence scores, if so remove them
                     if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
-                    taxMap[firstCol] = secondCol;
-                    if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    map<string, string>::iterator itTax = taxMap.find(firstCol);
+                    
+                    if(itTax == taxMap.end()) {
+                        bool ignore = false;
+                        if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
+                        }
+                        if (!ignore) { taxMap[firstCol] = secondCol; }
+                        if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    }else {
+                        mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
+                    }
                     pairDone = false; 
                 }
             }
                }
                in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    //are there confidence scores, if so remove them
+                    if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
+                    map<string, string>::iterator itTax = taxMap.find(firstCol);
+                    
+                    if(itTax == taxMap.end()) {
+                        bool ignore = false;
+                        if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
+                        }
+                        if (!ignore) { taxMap[firstCol] = secondCol; }
+                        if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    }else {
+                        mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
+                    }
+
+                    pairDone = false; 
+                }
+            } 
+        }
                
                return taxMap.size();
 
@@ -1590,7 +1665,6 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) { 
        try {
-               
                //open input file
                ifstream in;
                openInputFile(namefile, in);
@@ -1612,6 +1686,9 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1621,6 +1698,26 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
             }
                }
                in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
+                    //parse names into vector
+                    vector<string> theseNames;
+                    splitAtComma(secondCol, theseNames);
+                    for (int i = 0; i < theseNames.size(); i++) {   nameMap[theseNames[i]] = firstCol;  }
+                    pairDone = false; 
+                }
+            }  
+        }
                
                return nameMap.size();
                
@@ -1633,7 +1730,6 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) { 
        try {
-               
                //open input file
                ifstream in;
                openInputFile(namefile, in);
@@ -1655,12 +1751,30 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     nameMap[secondCol] = firstCol;
                     pairDone = false; 
                 }
             }
                }
                in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[secondCol] = firstCol;
+                    pairDone = false; 
+                }
+            } 
+        }
                
                return nameMap.size();
                
@@ -1673,7 +1787,7 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) { 
        try {
-               nameMap.clear(); nameCount.clear();
+               nameMap.clear(); nameCount.clear();
                //open input file
                ifstream in;
                openInputFile(namefile, in);
@@ -1695,6 +1809,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1706,6 +1822,26 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                }
                in.close();
                
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    //parse names into vector
+                    vector<string> theseNames;
+                    splitAtComma(secondCol, theseNames);
+                    for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
+                    nameCount[firstCol] = theseNames.size();
+                    pairDone = false; 
+                }
+            }
+
+        }
                return nameMap.size();
                
        }
@@ -1717,7 +1853,6 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap) { 
        try {
-               
                //open input file
                ifstream in;
                openInputFile(namefile, in);
@@ -1738,10 +1873,27 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
             }
                }
                in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
+            }
+        }
                
                return nameMap.size();
                
@@ -1753,8 +1905,7 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
 }
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) { 
-       try {
-               
+       try {        
                //open input file
                ifstream in;
                openInputFile(namefile, in);
@@ -1776,6 +1927,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     vector<string> temp;
                     splitAtComma(secondCol, temp);
                     nameMap[firstCol] = temp;
@@ -1785,6 +1938,24 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                }
                in.close();
         
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    vector<string> temp;
+                    splitAtComma(secondCol, temp);
+                    nameMap[firstCol] = temp;
+                    pairDone = false;  
+                } 
+            }
+        }
+        
                return nameMap.size();
        }
        catch(exception& e) {
@@ -1795,8 +1966,68 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
 /**********************************************************************************************************************/
 map<string, int> MothurOut::readNames(string namefile) { 
        try {
+               map<string, int> nameMap;
+               
+               //open input file
+               ifstream in;
+               openInputFile(namefile, in);
+               
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
+               while (!in.eof()) {
+                       if (control_pressed) { break; }
+                       
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
+               }
+        in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
+        }
                
+               return nameMap;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readNames");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) { 
+       try {
                map<string, int> nameMap;
+        numSeqs = 0;
                
                //open input file
                ifstream in;
@@ -1819,13 +2050,33 @@ map<string, int> MothurOut::readNames(string namefile) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     nameMap[firstCol] = num;
                     pairDone = false;  
+                    numSeqs += num;
                 } 
             }
                }
         in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                    numSeqs += num;
+                } 
+            }
+        }
                
                return nameMap;
                
@@ -1835,6 +2086,19 @@ map<string, int> MothurOut::readNames(string namefile) {
                exit(1);
        }
 }
+/************************************************************/
+int MothurOut::checkName(string& name) {
+    try {
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
+        }        
+        return 0;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "checkName");
+               exit(1);
+       }
+}
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) { 
        try {
@@ -1861,6 +2125,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     
                     map<string, string>::iterator it = fastamap.find(firstCol);
@@ -1878,6 +2144,31 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                }
         in.close();
         
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    
+                    map<string, string>::iterator it = fastamap.find(firstCol);
+                    if (it == fastamap.end()) {
+                        error = 1;
+                        mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+                    }else {
+                        seqPriorityNode temp(num, it->second, firstCol);
+                        nameVector.push_back(temp);
+                    }
+                    
+                    pairDone = false;  
+                } 
+            }
+        }
                return error;
        }
        catch(exception& e) {
@@ -1888,7 +2179,7 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
 //**********************************************************************************************************************
 set<string> MothurOut::readAccnos(string accnosfile){
        try {
-               set<string> names;
+               set<string> names;
                ifstream in;
                openInputFile(accnosfile, in);
                string name;
@@ -1902,10 +2193,14 @@ set<string> MothurOut::readAccnos(string accnosfile){
             in.read(buffer, 4096);
             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
             
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  }
         }
                in.close();     
                
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  } 
+        }
                return names;
        }
        catch(exception& e) {
@@ -1930,9 +2225,14 @@ int MothurOut::readAccnos(string accnosfile, vector<string>& names){
             in.read(buffer, 4096);
             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
             
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
         }
                in.close();     
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
+        }
                
                return 0;
        }
@@ -1984,6 +2284,32 @@ int MothurOut::getNumChar(string line, char c){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
+       try {
+               
+        
+               if (subset.size() > bigset.size()) { return false;  }
+               
+               //check if each guy in suset is also in bigset
+               for (int i = 0; i < subset.size(); i++) {
+                       bool match = false;
+                       for (int j = 0; j < bigset.size(); j++) {
+                               if (subset[i] == bigset[j]) { match = true; break; }
+                       }
+                       
+                       //you have a guy in subset that had no match in bigset
+                       if (match == false) { return false; }
+               }
+               
+               return true;
+        
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "isSubset");
+               exit(1);
+       }
+}
 /***********************************************************************/
 int MothurOut::mothurRemove(string filename){
        try {
@@ -2023,6 +2349,28 @@ bool MothurOut::mothurConvert(string item, int& num){
                exit(1);
        }
 }
+/***********************************************************************/
+bool MothurOut::mothurConvert(string item, intDist& num){
+       try {
+               bool error = false;
+               
+               if (isNumeric1(item)) {
+                       convert(item, num);
+               }else {
+                       num = 0;
+                       error = true;
+                       mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
+                       commandInputsConvertError = true;
+               }
+               
+               return error;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "mothurConvert");
+               exit(1);
+       }
+}
+
 /***********************************************************************/
 bool MothurOut::isNumeric1(string stringToCheck){
        try {
@@ -2252,6 +2600,9 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
 //This function parses the estimator options and puts them in a vector
 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
        try {
+        
+        if (symbol == '-') { splitAtDash(estim, container); return; }
+        
                string individual = "";
                int estimLength = estim.size();
                for(int i=0;i<estimLength;i++){
@@ -2279,30 +2630,29 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
        try {
                string individual = "";
                int estimLength = estim.size();
+               bool prevEscape = false;
                for(int i=0;i<estimLength;i++){
-                       if(estim[i] == '-'){
-                               container.push_back(individual);
-                               individual = "";                                
+                       if(prevEscape){
+                               individual += estim[i];
+                               prevEscape = false;
                        }
                        else{
-                               individual += estim[i];
+                               if(estim[i] == '\\'){
+                                       prevEscape = true;
+                               }
+                               else if(estim[i] == '-'){
+                                       container.push_back(individual);
+                                       individual = "";
+                                       prevEscape = false;                             
+                               }
+                               else{
+                                       individual += estim[i];
+                                       prevEscape = false;
+                               }
                        }
                }
                container.push_back(individual);
-
-       
-       /*      string individual;
-               
-               while (estim.find_first_of('-') != -1) {
-                       individual = estim.substr(0,estim.find_first_of('-'));
-                       if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-                               estim = estim.substr(estim.find_first_of('-')+1, estim.length());
-                               container.push_back(individual);
-                       }
-               }
-               //get last one
-               container.push_back(estim); */
-       }
+       }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtDash");
                exit(1);
@@ -2315,29 +2665,29 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
        try {
                string individual = "";
                int estimLength = estim.size();
+               bool prevEscape = false;
                for(int i=0;i<estimLength;i++){
-                       if(estim[i] == '-'){
-                               container.insert(individual);
-                               individual = "";                                
+                       if(prevEscape){
+                               individual += estim[i];
+                               prevEscape = false;
                        }
                        else{
-                               individual += estim[i];
+                               if(estim[i] == '\\'){
+                                       prevEscape = true;
+                               }
+                               else if(estim[i] == '-'){
+                                       container.insert(individual);
+                                       individual = "";
+                                       prevEscape = false;                             
+                               }
+                               else{
+                                       individual += estim[i];
+                                       prevEscape = false;
+                               }
                        }
                }
                container.insert(individual);
-
-       //      string individual;
-               
-       //      while (estim.find_first_of('-') != -1) {
-       //              individual = estim.substr(0,estim.find_first_of('-'));
-       //              if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-       //                      estim = estim.substr(estim.find_first_of('-')+1, estim.length());
-       //                      container.insert(individual);
-       //              }
-       //      }
-               //get last one
-       //      container.insert(estim);
-       
+        
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtDash");
@@ -2348,19 +2698,32 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
 //This function parses the line options and puts them in a set
 void MothurOut::splitAtDash(string& estim, set<int>& container) {
        try {
-               string individual;
+               string individual = "";
                int lineNum;
-               
-               while (estim.find_first_of('-') != -1) {
-                       individual = estim.substr(0,estim.find_first_of('-'));
-                       if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-                               estim = estim.substr(estim.find_first_of('-')+1, estim.length());
-                               convert(individual, lineNum); //convert the string to int
-                               container.insert(lineNum);
+               int estimLength = estim.size();
+               bool prevEscape = false;
+               for(int i=0;i<estimLength;i++){
+                       if(prevEscape){
+                               individual += estim[i];
+                               prevEscape = false;
+                       }
+                       else{
+                               if(estim[i] == '\\'){
+                                       prevEscape = true;
+                               }
+                               else if(estim[i] == '-'){
+                                       convert(individual, lineNum); //convert the string to int
+                                       container.insert(lineNum);
+                                       individual = "";
+                                       prevEscape = false;                             
+                               }
+                               else{
+                                       individual += estim[i];
+                                       prevEscape = false;
+                               }
                        }
                }
-               //get last one
-               convert(estim, lineNum); //convert the string to int
+               convert(individual, lineNum); //convert the string to int
                container.insert(lineNum);
        }
        catch(exception& e) {
@@ -2498,6 +2861,35 @@ bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
                exit(1);
        }       
 }
+/**************************************************************************************************/
+
+bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
+       try {
+               for (int i = 0; i < sets.size(); i++) {
+                       if (set == sets[i]) { return true; }
+               }
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "inUsersGroups");
+               exit(1);
+       }       
+}
+/**************************************************************************************************/
+
+bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
+       try {
+               for (int i = 0; i < Groups.size(); i++) {
+                       if (groupname == Groups[i]) { return true; }
+               }
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "inUsersGroups");
+               exit(1);
+       }       
+}
+
 /**************************************************************************************************/
 //returns true if any of the strings in first vector are in second vector
 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
@@ -2642,6 +3034,253 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
                exit(1);
        }
 }
+/**************************************************************************************************/
+vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
+       try{
+        vector<double> averages; //averages.resize(numComp, 0.0);
+        for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
+      
+        for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
+            for (int i = 0; i < dists[thisIter].size(); i++) {  
+                averages[i] += dists[thisIter][i];
+            }
+        }
+        
+        //finds average.
+        for (int i = 0; i < averages.size(); i++) {  averages[i] /= (double) dists.size(); }
+        
+        return averages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
+       try{
+        
+        vector<double> averages = getAverages(dists);
+        
+        //find standard deviation
+        vector<double> stdDev; //stdDev.resize(numComp, 0.0);
+        for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
+        
+        for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (int j = 0; j < dists[thisIter].size(); j++) {
+                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
+            }
+        }
+        for (int i = 0; i < stdDev.size(); i++) {  
+            stdDev[i] /= (double) dists.size(); 
+            stdDev[i] = sqrt(stdDev[i]);
+        }
+        
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
+       try{
+        //find standard deviation
+        vector<double> stdDev; //stdDev.resize(numComp, 0.0);
+        for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
+        
+        for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (int j = 0; j < dists[thisIter].size(); j++) {
+                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
+            }
+        }
+        for (int i = 0; i < stdDev.size(); i++) {  
+            stdDev[i] /= (double) dists.size(); 
+            stdDev[i] = sqrt(stdDev[i]);
+        }
+        
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
+       try{
+        
+        vector< vector<seqDist>  > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); 
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            //calcAverages[i].resize(calcDistsTotals[0][i].size());
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        if (mode == "average") {
+            for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
+                    for (int j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
+            
+            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+            }
+        }else { //find median
+            for (int i = 0; i < calcAverages.size(); i++) { //for each calc
+                for (int j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
+                    vector<double> dists;
+                    for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
+                        dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+                    }
+                    sort(dists.begin(), dists.end());
+                    calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
+                }
+            }
+        }
+
+        return calcAverages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+       try{
+        
+        vector< vector<seqDist>  > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); 
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            //calcAverages[i].resize(calcDistsTotals[0][i].size());
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        
+        for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
+                    for (int j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+        }
+            
+        for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+        }
+        
+        return calcAverages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+       try{
+        
+        vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
+        
+        //find standard deviation
+        vector< vector<seqDist>  > stdDev;  
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (int i = 0; i < stdDev.size(); i++) {  
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+        
+        for (int i = 0; i < stdDev.size(); i++) {  //finds average.
+            for (int j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
+       try{
+        //find standard deviation
+        vector< vector<seqDist>  > stdDev;  
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (int i = 0; i < stdDev.size(); i++) {  
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+        
+        for (int i = 0; i < stdDev.size(); i++) {  //finds average.
+            for (int j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+        
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+
 /**************************************************************************************************/
 bool MothurOut::isContainingOnlyDigits(string input) {
        try{
@@ -2700,8 +3339,53 @@ int MothurOut::removeConfidences(string& tax) {
        }
 }
 /**************************************************************************************************/
-
-
+string MothurOut::removeQuotes(string tax) {
+       try {
+               
+               string taxon;
+               string newTax = "";
+               
+               for (int i = 0; i < tax.length(); i++) {
+                       
+                       if (control_pressed) { return newTax; }
+            
+            if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
+                       
+        }
+               
+               return newTax;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "removeQuotes");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+// function for calculating standard deviation
+double MothurOut::getStandardDeviation(vector<int>& featureVector){
+    try {
+        //finds sum
+        double average = 0; 
+        for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
+        average /= (double) featureVector.size();
+        
+        //find standard deviation
+        double stdDev = 0;
+        for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
+            stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
+        }
+          
+        stdDev /= (double) featureVector.size(); 
+        stdDev = sqrt(stdDev);
+        
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getStandardDeviation");
+               exit(1);
+       }
+}
+/**************************************************************************************************/