]> git.donarmstrong.com Git - mothur.git/blobdiff - mothurout.cpp
added kruskal.wallis command. added worked on make.lefse. working of lefse command...
[mothur.git] / mothurout.cpp
index 124fbb876b38def3d61f1bf425ef15f36237fa5f..1f1c96b6f203e4f1fa8b1dca2e018c735caa6b9f 100644 (file)
@@ -23,6 +23,7 @@ set<string> MothurOut::getCurrentTypes()  {
         
         set<string> types;
         types.insert("fasta");
+        types.insert("summary");
         types.insert("accnos");
         types.insert("column");
         types.insert("design");
@@ -81,6 +82,7 @@ void MothurOut::printCurrentFiles()  {
         if (biomfile != "")                    {  mothurOut("biom=" + biomfile); mothurOutEndLine();                           }
         if (counttablefile != "")      {  mothurOut("count=" + counttablefile); mothurOutEndLine();    }
                if (processors != "1")          {  mothurOut("processors=" + processors); mothurOutEndLine();           }
+        if (summaryfile != "")         {  mothurOut("summary=" + summaryfile); mothurOutEndLine();             }
                
        }
        catch(exception& e) {
@@ -115,6 +117,7 @@ bool MothurOut::hasCurrentFiles()  {
                if (flowfile != "")                     {  return true;                 }
         if (biomfile != "")                    {  return true;                 }
         if (counttablefile != "")      {  return true;                 }
+        if (summaryfile != "") {  return true;                 }
                if (processors != "1")          {  return true;                 }
                
                return hasCurrent;
@@ -151,6 +154,7 @@ void MothurOut::clearCurrentFiles()  {
                flowfile = "";
         biomfile = "";
         counttablefile = "";
+        summaryfile = "";
                processors = "1";
        }
        catch(exception& e) {
@@ -344,6 +348,27 @@ void MothurOut::mothurOut(string output) {
        }
 }
 /*********************************************************************************************/
+// Streams `output` to logger() exactly as given; no line ending is appended.
+// In USE_MPI builds only the process with rank 0 writes, so the text appears once.
+// NOTE(review): logger() is declared outside this hunk -- confirm it writes to the
+// screen only, as this function's name implies.
+// Any std::exception is reported through errorOut() and the program exits with status 1.
+void MothurOut::mothurOutJustToScreen(string output) {
+       try {
+               
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+        
+        if (pid == 0) { //only one process should output to screen
+#endif
+            logger() << output;
+            
+#ifdef USE_MPI
+        }
+#endif
+       }
+       catch(exception& e) {
+               //report this function's own name so the log points at the right place
+               errorOut(e, "MothurOut", "mothurOutJustToScreen");
+               exit(1);
+       }
+}
+/*********************************************************************************************/
 void MothurOut::mothurOutEndLine() {
        try {
                #ifdef USE_MPI
@@ -440,10 +465,24 @@ void MothurOut::errorOut(exception& e, string object, string function) {
        //double vm, rss;
        //mem_usage(vm, rss);
        
-       mothurOut("[ERROR]: ");
-       mothurOut(toString(e.what()));
-       mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
-       mothurOutEndLine();
+    string errorType = toString(e.what());
+    
+    int pos = errorType.find("bad_alloc");
+    mothurOut("[ERROR]: ");
+    mothurOut(errorType);
+    
+    if (pos == string::npos) { //not bad_alloc
+        mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        mothurOutEndLine();
+    }else { //bad alloc
+        if (object == "cluster"){
+            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt.  \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        }else if (object == "shhh.flows"){
+                mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
+        }else {
+            mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        }
+    }
 }
 /*********************************************************************************************/
 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c 
@@ -757,6 +796,39 @@ bool MothurOut::dirCheck(string& dirName){
        }       
     
 }
+//**********************************************************************************************************************
+
+map<string, vector<string> > MothurOut::parseClasses(string classes){
+    // Turns a class description such as  treatment<Early|Late>-age<young|old>
+    // into a map from category to its list of values.
+    // The text is first cut apart with splitAtDash(); within each piece the characters
+    // before the first '<' form the category and the characters after it, up to the
+    // first '>', form the value string, which is handed to splitAtChar() with '|'.
+    // If control_pressed becomes true the categories gathered so far are returned.
+    try {
+        map<string, vector<string> > parsed;
+        
+        vector<string> groups;
+        splitAtDash(classes, groups); // e.g. treatment<Early|Late>, age<young|old>
+        
+        for (vector<string>::size_type g = 0; g < groups.size(); g++) {
+            const string& group = groups[g];
+            string label = "";
+            string options = "";
+            bool insideBrackets = false;
+            
+            for (string::size_type k = 0; k < group.length(); k++) {
+                if (control_pressed) { return parsed; }
+                
+                const char ch = group[k];
+                
+                //a closing bracket ends the scan of this piece
+                if (ch == '>') { break; }
+                
+                //an opening bracket switches from category to value; the bracket itself is dropped
+                if (ch == '<') { insideBrackets = true; continue; }
+                
+                if (insideBrackets) { options += ch; }
+                else                { label += ch;   }
+            }
+            
+            vector<string> optionList;
+            splitAtChar(options, optionList, '|');
+            parsed[label] = optionList;
+        }
+        
+        return parsed;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "parseClasses");
+        exit(1);
+    }
+}
 /***********************************************************************/
 
 string MothurOut::hasPath(string longName){
@@ -939,7 +1011,7 @@ string MothurOut::getFullPathName(string fileName){
                                }
                        
                                for (int i = index; i >= 0; i--) {
-                                       newFileName = dirs[i] +  "\\\\" + newFileName;          
+                                       newFileName = dirs[i] +  "\\" + newFileName;            
                                }
                                
                                return newFileName;
@@ -1156,7 +1228,42 @@ int MothurOut::appendFiles(string temp, string filename) {
                exit(1);
        }       
 }
-
+/**************************************************************************************************/
+// Appends the contents of `temp` to `filename`, leaving out the first line of `temp`.
+// The first line is read with getline() and discarded (it is echoed when debug is on);
+// the remainder is copied in 4096-byte blocks.
+// Returns the number of '\n' characters copied, or 0 when `temp` could not be opened.
+// NOTE(review): the "no error" argument to openInputFile() presumably suppresses the
+// usual open-failure message -- confirm against that helper.
+// Any std::exception is reported through errorOut() and the program exits with status 1.
+int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
+       try{
+               ofstream output;
+               ifstream input;
+        
+               //open output file in append mode
+               openOutputFileAppend(filename, output);
+               int ableToOpen = openInputFile(temp, input, "no error");
+               //int ableToOpen = openInputFile(temp, input);
+               
+               int numLines = 0;
+               if (ableToOpen == 0) { //you opened it
+        
+            //consume the header line so it is not copied
+            string headers = getline(input); gobble(input);
+            if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
+            
+            char buffer[4096];
+            while (!input.eof()) {
+                input.read(buffer, 4096);
+                //the final read may be short; gcount() is the number of bytes actually read
+                output.write(buffer, input.gcount());
+                //count number of lines
+                for (int i = 0; i < input.gcount(); i++) {  if (buffer[i] == '\n') {numLines++;} }
+            }
+                       input.close();
+               }
+               
+               output.close();
+               
+               return numLines;
+       }
+       catch(exception& e) {
+               //report this function's own name rather than the sibling appendFiles
+               errorOut(e, "MothurOut", "appendFilesWithoutHeaders");
+               exit(1);
+       }       
+}
 /**************************************************************************************************/
 string MothurOut::sortFile(string distFile, string outputDir){
        try {   
@@ -1243,15 +1350,15 @@ vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num)
                                char c = inFASTA.get(); count++;
                                if (c == '>') {
                                        positions.push_back(count-1);
-                                       //cout << count << endl;
+                                       if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) +  " count = " + toString(count) + ".\n"); }
                                }
                        }
                        inFASTA.close();
                
                        num = positions.size();
-               
-                       /*FILE * pFile;
-                       long size;
+            if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
+                       FILE * pFile;
+                       unsigned long long size;
                
                        //get num bytes in file
                        pFile = fopen (filename.c_str(),"rb");
@@ -1260,9 +1367,9 @@ vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num)
                                fseek (pFile, 0, SEEK_END);
                                size=ftell (pFile);
                                fclose (pFile);
-                       }*/
+                       }
                        
-                       unsigned long long size = positions[(positions.size()-1)];
+                       /*unsigned long long size = positions[(positions.size()-1)];
                        ifstream in;
                        openInputFile(filename, in);
                        
@@ -1272,8 +1379,10 @@ vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num)
                                if(in.eof())            {       break;  }
                                else                            {       size++; }
                        }
-                       in.close();
-               
+                       in.close();*/
+        
+            if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
+        
                        positions.push_back(size);
                        positions[0] = 0;
                
@@ -1284,6 +1393,67 @@ vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num)
                exit(1);
        }
 }
+//**********************************************************************************************************************
+// Reads a consensus taxonomy file into a vector of consTax records.
+// The first line is treated as a header and skipped; every following record is read as
+// three whitespace separated fields: OTU label, size, taxonomy.
+// Reading stops at end of file, when control_pressed is set, or at the first record
+// that cannot be parsed (an error is printed if that record was partially read).
+// Any std::exception is reported through errorOut() and the program exits with status 1.
+vector<consTax> MothurOut::readConsTax(string inputfile){
+       try {
+               
+        vector<consTax> taxes;
+        
+        ifstream in;
+        openInputFile(inputfile, in);
+        
+        //read headers
+        getline(in);
+        
+        while (!in.eof()) {
+            
+            if (control_pressed) { break; }
+            
+            string otu = ""; string tax = "unknown";
+            int size = 0;
+            
+            in >> otu >> size >> tax;
+            
+            //A failed extraction must end the loop: otherwise a header-only file adds an
+            //empty record, and a non-numeric size leaves failbit set so eof is never reached.
+            if (in.fail()) {
+                if (otu != "") { mothurOut("[ERROR]: unable to parse the line for " + otu + " in " + inputfile + ", expected an OTU label, size and taxonomy."); mothurOutEndLine(); }
+                break;
+            }
+            gobble(in);
+            
+            consTax temp(otu, tax, size);
+            taxes.push_back(temp);
+        }
+        in.close();
+        
+        return taxes;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readConsTax");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Reads a consensus taxonomy file into `taxes`, keyed by OTU label.
+// The first line is treated as a header and skipped; every following record is read as
+// three whitespace separated fields: OTU label, size, taxonomy. Each record is stored
+// as consTax2(taxonomy, size); a repeated label overwrites the earlier entry.
+// Reading stops at end of file, when control_pressed is set, or at the first record
+// that cannot be parsed (an error is printed if that record was partially read).
+// Always returns 0.
+// Any std::exception is reported through errorOut() and the program exits with status 1.
+int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
+       try {
+        ifstream in;
+        openInputFile(inputfile, in);
+        
+        //read headers
+        getline(in);
+        
+        while (!in.eof()) {
+            
+            if (control_pressed) { break; }
+            
+            string otu = ""; string tax = "unknown";
+            int size = 0;
+            
+            in >> otu >> size >> tax;
+            
+            //A failed extraction must end the loop: otherwise a header-only file adds an
+            //empty record, and a non-numeric size leaves failbit set so eof is never reached.
+            if (in.fail()) {
+                if (otu != "") { mothurOut("[ERROR]: unable to parse the line for " + otu + " in " + inputfile + ", expected an OTU label, size and taxonomy."); mothurOutEndLine(); }
+                break;
+            }
+            gobble(in);
+            
+            consTax2 temp(tax, size);
+            taxes[otu] = temp;
+        }
+        in.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readConsTax");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
        try {
@@ -1418,6 +1588,83 @@ vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
        }
 }
 /**************************************************************************************************/
+
+// Computes byte offsets that split `filename` into at most `proc` pieces, with every
+// interior offset placed just after a line break.
+// The returned vector starts with 0 and ends with the file size in bytes; piece i spans
+// [filePos[i], filePos[i+1]). `proc` is updated to the number of pieces actually produced.
+// If the file cannot be opened or is too small to split, a single piece is returned.
+// On non unix-like builds an error is printed and a single piece is returned.
+// Any std::exception is reported through errorOut() and the program exits with status 1.
+vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
+       try{
+               vector<unsigned long long> filePos;
+               filePos.push_back(0);
+               
+               FILE * pFile;
+               unsigned long long size = 0; //stays 0 when the file cannot be opened or measured
+               
+               filename = getFullPathName(filename);
+        
+               //get num bytes in file
+               pFile = fopen (filename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       long numBytes = ftell (pFile);
+                       if (numBytes > 0) { size = numBytes; } //ftell returns -1 on failure, which must not be stored as unsigned
+                       fclose (pFile);
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        
+               //a processor count below 1 would divide by zero below
+               if (proc < 1) { proc = 1; }
+        
+               //estimate file breaks
+               unsigned long long chunkSize = 0;
+               chunkSize = size / proc;
+        
+               //file to small to divide by processors
+               if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
+        
+               //for each process seekg to closest file break and search for the next line break. make that the filebreak
+               for (int i = 0; i < proc; i++) {
+                       unsigned long long spot = (i+1) * chunkSize;
+                       
+                       ifstream in;
+                       openInputFile(filename, in);
+                       in.seekg(spot);
+                       
+                       //look for next line break
+                       unsigned long long newSpot = spot;
+                       while (!in.eof()) {
+                char c = in.get();
+                               
+                               if ((c == '\n') || (c == '\r') || (c == '\f'))  { gobble(in); newSpot = in.tellg(); break; }
+                else if (int(c) == -1) { break; }
+            }
+            
+                       //there was not another line before the end of the file
+                       unsigned long long sanityPos = in.tellg();
+            
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(newSpot);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos: drop any offset that does not move forward
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
+               }
+        
+               proc = (filePos.size() - 1);
+#else
+               mothurOut("[ERROR]: Windows version should not be calling the divideFilePerLine function."); mothurOutEndLine();
+               proc=1;
+               filePos.push_back(size);
+#endif
+               return filePos;
+       }
+       catch(exception& e) {
+               //report this function's own name rather than the sibling divideFile
+               errorOut(e, "MothurOut", "divideFilePerLine");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
        try{
                
@@ -1606,10 +1853,20 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
                     //are there confidence scores, if so remove them
                     if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
-                    taxMap[firstCol] = secondCol;
-                    if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    map<string, string>::iterator itTax = taxMap.find(firstCol);
+                    
+                    if(itTax == taxMap.end()) {
+                        bool ignore = false;
+                        if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
+                        }
+                        if (!ignore) { taxMap[firstCol] = secondCol; }
+                        if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    }else {
+                        mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
+                    }
                     pairDone = false; 
                 }
             }
@@ -1624,10 +1881,21 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
                     //are there confidence scores, if so remove them
                     if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
-                    taxMap[firstCol] = secondCol;
-                    if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    map<string, string>::iterator itTax = taxMap.find(firstCol);
+                    
+                    if(itTax == taxMap.end()) {
+                        bool ignore = false;
+                        if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
+                        }
+                        if (!ignore) { taxMap[firstCol] = secondCol; }
+                        if (debug) {  mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n");  }
+                    }else {
+                        mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
+                    }
+
                     pairDone = false; 
                 }
             } 
@@ -1665,6 +1933,9 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1683,10 +1954,13 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
-                    for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
+                    for (int i = 0; i < theseNames.size(); i++) {   nameMap[theseNames[i]] = firstCol;  }
                     pairDone = false; 
                 }
             }  
@@ -1724,6 +1998,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     nameMap[secondCol] = firstCol;
                     pairDone = false; 
                 }
@@ -1739,6 +2015,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     nameMap[secondCol] = firstCol;
                     pairDone = false; 
                 }
@@ -1778,6 +2056,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1797,6 +2077,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1838,7 +2120,10 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
             }
                }
                in.close();
@@ -1850,7 +2135,10 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
             }
         }
                
@@ -1886,6 +2174,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     vector<string> temp;
                     splitAtComma(secondCol, temp);
                     nameMap[firstCol] = temp;
@@ -1903,6 +2193,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     vector<string> temp;
                     splitAtComma(secondCol, temp);
                     nameMap[firstCol] = temp;
@@ -1944,6 +2236,8 @@ map<string, int> MothurOut::readNames(string namefile) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     nameMap[firstCol] = num;
                     pairDone = false;  
@@ -1959,6 +2253,8 @@ map<string, int> MothurOut::readNames(string namefile) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     nameMap[firstCol] = num;
                     pairDone = false;  
@@ -1975,6 +2271,84 @@ map<string, int> MothurOut::readNames(string namefile) {
        }
 }
 /**********************************************************************************************************************/
+// Reads a two column names file and returns a map from each first-column entry to the
+// value getNumNames() gives for its second column.
+// numSeqs is reset to 0 and then accumulates that value for every pair read.
+// The file is consumed in 4096-byte reads. `carry` is passed to splitWhiteSpace() on
+// every read and parsed once more after the loop -- presumably it holds a token cut off
+// at a read boundary; confirm against that helper.
+// Both columns go through checkName() before use. Reading stops early when
+// control_pressed is set. Any std::exception is reported through errorOut() and the
+// program exits with status 1.
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) { 
+    try {
+        map<string, int> counts;
+        numSeqs = 0;
+        
+        //open input file
+        ifstream in;
+        openInputFile(namefile, in);
+        
+        string carry = "";
+        char chunk[4096];
+        bool wantFirst = true; //true while the next token belongs to column one
+        string first, second;
+        
+        while (!in.eof()) {
+            if (control_pressed) { break; }
+            
+            in.read(chunk, 4096);
+            vector<string> tokens = splitWhiteSpace(carry, chunk, in.gcount());
+            
+            for (vector<string>::size_type t = 0; t < tokens.size(); t++) {
+                //hold on to column one until its partner arrives
+                if (wantFirst) { first = tokens[t]; wantFirst = false; continue; }
+                
+                second = tokens[t];
+                wantFirst = true;
+                
+                checkName(first);
+                checkName(second);
+                const int total = getNumNames(second);
+                counts[first] = total;
+                numSeqs += total;
+            }
+        }
+        in.close();
+        
+        //handle whatever text is still pending after the last read
+        if (carry != "") {
+            vector<string> tokens = splitWhiteSpace(carry);
+            for (vector<string>::size_type t = 0; t < tokens.size(); t++) {
+                if (wantFirst) { first = tokens[t]; wantFirst = false; continue; }
+                
+                second = tokens[t];
+                wantFirst = true;
+                
+                checkName(first);
+                checkName(second);
+                const int total = getNumNames(second);
+                counts[first] = total;
+                numSeqs += total;
+            }
+        }
+        
+        return counts;
+        
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "readNames");
+        exit(1);
+    }
+}
+/************************************************************/
+// When modifyNames is on, rewrites every ':' in `name` to '_' in place and sets
+// changedSeqNames if at least one character was replaced. With modifyNames off the
+// name is left untouched. Always returns 0.
+// Any std::exception is reported through errorOut() and the program exits with status 1.
+int MothurOut::checkName(string& name) {
+    try {
+        if (!modifyNames) { return 0; }
+        
+        //jump from one ':' to the next instead of inspecting every character
+        for (string::size_type at = name.find(':'); at != string::npos; at = name.find(':', at + 1)) {
+            name[at] = '_';
+            changedSeqNames = true;
+        }
+        
+        return 0;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "checkName");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) { 
        try {
                int error = 0;
@@ -2000,6 +2374,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     
                     map<string, string>::iterator it = fastamap.find(firstCol);
@@ -2025,6 +2401,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     
                     map<string, string>::iterator it = fastamap.find(firstCol);
@@ -2064,13 +2442,15 @@ set<string> MothurOut::readAccnos(string accnosfile){
             in.read(buffer, 4096);
             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
             
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]);
+                names.insert(pieces[i]);
+            }
         }
                in.close();     
                
         if (rest != "") {
             vector<string> pieces = splitWhiteSpace(rest);
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  } 
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  } 
         }
                return names;
        }
@@ -2096,13 +2476,13 @@ int MothurOut::readAccnos(string accnosfile, vector<string>& names){
             in.read(buffer, 4096);
             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
             
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
         }
                in.close();     
         
         if (rest != "") {
             vector<string> pieces = splitWhiteSpace(rest);
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
         }
                
                return 0;
@@ -2502,7 +2882,7 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                string individual = "";
                int estimLength = estim.size();
                bool prevEscape = false;
-               for(int i=0;i<estimLength;i++){
+               /*for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
                                prevEscape = false;
@@ -2521,7 +2901,28 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                                        prevEscape = false;
                                }
                        }
-               }
+               }*/
+        
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    container.push_back(individual);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        
+
+        
                container.push_back(individual);
        }
        catch(exception& e) {
@@ -2537,6 +2938,7 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
                string individual = "";
                int estimLength = estim.size();
                bool prevEscape = false;
+        /*
                for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
@@ -2557,7 +2959,25 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
                                }
                        }
                }
-               container.insert(individual);
+               */
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    container.insert(individual);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        container.insert(individual);
         
        }
        catch(exception& e) {
@@ -2573,6 +2993,7 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                int lineNum;
                int estimLength = estim.size();
                bool prevEscape = false;
+        /*
                for(int i=0;i<estimLength;i++){
                        if(prevEscape){
                                individual += estim[i];
@@ -2593,7 +3014,26 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                                        prevEscape = false;
                                }
                        }
-               }
+               }*/
+        
+        for(int i=0;i<estimLength;i++){
+            if(estim[i] == '-'){
+                if (prevEscape) {  individual += estim[i]; prevEscape = false;  } //add in dash because it was escaped.
+                else {
+                    convert(individual, lineNum); //convert the string to int
+                    container.insert(lineNum);
+                    individual = "";
+                }
+            }else if(estim[i] == '\\'){
+                if (i < estimLength-1) { 
+                    if (estim[i+1] == '-') { prevEscape=true; }  //are you a backslash before a dash, if yes ignore
+                    else { individual += estim[i]; prevEscape = false;  } //if no, add in
+                }else { individual += estim[i]; }
+            }else {
+                individual += estim[i];
+            }
+        }
+        
                convert(individual, lineNum); //convert the string to int
                container.insert(lineNum);
        }
@@ -2602,6 +3042,7 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
                exit(1);
        }       
 }
+
 /***********************************************************************/
 string MothurOut::makeList(vector<string>& names) {
        try {
@@ -2669,11 +3110,11 @@ void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
                        string space = " ";
                        while(suffix.at(0) == ' ')
                                suffix = suffix.substr(1, suffix.length());
-               }
+               }else {  suffix = "";  }
         
-       }
+    }
        catch(exception& e) {
-               errorOut(e, "MothurOut", "splitAtComma");
+               errorOut(e, "MothurOut", "splitAtChar");
                exit(1);
        }       
 }
@@ -2689,7 +3130,7 @@ void MothurOut::splitAtComma(string& prefix, string& suffix){
                        string space = " ";
                        while(suffix.at(0) == ' ')
                                suffix = suffix.substr(1, suffix.length());
-               }
+               }else {  suffix = "";  }
 
        }
        catch(exception& e) {
@@ -2905,6 +3346,273 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
                exit(1);
        }
 }
+/**************************************************************************************************/
+// Column-wise mean of dists: entry i of the result is the sum of dists[row][i]
+// over all rows, divided by the number of rows. The result has one entry per
+// element of the first row. An empty dists yields an empty result.
+// NOTE(review): rows longer than the first row are not supported (they would
+// index past the end of the result) - confirm callers pass equal-length rows.
+vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
+    try{
+        vector<double> averages;
+        if (dists.empty()) { return averages; } //nothing to average; dists[0] would be invalid
+        
+        averages.resize(dists[0].size(), 0.0);
+        
+        //sum each column across all rows
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) {
+            for (size_t i = 0; i < dists[thisIter].size(); i++) {
+                averages[i] += dists[thisIter][i];
+            }
+        }
+        
+        //finds average.
+        for (size_t i = 0; i < averages.size(); i++) {  averages[i] /= (double) dists.size(); }
+        
+        return averages;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverages");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+// Arithmetic mean of dists: the sum of all elements divided by the element
+// count. There is no guard for an empty vector, in which case the division
+// is 0.0 / 0.0.
+double MothurOut::getAverage(vector<double> dists) {
+    try{
+        double total = 0;
+        
+        for (vector<double>::const_iterator it = dists.begin(); it != dists.end(); ++it) {
+            total += *it;
+        }
+        
+        //finds average.
+        return total / (double) dists.size();
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverage");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+// Column-wise standard deviation of dists. The column means are computed with
+// getAverages(dists) and the work is then handed to the overload that takes
+// precomputed averages, so both overloads share one implementation.
+// An empty dists yields an empty result.
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
+    try{
+        if (dists.empty()) { return vector<double>(); } //no rows: nothing to measure
+        
+        vector<double> averages = getAverages(dists);
+        return getStandardDeviation(dists, averages);
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+// Column-wise standard deviation of dists around the supplied averages.
+// For each column j the squared differences (dists[row][j] - averages[j])^2
+// are summed over all rows, divided by the number of rows (not rows - 1) and
+// square-rooted. The result has one entry per element of the first row.
+// An empty dists yields an empty result.
+// NOTE(review): assumes averages has an entry for every column of every row
+// and that no row is longer than the first - confirm against callers.
+vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
+    try{
+        vector<double> stdDev;
+        if (dists.empty()) { return stdDev; } //no rows; dists[0] would be invalid
+        
+        stdDev.resize(dists[0].size(), 0.0);
+        
+        //accumulate the squared difference of each dist from its column mean
+        for (size_t thisIter = 0; thisIter < dists.size(); thisIter++) {
+            for (size_t j = 0; j < dists[thisIter].size(); j++) {
+                double diff = dists[thisIter][j] - averages[j];
+                stdDev[j] += diff * diff;
+            }
+        }
+        
+        //mean of the squared differences, then the root
+        for (size_t i = 0; i < stdDev.size(); i++) {
+            stdDev[i] /= (double) dists.size();
+            stdDev[i] = sqrt(stdDev[i]);
+        }
+        
+        return stdDev;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+// Collapses a stack of distance tables (indexed [iteration][calculator][comparison])
+// into a single [calculator][comparison] table. seq1/seq2 of every result
+// entry are copied from the first iteration.
+//   mode == "average": dist is the sum over all iterations divided by the
+//                      number of iterations.
+//   any other mode:    dist is the median, taken as the element at index
+//                      (iterations / 2) of the sorted values, i.e. the upper
+//                      of the two middle values when the count is even.
+// An empty calcDistsTotals yields an empty result.
+// NOTE(review): assumes every iteration has at least the shape of the first
+// one - confirm against callers.
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
+    try{
+        vector< vector<seqDist> > calcAverages;
+        if (calcDistsTotals.empty()) { return calcAverages; } //no iterations; calcDistsTotals[0] would be invalid
+        
+        //copy the sequence labels from the first iteration and initialize sums to zero.
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        if (mode == "average") {
+            for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (size_t i = 0; i < calcAverages.size(); i++) {
+                    for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
+            
+            for (size_t i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+            }
+        }else { //find median
+            for (size_t i = 0; i < calcAverages.size(); i++) { //for each calc
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
+                    vector<double> dists;
+                    dists.reserve(calcDistsTotals.size());
+                    for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
+                        dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+                    }
+                    sort(dists.begin(), dists.end());
+                    calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
+                }
+            }
+        }
+        
+        return calcAverages;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverages");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+// Averages a stack of distance tables (indexed [iteration][calculator][comparison])
+// into a single [calculator][comparison] table: each dist is the sum over all
+// iterations divided by the number of iterations. seq1/seq2 of every result
+// entry are copied from the first iteration.
+// An empty calcDistsTotals yields an empty result.
+// NOTE(review): assumes every iteration has at least the shape of the first
+// one - confirm against callers.
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+    try{
+        vector< vector<seqDist> > calcAverages;
+        if (calcDistsTotals.empty()) { return calcAverages; } //no iterations; calcDistsTotals[0] would be invalid
+        
+        //copy the sequence labels from the first iteration and initialize sums to zero.
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+            for (size_t i = 0; i < calcAverages.size(); i++) {
+                for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                }
+            }
+        }
+        
+        for (size_t i = 0; i < calcAverages.size(); i++) {  //finds average.
+            for (size_t j = 0; j < calcAverages[i].size(); j++) {
+                calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+            }
+        }
+        
+        return calcAverages;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getAverages");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+// Standard deviation of a stack of distance tables (indexed
+// [iteration][calculator][comparison]). The per-entry means are computed with
+// getAverages(calcDistsTotals) and the work is then handed to the overload
+// that takes precomputed averages, so both overloads share one implementation.
+// An empty calcDistsTotals yields an empty result.
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+    try{
+        if (calcDistsTotals.empty()) { return vector< vector<seqDist> >(); } //no iterations: nothing to measure
+        
+        vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
+        return getStandardDeviation(calcDistsTotals, calcAverages);
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+// Standard deviation of a stack of distance tables (indexed
+// [iteration][calculator][comparison]) around the supplied calcAverages.
+// For each [calculator][comparison] entry the squared differences from the
+// matching average are summed over all iterations, divided by the number of
+// iterations (not iterations - 1) and square-rooted. seq1/seq2 of every
+// result entry are copied from the first iteration.
+// An empty calcDistsTotals yields an empty result.
+// NOTE(review): assumes calcAverages and every iteration have at least the
+// shape of the first iteration - confirm against callers.
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
+    try{
+        vector< vector<seqDist> > stdDev;
+        if (calcDistsTotals.empty()) { return stdDev; } //no iterations; calcDistsTotals[0] would be invalid
+        
+        //copy the sequence labels from the first iteration and initialize sums to zero.
+        for (size_t i = 0; i < calcDistsTotals[0].size(); i++) {
+            vector<seqDist> temp;
+            for (size_t j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        //compute the difference of each dist from the mean, and square the result of each
+        for (size_t thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) {
+            for (size_t i = 0; i < stdDev.size(); i++) {
+                for (size_t j = 0; j < stdDev[i].size(); j++) {
+                    const seqDist& sample = calcDistsTotals[thisIter][i][j];
+                    const seqDist& mean = calcAverages[i][j];
+                    stdDev[i][j].dist += ((sample.dist - mean.dist) * (sample.dist - mean.dist));
+                }
+            }
+        }
+        
+        //mean of the squared differences, then the root
+        for (size_t i = 0; i < stdDev.size(); i++) {
+            for (size_t j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+        
+        return stdDev;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "getStandardDeviation");
+        exit(1);
+    }
+}
+
 /**************************************************************************************************/
 bool MothurOut::isContainingOnlyDigits(string input) {
        try{