git.donarmstrong.com Git - mothur.git/blobdiff - getoturepcommand.cpp
get.oturep change and trim.seqs fix
[mothur.git] / getoturepcommand.cpp
index 6b9cee90c40f8f2848a076596d9ed3c88665ffb0..44c58c31d26e66342e7a1bf73feb21f7f3919bfa 100644 (file)
@@ -12,7 +12,7 @@
 #include "readcolumn.h"
 #include "formatphylip.h"
 #include "formatcolumn.h"
-
+#include "sharedutilities.h"
 
 
 //********************************************************************************************************************
@@ -36,19 +36,68 @@ inline bool compareGroup(repStruct left, repStruct right){
        return (left.group < right.group);      
 }
 //**********************************************************************************************************************
-GetOTURepCommand::GetOTURepCommand(string option){
+GetOTURepCommand::GetOTURepCommand(){  // no-argument constructor: parses no option string; sets abort and registers the output type keys
+       try {
+               abort = true; // this constructor always leaves the abort flag set
+               //initialize outputTypes: the "fasta" and "name" keys each start with an empty list of names
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["name"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand"); // report the exception tagged with class and function name
+               exit(1); // then terminate the process with status 1
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOTURepCommand::getValidParameters(){ // returns the parameter names this command lists as valid
+       try {
+               string Array[] =  {"fasta","list","label","name", "group", "weighted","sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"}; // same set of names as the Array in the option-taking constructor (listed there in a slightly different order)
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // sizeof(Array)/sizeof(string) is the element count, so the range covers the whole array
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getValidParameters"); // report the exception tagged with class and function name
+               exit(1); // then terminate the process with status 1
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOTURepCommand::getRequiredParameters(){      // returns the parameter names listed as required: "fasta" and "list"
+       try {
+               string Array[] =  {"fasta","list"}; // the two parameters the option-taking constructor reports as required when they are not found
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // copy the whole array; sizeof ratio gives the element count
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getRequiredParameters"); // report the exception tagged with class and function name
+               exit(1); // then terminate the process with status 1
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOTURepCommand::getRequiredFiles(){   // returns an empty list: this command names no required files here
+       try {
+               vector<string> myArray; // intentionally left empty
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getRequiredFiles"); // report the exception tagged with class and function name
+               exit(1); // then terminate the process with status 1
+       }
+}
+//**********************************************************************************************************************
+GetOTURepCommand::GetOTURepCommand(string option)  {
        try{
                globaldata = GlobalData::getInstance();
                abort = false;
                allLines = 1;
                labels.clear();
-               
+                               
                //allow user to run help
                if (option == "help") { 
                        help(); abort = true;
                } else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","outputdir","inputdir"};
+                       string Array[] =  {"fasta","list","label","name","weighted", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -62,6 +111,11 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["name"] = tempOutNames;
+                       
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -70,7 +124,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                it = parameters.find("list");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                }
@@ -78,7 +132,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                it = parameters.find("fasta");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
                                }
@@ -86,7 +140,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                it = parameters.find("phylip");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
                                }
@@ -94,7 +148,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                it = parameters.find("column");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["column"] = inputDir + it->second;           }
                                }
@@ -102,7 +156,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -110,7 +164,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                it = parameters.find("group");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
@@ -122,11 +176,11 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        
                        //check for required parameters
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not found") { mothurOut("fasta is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
+                       if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
                        else if (fastafile == "not open") { abort = true; }     
                
                        listfile = validParameter.validFile(parameters, "list", true);
-                       if (listfile == "not found") { mothurOut("list is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
+                       if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
                        else if (listfile == "not open") { abort = true; }      
                        
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
@@ -143,8 +197,8 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        
-                       if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a get.oturep command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
-                       else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
+                       if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
+                       else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
                
                        if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
 
@@ -153,33 +207,44 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; allLines = 1;  }
                        else { 
-                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { groupfile = ""; abort = true; }
                        else if (groupfile == "not found") { groupfile = ""; }
-                       else {
-                               //read in group map info.
-                               groupMap = new GroupMap(groupfile);
-                               int error = groupMap->readMap();
-                               if (error == 1) { delete groupMap; abort = true; }
-                       }
-                       
+                                               
                        sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
                        if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
-                               mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); mothurOutEndLine();
+                               m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
                                sorted = "";
                        }
                        
                        if ((sorted == "group") && (groupfile == "")) {
-                               mothurOut("You must provide a groupfile to sort by group. I will not sort."); mothurOutEndLine();
+                               m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
                                sorted = "";
                        }
                        
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; }
+                       else { 
+                               if (groupfile == "") {
+                                       m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
+                                       abort = true;
+                               }else { 
+                                       m->splitAtDash(groups, Groups);
+                               }
+                       }
+                       globaldata->Groups = Groups;
+                       
                        string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
-                       large = isTrue(temp);
+                       large = m->isTrue(temp);
+                       
+                       temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {      if (namefile == "") { temp = "F"; } else { temp = "t"; }        }
+                       weighted = m->isTrue(temp);
+                       
+                       if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
                        
                        temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
                        convert(temp, precision); 
@@ -190,7 +255,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                }
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
+               m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
                exit(1);
        }
 }
@@ -199,21 +264,28 @@ GetOTURepCommand::GetOTURepCommand(string option){
 
 void GetOTURepCommand::help(){
        try {
-               mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
-               mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
-               mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
-               mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
-               mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
-               mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
-               mothurOut("The default value for label is all labels in your inputfile.\n");
-               mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
-               mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
-               mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
-               mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
-               mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
+               m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
+               m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
+               m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
+               m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
+               m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
+               m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
+               m->mothurOut("The default value for label is all labels in your inputfile.\n");
+               m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
+               m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
+               m->mothurOut("The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false with no namesfile and true when a name file is provided.\n");
+               m->mothurOut("The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n");
+               m->mothurOut("For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n");
+               m->mothurOut("For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n");
+               m->mothurOut("If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n");
+               m->mothurOut("The group parameter allows you provide a group file.\n");
+               m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
+               m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
+               m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "help");
+               m->errorOut(e, "GetOTURepCommand", "help");
                exit(1);
        }
 }
@@ -234,7 +306,7 @@ int GetOTURepCommand::execute(){
                        //read distance files
                        if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
                        else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
-                       else { mothurOut("File format error."); mothurOutEndLine(); return 0;  }
+                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
                        
                        readMatrix->setCutoff(cutoff);
        
@@ -244,6 +316,8 @@ int GetOTURepCommand::execute(){
                        }else{  nameMap = NULL;         }
                        
                        readMatrix->read(nameMap);
+                       
+                       if (m->control_pressed) { delete readMatrix; return 0; }
 
                        //get matrix
                        if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
@@ -257,16 +331,23 @@ int GetOTURepCommand::execute(){
                        // via the index of a sequence in the distance matrix
                        seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
                        for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
+                               if (m->control_pressed) { delete readMatrix; return 0; }
                                seqVec[currentCell->row][currentCell->column] = currentCell->dist;
                        }
+                       //add dummy map for unweighted calc
+                       SeqMap dummy;
+                       seqVec.push_back(dummy);
                        
                        delete matrix;
                        delete readMatrix;
+                       delete nameMap;
+                       
+                       if (m->control_pressed) { return 0; }
                }else {
                        //process file and set up indexes
                        if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
                        else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
-                       else { mothurOut("File format error."); mothurOutEndLine(); return 0;  }
+                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
                        
                        formatMatrix->setCutoff(cutoff);
        
@@ -276,6 +357,8 @@ int GetOTURepCommand::execute(){
                        }else{  nameMap = NULL;         }
                        
                        formatMatrix->read(nameMap);
+                       
+                       if (m->control_pressed) { delete formatMatrix;  return 0; }
 
                        //get matrix
                        if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
@@ -286,13 +369,18 @@ int GetOTURepCommand::execute(){
                        //positions in file where the distances for each sequence begin
                        //rowPositions[1] = position in file where distance related to sequence 1 start.
                        rowPositions = formatMatrix->getRowPositions();
+                       rowPositions.push_back(-1); //dummy row for unweighted calc
                        
                        delete formatMatrix;
+                       delete nameMap;
                        
                        //openfile for getMap to use
-                       openInputFile(distFile, inRow);
+                       m->openInputFile(distFile, inRow);
+                       
+                       if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
                }
                
+               
                //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
                if (globaldata->gListVector != NULL) {
                        vector<string> names;
@@ -302,25 +390,36 @@ int GetOTURepCommand::execute(){
                                names.clear();
                                binnames = globaldata->gListVector->get(i);
                                
-                               splitAtComma(binnames, names);
+                               m->splitAtComma(binnames, names);
                                
                                for (int j = 0; j < names.size(); j++) {
                                        nameToIndex[names[j]] = i;
                                }
                        }
-               } else { mothurOut("error, no listvector."); mothurOutEndLine(); }
-               
-               fasta = new FastaMap();
+               } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
                
-               //read fastafile
-               fasta->readFastaFile(fastafile);
                                
-               //if user gave a namesfile then use it
-               if (namefile != "") {   readNamesFile();        }
+               if (m->control_pressed) { 
+                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                       return 0; 
+               }
                
+               if (groupfile != "") {
+                       //read in group map info.
+                       groupMap = new GroupMap(groupfile);
+                       int error = groupMap->readMap();
+                       if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
+                       
+                       if (Groups.size() != 0) {
+                               SharedUtil* util = new SharedUtil();
+                               util->setGroups(Groups, groupMap->namesOfGroups, "getoturep");
+                               delete util;
+                       }
+               }
+                                                               
                //set format to list so input can get listvector
                globaldata->setFormat("list");
-
+       
                //read list file
                read = new ReadOTUFile(listfile);
                read->read(&*globaldata); 
@@ -328,31 +427,50 @@ int GetOTURepCommand::execute(){
                input = globaldata->ginput;
                list = globaldata->gListVector;
                string lastLabel = list->getLabel();
-               
+
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
-       
+               
+               if (m->control_pressed) { 
+                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                       delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+               }
+               
+               if ((!weighted) && (namefile != "")) { readNamesFile(weighted); }
+               
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
                        if (allLines == 1 || labels.count(list->getLabel()) == 1){
-                                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
+                                       m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        error = process(list);
                                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                                        
+                                       if (m->control_pressed) { 
+                                               if (large) {  inRow.close(); remove(distFile.c_str());  }
+                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
+                                               delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+                                       }
+                                       
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
                        }
                        
-                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                                        string saveLabel = list->getLabel();
                                        
                                        delete list;
                                        list = input->getListVector(lastLabel);
-                                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
+                                       m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        error = process(list);
                                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                                        
+                                       if (m->control_pressed) { 
+                                               if (large) {  inRow.close(); remove(distFile.c_str());  }
+                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
+                                               delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+                                       }
+                                       
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
                                        
@@ -361,7 +479,7 @@ int GetOTURepCommand::execute(){
                        }
                        
                        lastLabel = list->getLabel();
-                       
+       
                        delete list;
                        list = input->getListVector();
                }
@@ -369,12 +487,12 @@ int GetOTURepCommand::execute(){
                //output error messages about any remaining user labels
                bool needToRun = false;
                for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       mothurOut("Your file does not include the label " + *it); 
-                       if (processedLabels.count(list->getLabel()) != 1) {
-                               mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+                       m->mothurOut("Your file does not include the label " + (*it)); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                                needToRun = true;
                        }else {
-                               mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
                        }
                }
                
@@ -382,30 +500,59 @@ int GetOTURepCommand::execute(){
                if (needToRun == true)  {
                        if (list != NULL) {     delete list;    }
                        list = input->getListVector(lastLabel);
-                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
+                       m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                        error = process(list);
                        delete list;
                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
+                       
+                       if (m->control_pressed) { 
+                                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
+                                       delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+                       }
                }
                
                //close and remove formatted matrix file
                if (large) {
                        inRow.close();
-                       //remove(distFile.c_str());
+                       remove(distFile.c_str());
                }
                
                globaldata->gListVector = NULL;
                delete input;  globaldata->ginput = NULL;
                delete read;
+               
+               if (!weighted) { nameFileMap.clear(); }
+               
+               //read fastafile
+               fasta = new FastaMap();
+               fasta->readFastaFile(fastafile);
+               
+               //if user gave a namesfile then use it
+               if (namefile != "") {   readNamesFile();        }
+               
+               //create and output the .rep.fasta files
+               map<string, string>::iterator itNameFile;
+               for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
+                       processNames(itNameFile->first, itNameFile->second);
+               }
+               
                delete fasta;
                if (groupfile != "") {
                        delete groupMap;  globaldata->gGroupmap = NULL;
                }
                
+               if (m->control_pressed) {  return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "execute");
+               m->errorOut(e, "GetOTURepCommand", "execute");
                exit(1);
        }
 }
@@ -414,18 +561,18 @@ int GetOTURepCommand::execute(){
 void GetOTURepCommand::readNamesFile() {
        try {
                vector<string> dupNames;
-               openInputFile(namefile, inNames);
+               m->openInputFile(namefile, inNames);
                
                string name, names, sequence;
        
-               while(inNames){
+               while(!inNames.eof()){
                        inNames >> name;                        //read from first column  A
                        inNames >> names;               //read from second column  A,B,C,D
                        
                        dupNames.clear();
                        
                        //parse names into vector
-                       splitAtComma(names, dupNames);
+                       m->splitAtComma(names, dupNames);
                        
                        //store names in fasta map
                        sequence = fasta->getSequence(name);
@@ -433,49 +580,51 @@ void GetOTURepCommand::readNamesFile() {
                                fasta->push_back(dupNames[i], sequence);
                        }
                
-                       gobble(inNames);
+                       m->gobble(inNames);
                }
                inNames.close();
 
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "readNamesFile");
+               m->errorOut(e, "GetOTURepCommand", "readNamesFile");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, int& binsize) {
-       try{
-               vector<string> names;
-               map<string, string> groups;
-               map<string, string>::iterator groupIt;
-
-               //parse names into vector
-               string binnames = thisList->get(bin);
-               splitAtComma(binnames, names);
-               binsize = names.size();
-
-               //if you have a groupfile
-               if (groupfile != "") {
-                       //find the groups that are in this bin
-                       for (size_t i = 0; i < names.size(); i++) {
-                               string groupName = groupMap->getGroup(names[i]);
-                               if (groupName == "not found") {  
-                                       mothurOut(names[i] + " is missing from your group file. Please correct. "); mothurOutEndLine();
-                                       groupError = true;
-                               } else {
-                                       groups[groupName] = groupName;
-                               }
-                       }
+//read names file to find the weighted rep for each bin: fills nameFileMap so that every name listed in the second column maps to the first-column name of its line; the parameter w is not read anywhere in this body
+void GetOTURepCommand::readNamesFile(bool w) {
+       try {
+               vector<string> dupNames;
+               m->openInputFile(namefile, inNames); //inNames is read from below, so this presumably opens namefile on it
+               
+               string name, names, sequence; //sequence is declared but never used in this function
+               
+               while(!inNames.eof()){
+                       inNames >> name;        m->gobble(inNames);             //read from first column  A
+                       inNames >> names;                                                       //read from second column  A,B,C,D
                        
-                       //turn the groups into a string
-                       for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
-                               group += groupIt->first + "-";
+                       dupNames.clear();
+                       
+                       //parse names into vector
+                       m->splitAtComma(names, dupNames);
+                       
+                       for (int i = 0; i < dupNames.size(); i++) {
+                               nameFileMap[dupNames[i]] = name; //each listed name points back to the first-column name of its line
                        }
-                       //rip off last dash
-                       group = group.substr(0, group.length()-1);
-               }else{ group = ""; }
-
+                       
+                       m->gobble(inNames); //presumably skips trailing whitespace so the eof test above sees the end of file - TODO confirm
+               }
+               inNames.close();
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "readNamesFile");
+               exit(1); //any exception is reported through m->errorOut and then ends the program
+       }
+}
+//**********************************************************************************************************************
+string GetOTURepCommand::findRep(vector<string> names) {
+       try{
                // if only 1 sequence in bin or processing the "unique" label, then 
                // the first sequence of the OTU is the representative one
                if ((names.size() == 1) || (list->getLabel() == "unique")) {
@@ -484,10 +633,42 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        vector<int> seqIndex(names.size());
                        vector<float> max_dist(names.size());
                        vector<float> total_dist(names.size());
+                       map<string, string>::iterator itNameFile;
+                       map<string, int>::iterator itNameIndex;
 
                        //fill seqIndex and initialize sums
                        for (size_t i = 0; i < names.size(); i++) {
-                               seqIndex[i] = nameToIndex[names[i]];
+                               if (weighted) {
+                                       seqIndex[i] = nameToIndex[names[i]];
+                               }else { 
+                                       if (namefile == "") {
+                                               itNameIndex = nameToIndex.find(names[i]);
+                                               
+                                               if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
+                                                       if (large) {  seqIndex[i] = (rowPositions.size()-1); }
+                                                       else {  seqIndex[i] = (seqVec.size()-1); }
+                                               }else {
+                                                       seqIndex[i] = itNameIndex->second;
+                                               }
+                                               
+                                       }else {
+                                               itNameFile = nameFileMap.find(names[i]);
+                                               
+                                               if (itNameFile == nameFileMap.end()) {
+                                                       m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; 
+                                               }else{
+                                                       string name1 = itNameFile->first;
+                                                       string name2 = itNameFile->second;
+                                                       
+                                                       if (name1 == name2) { //then you are unique so add your real dists
+                                                               seqIndex[i] = nameToIndex[names[i]];
+                                                       }else { //add dummy
+                                                               if (large) {  seqIndex[i] = (rowPositions.size()-1); }
+                                                               else {  seqIndex[i] = (seqVec.size()-1); }
+                                                       }
+                                               }
+                                       }
+                               }
                                max_dist[i] = 0.0;
                                total_dist[i] = 0.0;
                        }
@@ -496,6 +677,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        SeqMap::iterator it;
                        SeqMap currMap;
                        for (size_t i=0; i < seqIndex.size(); i++) {
+                               if (m->control_pressed) {  return  "control"; }
                        
                                if (!large) {   currMap = seqVec[seqIndex[i]];  }
                                else            {       currMap = getMap(seqIndex[i]);  }
@@ -521,6 +703,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        float min = 10000;
                        int minIndex;
                        for (size_t i=0; i < max_dist.size(); i++) {
+                               if (m->control_pressed) {  return  "control"; }
                                if (max_dist[i] < min) {
                                        min = max_dist[i];
                                        minIndex = i;
@@ -534,12 +717,12 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                                        }
                                }
                        }
-
+                       
                        return(names[minIndex]);
                }
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "FindRep");
+               m->errorOut(e, "GetOTURepCommand", "FindRep");
                exit(1);
        }
 }
@@ -547,51 +730,184 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
 //**********************************************************************************************************************
 int GetOTURepCommand::process(ListVector* processList) {
        try{
-               string nameRep, name, sequence;
+               string name, sequence; //NOTE(review): neither is referenced by the new version of this function
+               string nameRep; //receives the name returned by findRep for each bin (or each group within a bin)
 
                //create output file
-               if (outputDir == "") { outputDir += hasPath(listfile); }
-               string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.fasta";
-               openOutputFile(outputFileName, out);
-               vector<repStruct> reps;
-               
+               if (outputDir == "") { outputDir += m->hasPath(listfile); } //no output directory set: use whatever m->hasPath(listfile) returns
+                               
                ofstream newNamesOutput;
-               string outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
-               openOutputFile(outputNamesFile, newNamesOutput);
+               string outputNamesFile;
+               map<string, ofstream*> filehandles; //group name -> ofstream allocated with new; only filled when Groups is non-empty and deleted before both returns
+               
+               if (Groups.size() == 0) { //you don't want to use groups: one .rep.names file for this label
+                       outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
+                       m->openOutputFile(outputNamesFile, newNamesOutput);
+                       outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
+                       outputNameFiles[outputNamesFile] = processList->getLabel(); //file -> label; execute() later passes each pair to processNames()
+               }else{ //you want to use groups: one .rep.names file per group for this label
+                       ofstream* temp;
+                       for (int i=0; i<Groups.size(); i++) {
+                               temp = new ofstream;
+                               filehandles[Groups[i]] = temp;
+                               outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
+                               
+                               m->openOutputFile(outputNamesFile, *(temp));
+                               outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
+                               outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i]; //file -> label.group, consumed the same way as the no-groups entry
+                       }
+               }
                
                //for each bin in the list vector
                for (int i = 0; i < processList->size(); i++) {
-                       string groups;
-                       int binsize;
-                       nameRep = findRep(i, groups, processList, binsize);
+                       if (m->control_pressed) { //abort requested: close the streams, free the per-group handles, stop
+                               out.close();  //NOTE(review): out is not opened in this function; processNames() is what opens it
+                               if (Groups.size() == 0) { //you don't want to use groups
+                                       newNamesOutput.close();
+                               }else{
+                                       for (int j=0; j<Groups.size(); j++) {
+                                               (*(filehandles[Groups[j]])).close();
+                                               delete filehandles[Groups[j]];
+                                       }
+                               }
+                               return 0; //returns 0, the same value as normal completion
+                       }
                        
-                       //output to new names file
-                       newNamesOutput << nameRep << '\t' << processList->get(i) << endl;
+                       string temp = processList->get(i); //names of bin i as one string, split at commas just below
+                       vector<string> namesInBin;
+                       m->splitAtComma(temp, namesInBin);
+                       
+                       if (Groups.size() == 0) {
+                               nameRep = findRep(namesInBin);
+                               newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl; //line: bin index, rep name, full bin; processNames() reads these three fields back
+                       }else{
+                               map<string, vector<string> > NamesInGroup;
+                               for (int j=0; j<Groups.size(); j++) { //initialize groups: every requested group starts with an empty name list
+                                       NamesInGroup[Groups[j]].resize(0);
+                               }
+                               
+                               for (int j=0; j<namesInBin.size(); j++) {
+                                       string thisgroup = groupMap->getGroup(namesInBin[j]);
+                                       
+                                       if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } //NOTE(review): only raises the abort flag; the not-found value still reaches the inUsersGroups check below
+                                       
+                                       if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group
+                                               NamesInGroup[thisgroup].push_back(namesInBin[j]);
+                                       }
+                               }
+                               
+                               //get rep for each group in otu
+                               for (int j=0; j<Groups.size(); j++) {
+                                       if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
+                                               //get rep for each group
+                                               nameRep = findRep(NamesInGroup[Groups[j]]);
+                                               
+                                               //output group rep and other members of this group
+                                               (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
+                                               
+                                               for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group (all but the last, each followed by a comma)
+                                                       (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
+                                               }
+                                               //output last name
+                                               (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
+                                       }
+                               }
+                       }
+               }
+               
+               if (Groups.size() == 0) { //you don't want to use groups
+                       newNamesOutput.close();
+               }else{
+                       for (int i=0; i<Groups.size(); i++) {
+                               (*(filehandles[Groups[i]])).close();
+                               delete filehandles[Groups[i]];
+                       }
+               }
+               
+               return 0; //NOTE(review): both returns in the new version yield 0, so the error == 1 check in execute() never fires for this call
 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "process");
+               exit(1); //any exception is reported through m->errorOut and then ends the program
+       }
+}
+//**********************************************************************************************************************
+int GetOTURepCommand::processNames(string filename, string label) {
+       try{
+
+               //create output file
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta";
+               m->openOutputFile(outputFileName, out);
+               vector<repStruct> reps;
+               outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
+               
+               ofstream out2;
+               string tempNameFile = filename + ".temp";
+               m->openOutputFile(tempNameFile, out2);
+               
+               ifstream in;
+               m->openInputFile(filename, in);
+               
+               int i = 0;
+               while (!in.eof()) {
+                       string rep, binnames;
+                       in >> i >> rep >> binnames; m->gobble(in);
+                       out2 << rep << '\t' << binnames << endl;
+                       
+                       vector<string> names;
+                       m->splitAtComma(binnames, names);
+                       int binsize = names.size();
+                       
+                       //if you have a groupfile
+                       string group = "";
+                       if (groupfile != "") {
+                               map<string, string> groups;
+                               map<string, string>::iterator groupIt;
+                               
+                               //find the groups that are in this bin
+                               for (size_t i = 0; i < names.size(); i++) {
+                                       string groupName = groupMap->getGroup(names[i]);
+                                       if (groupName == "not found") {  
+                                               m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
+                                               groupError = true;
+                                       } else {
+                                               groups[groupName] = groupName;
+                                       }
+                               }
+                               
+                               //turn the groups into a string
+                               for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
+                                       group += groupIt->first + "-";
+                               }
+                               //rip off last dash
+                               group = group.substr(0, group.length()-1);
+                       }else{ group = ""; }
+
+                       
                        //print out name and sequence for that bin
-                       sequence = fasta->getSequence(nameRep);
+                       string sequence = fasta->getSequence(rep);
 
                        if (sequence != "not found") {
                                if (sorted == "") { //print them out
-                                       nameRep = nameRep + "|" + toString(i+1);
-                                       nameRep = nameRep + "|" + toString(binsize);
+                                       rep = rep + "\t" + toString(i+1);
+                                       rep = rep + "|" + toString(binsize);
                                        if (groupfile != "") {
-                                               nameRep = nameRep + "|" + groups;
+                                               rep = rep + "|" + group;
                                        }
-                                       out << ">" << nameRep << endl;
+                                       out << ">" << rep << endl;
                                        out << sequence << endl;
                                }else { //save them
-                                       repStruct newRep(nameRep, i+1, binsize, groups);
+                                       repStruct newRep(rep, i+1, binsize, group);
                                        reps.push_back(newRep);
                                }
                        }else { 
-                               mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine(); 
-                               remove(outputFileName.c_str());
-                               remove(outputNamesFile.c_str());
-                               return 1;
+                               m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
                        }
                }
                
+                       
                if (sorted != "") { //then sort them and print them
                        if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
                        else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
@@ -601,7 +917,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                        //print them
                        for (int i = 0; i < reps.size(); i++) {
                                string sequence = fasta->getSequence(reps[i].name);
-                               string outputName = reps[i].name + "|" + toString(reps[i].bin);
+                               string outputName = reps[i].name + "\t" + toString(reps[i].bin);
                                outputName = outputName + "|" + toString(reps[i].size);
                                if (groupfile != "") {
                                        outputName = outputName + "|" + reps[i].group;
@@ -610,18 +926,22 @@ int GetOTURepCommand::process(ListVector* processList) {
                                out << sequence << endl;
                        }
                }
-
+               
+               in.close();
                out.close();
-               newNamesOutput.close();
+               out2.close();
+               
+               remove(filename.c_str());
+               rename(tempNameFile.c_str(), filename.c_str());
+               
                return 0;
 
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "process");
+               m->errorOut(e, "GetOTURepCommand", "processNames");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 SeqMap GetOTURepCommand::getMap(int row) {
        try {
@@ -647,7 +967,7 @@ SeqMap GetOTURepCommand::getMap(int row) {
                return rowMap;
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "getMap");
+               m->errorOut(e, "GetOTURepCommand", "getMap");
                exit(1);
        }
 }