git.donarmstrong.com Git - mothur.git/blobdiff - getoturepcommand.cpp
a few modifications for 1.9
[mothur.git] / getoturepcommand.cpp
index d37ee19eb573bba0ade4c91320c9766f1712be69..e9373a93cbf81ae6bcd84bdd063e01947994de6e 100644 (file)
@@ -36,38 +36,97 @@ inline bool compareGroup(repStruct left, repStruct right){
        return (left.group < right.group);      
 }
 //**********************************************************************************************************************
-GetOTURepCommand::GetOTURepCommand(string option){
+GetOTURepCommand::GetOTURepCommand(string option)  {
        try{
                globaldata = GlobalData::getInstance();
                abort = false;
                allLines = 1;
                labels.clear();
-               
+                               
                //allow user to run help
                if (option == "help") { 
                        help(); abort = true;
                } else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision"};
+                       string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
                        
                        ValidParameters validParameter;
+                       map<string, string>::iterator it;
                
                        //check to make sure all parameters are valid for command
-                       for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("phylip");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("column");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["column"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                       }
+
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
                        //check for required parameters
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not found") { mothurOut("fasta is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
+                       if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
                        else if (fastafile == "not open") { abort = true; }     
                
                        listfile = validParameter.validFile(parameters, "list", true);
-                       if (listfile == "not found") { mothurOut("list is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
+                       if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
                        else if (listfile == "not open") { abort = true; }      
                        
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
@@ -84,8 +143,8 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        
-                       if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a get.oturep command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
-                       else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
+                       if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
+                       else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
                
                        if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
 
@@ -104,17 +163,18 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        else {
                                //read in group map info.
                                groupMap = new GroupMap(groupfile);
-                               groupMap->readMap();
+                               int error = groupMap->readMap();
+                               if (error == 1) { delete groupMap; abort = true; }
                        }
                        
                        sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
                        if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
-                               mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); mothurOutEndLine();
+                               m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
                                sorted = "";
                        }
                        
                        if ((sorted == "group") && (groupfile == "")) {
-                               mothurOut("You must provide a groupfile to sort by group. I will not sort."); mothurOutEndLine();
+                               m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
                                sorted = "";
                        }
                        
@@ -130,7 +190,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                }
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
+               m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
                exit(1);
        }
 }
@@ -139,21 +199,21 @@ GetOTURepCommand::GetOTURepCommand(string option){
 
 void GetOTURepCommand::help(){
        try {
-               mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
-               mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
-               mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
-               mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
-               mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
-               mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
-               mothurOut("The default value for label is all labels in your inputfile.\n");
-               mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
-               mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
-               mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
-               mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
-               mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
+               m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
+               m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
+               m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
+               m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
+               m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
+               m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
+               m->mothurOut("The default value for label is all labels in your inputfile.\n");
+               m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
+               m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
+               m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
+               m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "help");
+               m->errorOut(e, "GetOTURepCommand", "help");
                exit(1);
        }
 }
@@ -174,7 +234,7 @@ int GetOTURepCommand::execute(){
                        //read distance files
                        if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
                        else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
-                       else { mothurOut("File format error."); mothurOutEndLine(); return 0;  }
+                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
                        
                        readMatrix->setCutoff(cutoff);
        
@@ -184,6 +244,8 @@ int GetOTURepCommand::execute(){
                        }else{  nameMap = NULL;         }
                        
                        readMatrix->read(nameMap);
+                       
+                       if (m->control_pressed) { delete readMatrix; delete groupMap; return 0; }
 
                        //get matrix
                        if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
@@ -197,16 +259,19 @@ int GetOTURepCommand::execute(){
                        // via the index of a sequence in the distance matrix
                        seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
                        for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
+                               if (m->control_pressed) { delete readMatrix; delete groupMap; return 0; }
                                seqVec[currentCell->row][currentCell->column] = currentCell->dist;
                        }
                        
                        delete matrix;
                        delete readMatrix;
+                       
+                       if (m->control_pressed) {  delete groupMap; return 0; }
                }else {
                        //process file and set up indexes
                        if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
                        else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
-                       else { mothurOut("File format error."); mothurOutEndLine(); return 0;  }
+                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
                        
                        formatMatrix->setCutoff(cutoff);
        
@@ -216,6 +281,8 @@ int GetOTURepCommand::execute(){
                        }else{  nameMap = NULL;         }
                        
                        formatMatrix->read(nameMap);
+                       
+                       if (m->control_pressed) { delete formatMatrix; delete groupMap; return 0; }
 
                        //get matrix
                        if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
@@ -231,8 +298,11 @@ int GetOTURepCommand::execute(){
                        
                        //openfile for getMap to use
                        openInputFile(distFile, inRow);
+                       
+                       if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); delete groupMap; return 0; }
                }
                
+               
                //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
                if (globaldata->gListVector != NULL) {
                        vector<string> names;
@@ -248,12 +318,17 @@ int GetOTURepCommand::execute(){
                                        nameToIndex[names[j]] = i;
                                }
                        }
-               } else { mothurOut("error, no listvector."); mothurOutEndLine(); }
+               } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
                
                fasta = new FastaMap();
                
                //read fastafile
                fasta->readFastaFile(fastafile);
+               
+               if (m->control_pressed) { 
+                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                       delete groupMap; delete fasta; return 0; 
+               }
                                
                //if user gave a namesfile then use it
                if (namefile != "") {   readNamesFile();        }
@@ -272,14 +347,26 @@ int GetOTURepCommand::execute(){
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
+               
+               if (m->control_pressed) { 
+                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                       delete groupMap; delete fasta; delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+               }
        
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
                        if (allLines == 1 || labels.count(list->getLabel()) == 1){
-                                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
+                                       m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        error = process(list);
                                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                                        
+                                       if (m->control_pressed) { 
+                                               if (large) {  inRow.close(); remove(distFile.c_str());  }
+                                               if (groupfile != "") {  delete groupMap;  globaldata->gGroupmap = NULL; }
+                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
+                                               delete fasta; delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+                                       }
+                                       
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
                        }
@@ -289,10 +376,17 @@ int GetOTURepCommand::execute(){
                                        
                                        delete list;
                                        list = input->getListVector(lastLabel);
-                                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
+                                       m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        error = process(list);
                                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                                        
+                                       if (m->control_pressed) { 
+                                               if (large) {  inRow.close(); remove(distFile.c_str());  }
+                                               if (groupfile != "") {  delete groupMap;  globaldata->gGroupmap = NULL; }
+                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
+                                               delete fasta; delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+                                       }
+                                       
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
                                        
@@ -309,12 +403,12 @@ int GetOTURepCommand::execute(){
                //output error messages about any remaining user labels
                bool needToRun = false;
                for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
-                       mothurOut("Your file does not include the label " + *it); 
+                       m->mothurOut("Your file does not include the label " + *it); 
                        if (processedLabels.count(list->getLabel()) != 1) {
-                               mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                                needToRun = true;
                        }else {
-                               mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
                        }
                }
                
@@ -322,16 +416,23 @@ int GetOTURepCommand::execute(){
                if (needToRun == true)  {
                        if (list != NULL) {     delete list;    }
                        list = input->getListVector(lastLabel);
-                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
+                       m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                        error = process(list);
                        delete list;
                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
+                       
+                       if (m->control_pressed) { 
+                                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                                       if (groupfile != "") {  delete groupMap;  globaldata->gGroupmap = NULL; }
+                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
+                                       delete fasta; delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
+                       }
                }
                
                //close and remove formatted matrix file
                if (large) {
                        inRow.close();
-                       //remove(distFile.c_str());
+                       remove(distFile.c_str());
                }
                
                globaldata->gListVector = NULL;
@@ -342,10 +443,17 @@ int GetOTURepCommand::execute(){
                        delete groupMap;  globaldata->gGroupmap = NULL;
                }
                
+               if (m->control_pressed) {  return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "execute");
+               m->errorOut(e, "GetOTURepCommand", "execute");
                exit(1);
        }
 }
@@ -379,7 +487,7 @@ void GetOTURepCommand::readNamesFile() {
 
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "readNamesFile");
+               m->errorOut(e, "GetOTURepCommand", "readNamesFile");
                exit(1);
        }
 }
@@ -401,7 +509,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        for (size_t i = 0; i < names.size(); i++) {
                                string groupName = groupMap->getGroup(names[i]);
                                if (groupName == "not found") {  
-                                       mothurOut(names[i] + " is missing from your group file. Please correct. "); mothurOutEndLine();
+                                       m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
                                        groupError = true;
                                } else {
                                        groups[groupName] = groupName;
@@ -436,6 +544,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        SeqMap::iterator it;
                        SeqMap currMap;
                        for (size_t i=0; i < seqIndex.size(); i++) {
+                               if (m->control_pressed) {  return  "control"; }
                        
                                if (!large) {   currMap = seqVec[seqIndex[i]];  }
                                else            {       currMap = getMap(seqIndex[i]);  }
@@ -461,6 +570,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        float min = 10000;
                        int minIndex;
                        for (size_t i=0; i < max_dist.size(); i++) {
+                               if (m->control_pressed) {  return  "control"; }
                                if (max_dist[i] < min) {
                                        min = max_dist[i];
                                        minIndex = i;
@@ -479,7 +589,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                }
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "FindRep");
+               m->errorOut(e, "GetOTURepCommand", "FindRep");
                exit(1);
        }
 }
@@ -490,20 +600,28 @@ int GetOTURepCommand::process(ListVector* processList) {
                string nameRep, name, sequence;
 
                //create output file
-               string outputFileName = getRootName(listfile) + processList->getLabel() + ".rep.fasta";
+               if (outputDir == "") { outputDir += hasPath(listfile); }
+               string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.fasta";
                openOutputFile(outputFileName, out);
                vector<repStruct> reps;
+               outputNames.push_back(outputFileName);
                
                ofstream newNamesOutput;
-               string outputNamesFile = getRootName(listfile) + processList->getLabel() + ".rep.names";
+               string outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
                openOutputFile(outputNamesFile, newNamesOutput);
+               outputNames.push_back(outputNamesFile);
                
                //for each bin in the list vector
                for (int i = 0; i < processList->size(); i++) {
                        string groups;
                        int binsize;
+                       
+                       if (m->control_pressed) { out.close();  newNamesOutput.close(); return 0; }
+                       
                        nameRep = findRep(i, groups, processList, binsize);
                        
+                       if (m->control_pressed) { out.close();  newNamesOutput.close(); return 0; }
+                       
                        //output to new names file
                        newNamesOutput << nameRep << '\t' << processList->get(i) << endl;
 
@@ -524,7 +642,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                                        reps.push_back(newRep);
                                }
                        }else { 
-                               mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine(); 
+                               m->mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); 
                                remove(outputFileName.c_str());
                                remove(outputNamesFile.c_str());
                                return 1;
@@ -556,7 +674,7 @@ int GetOTURepCommand::process(ListVector* processList) {
 
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "process");
+               m->errorOut(e, "GetOTURepCommand", "process");
                exit(1);
        }
 }
@@ -586,7 +704,7 @@ SeqMap GetOTURepCommand::getMap(int row) {
                return rowMap;
        }
        catch(exception& e) {
-               errorOut(e, "GetOTURepCommand", "getMap");
+               m->errorOut(e, "GetOTURepCommand", "getMap");
                exit(1);
        }
 }