]> git.donarmstrong.com Git - mothur.git/blobdiff - getoturepcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / getoturepcommand.cpp
index 9dfc8bdeadc83c9784a42c10d66b26efcd52e8d4..1e09dba67dc7f1d9b0b10c5a5dfa79023b04a3a6 100644 (file)
@@ -36,6 +36,54 @@ inline bool compareGroup(repStruct left, repStruct right){
        return (left.group < right.group);      
 }
 //**********************************************************************************************************************
+GetOTURepCommand::GetOTURepCommand(){
+       try {
+               // No-argument construction only registers the output types this command reports.
+               // Each type ("fasta", "name") starts out with an empty list of file names.
+               outputTypes["fasta"] = vector<string>();
+               outputTypes["name"] = vector<string>();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOTURepCommand::getValidParameters(){
+       try {
+               // Returns the parameter names listed as valid for this command, in declaration order.
+               const string accepted[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
+               return vector<string>(accepted, accepted + (sizeof(accepted) / sizeof(accepted[0])));
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOTURepCommand::getRequiredParameters(){
+       try {
+               // Returns the parameter names listed as required for this command: "fasta" and "list".
+               const string mandatory[] = {"fasta","list"};
+               return vector<string>(mandatory, mandatory + (sizeof(mandatory) / sizeof(mandatory[0])));
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOTURepCommand::getRequiredFiles(){
+       try {
+               // No files are listed as required here, so the result is always an empty vector.
+               return vector<string>();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 GetOTURepCommand::GetOTURepCommand(string option)  {
        try{
                globaldata = GlobalData::getInstance();
@@ -62,6 +110,11 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["name"] = tempOutNames;
+                       
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -70,7 +123,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                it = parameters.find("list");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                }
@@ -78,7 +131,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                it = parameters.find("fasta");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
                                }
@@ -86,7 +139,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                it = parameters.find("phylip");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
                                }
@@ -94,7 +147,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                it = parameters.find("column");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["column"] = inputDir + it->second;           }
                                }
@@ -102,7 +155,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -110,7 +163,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                it = parameters.find("group");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
@@ -153,7 +206,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; allLines = 1;  }
                        else { 
-                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
                        
@@ -179,13 +232,13 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                        m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
                                        abort = true;
                                }else { 
-                                       splitAtDash(groups, Groups);
+                                       m->splitAtDash(groups, Groups);
                                }
                        }
                        globaldata->Groups = Groups;
                        
                        string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
-                       large = isTrue(temp);
+                       large = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
                        convert(temp, precision); 
@@ -214,6 +267,7 @@ void GetOTURepCommand::help(){
                m->mothurOut("The default value for label is all labels in your inputfile.\n");
                m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
                m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
+               m->mothurOut("The group parameter allows you provide a group file.\n");
                m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
                m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
                m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
@@ -306,7 +360,7 @@ int GetOTURepCommand::execute(){
                        delete nameMap;
                        
                        //openfile for getMap to use
-                       openInputFile(distFile, inRow);
+                       m->openInputFile(distFile, inRow);
                        
                        if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
                }
@@ -321,7 +375,7 @@ int GetOTURepCommand::execute(){
                                names.clear();
                                binnames = globaldata->gListVector->get(i);
                                
-                               splitAtComma(binnames, names);
+                               m->splitAtComma(binnames, names);
                                
                                for (int j = 0; j < names.size(); j++) {
                                        nameToIndex[names[j]] = i;
@@ -377,7 +431,7 @@ int GetOTURepCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                if (large) {  inRow.close(); remove(distFile.c_str());  }
-                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
+                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
                                                delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
                                        }
                                        
@@ -385,7 +439,7 @@ int GetOTURepCommand::execute(){
                                        userLabels.erase(list->getLabel());
                        }
                        
-                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                                        string saveLabel = list->getLabel();
                                        
                                        delete list;
@@ -396,7 +450,7 @@ int GetOTURepCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                if (large) {  inRow.close(); remove(distFile.c_str());  }
-                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
+                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
                                                delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
                                        }
                                        
@@ -436,7 +490,7 @@ int GetOTURepCommand::execute(){
                        
                        if (m->control_pressed) { 
                                        if (large) {  inRow.close(); remove(distFile.c_str());  }
-                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
+                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
                                        delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
                        }
                }
@@ -488,7 +542,7 @@ int GetOTURepCommand::execute(){
 void GetOTURepCommand::readNamesFile() {
        try {
                vector<string> dupNames;
-               openInputFile(namefile, inNames);
+               m->openInputFile(namefile, inNames);
                
                string name, names, sequence;
        
@@ -499,7 +553,7 @@ void GetOTURepCommand::readNamesFile() {
                        dupNames.clear();
                        
                        //parse names into vector
-                       splitAtComma(names, dupNames);
+                       m->splitAtComma(names, dupNames);
                        
                        //store names in fasta map
                        sequence = fasta->getSequence(name);
@@ -507,7 +561,7 @@ void GetOTURepCommand::readNamesFile() {
                                fasta->push_back(dupNames[i], sequence);
                        }
                
-                       gobble(inNames);
+                       m->gobble(inNames);
                }
                inNames.close();
 
@@ -522,7 +576,7 @@ string GetOTURepCommand::findRep(vector<string> names) {
        try{
                // if only 1 sequence in bin or processing the "unique" label, then 
                // the first sequence of the OTU is the representative one
-               if ((names.size() == 1) || (list->getLabel() == "unique")) {
+               if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) {
                        return names[0];
                }else{
                        vector<int> seqIndex(names.size());
@@ -597,26 +651,26 @@ int GetOTURepCommand::process(ListVector* processList) {
                string nameRep;
 
                //create output file
-               if (outputDir == "") { outputDir += hasPath(listfile); }
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
                                
                ofstream newNamesOutput;
                string outputNamesFile;
                map<string, ofstream*> filehandles;
                
                if (Groups.size() == 0) { //you don't want to use groups
-                       outputNamesFile  = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
-                       openOutputFile(outputNamesFile, newNamesOutput);
-                       outputNames.push_back(outputNamesFile);
+                       outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
+                       m->openOutputFile(outputNamesFile, newNamesOutput);
+                       outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
                        outputNameFiles[outputNamesFile] = processList->getLabel();
                }else{ //you want to use groups
                        ofstream* temp;
                        for (int i=0; i<Groups.size(); i++) {
                                temp = new ofstream;
                                filehandles[Groups[i]] = temp;
-                               outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
+                               outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
                                
-                               openOutputFile(outputNamesFile, *(temp));
-                               outputNames.push_back(outputNamesFile);
+                               m->openOutputFile(outputNamesFile, *(temp));
+                               outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
                                outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
                        }
                }
@@ -638,7 +692,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                        
                        string temp = processList->get(i);
                        vector<string> namesInBin;
-                       splitAtComma(temp, namesInBin);
+                       m->splitAtComma(temp, namesInBin);
                        
                        if (Groups.size() == 0) {
                                nameRep = findRep(namesInBin);
@@ -654,7 +708,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                                        
                                        if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
                                        
-                                       if (inUsersGroups(thisgroup, Groups)) { //add this name to correct group
+                                       if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group
                                                NamesInGroup[thisgroup].push_back(namesInBin[j]);
                                        }
                                }
@@ -700,27 +754,27 @@ int GetOTURepCommand::processNames(string filename, string label) {
        try{
 
                //create output file
-               if (outputDir == "") { outputDir += hasPath(listfile); }
-               string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + label + ".rep.fasta";
-               openOutputFile(outputFileName, out);
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta";
+               m->openOutputFile(outputFileName, out);
                vector<repStruct> reps;
-               outputNames.push_back(outputFileName);
+               outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
                
                ofstream out2;
                string tempNameFile = filename + ".temp";
-               openOutputFile(tempNameFile, out2);
+               m->openOutputFile(tempNameFile, out2);
                
                ifstream in;
-               openInputFile(filename, in);
+               m->openInputFile(filename, in);
                
                int i = 0;
                while (!in.eof()) {
                        string rep, binnames;
-                       in >> i >> rep >> binnames; gobble(in);
+                       in >> i >> rep >> binnames; m->gobble(in);
                        out2 << rep << '\t' << binnames << endl;
                        
                        vector<string> names;
-                       splitAtComma(binnames, names);
+                       m->splitAtComma(binnames, names);
                        int binsize = names.size();
                        
                        //if you have a groupfile
@@ -754,7 +808,7 @@ int GetOTURepCommand::processNames(string filename, string label) {
 
                        if (sequence != "not found") {
                                if (sorted == "") { //print them out
-                                       rep = rep + "|" + toString(i+1);
+                                       rep = rep + "\t" + toString(i+1);
                                        rep = rep + "|" + toString(binsize);
                                        if (groupfile != "") {
                                                rep = rep + "|" + group;
@@ -780,7 +834,7 @@ int GetOTURepCommand::processNames(string filename, string label) {
                        //print them
                        for (int i = 0; i < reps.size(); i++) {
                                string sequence = fasta->getSequence(reps[i].name);
-                               string outputName = reps[i].name + "|" + toString(reps[i].bin);
+                               string outputName = reps[i].name + "\t" + toString(reps[i].bin);
                                outputName = outputName + "|" + toString(reps[i].size);
                                if (groupfile != "") {
                                        outputName = outputName + "|" + reps[i].group;
@@ -789,10 +843,12 @@ int GetOTURepCommand::processNames(string filename, string label) {
                                out << sequence << endl;
                        }
                }
-                       
+               
+               in.close();
                out.close();
                out2.close();
                
+               remove(filename.c_str());
                rename(tempNameFile.c_str(), filename.c_str());
                
                return 0;