]> git.donarmstrong.com Git - mothur.git/blobdiff - getoturepcommand.cpp
added pca command
[mothur.git] / getoturepcommand.cpp
index 92ba3ad5214cee119f9dcee08c256e1b1a2f79be..41cc6b5eefca511b4137ecf635a841ca24159f1a 100644 (file)
@@ -9,13 +9,32 @@
 
 #include "getoturepcommand.h"
 
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareName(repStruct left, repStruct right){
+       return (left.name < right.name);        
+}
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareBin(repStruct left, repStruct right){
+       return (left.bin < right.bin);  
+}
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareSize(repStruct left, repStruct right){
+       return (left.size < right.size);        
+}
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareGroup(repStruct left, repStruct right){
+       return (left.group < right.group);      
+}
 //**********************************************************************************************************************
 GetOTURepCommand::GetOTURepCommand(string option){
        try{
                globaldata = GlobalData::getInstance();
                abort = false;
                allLines = 1;
-               lines.clear();
                labels.clear();
                
                //allow user to run help
@@ -23,7 +42,7 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        help(); abort = true;
                } else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta","list","line","label","name", "group"};
+                       string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -50,16 +69,22 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not found") { mothurOut("list is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
                        else if (listfile == "not open") { abort = true; }      
+                       
+                       phylipfile = validParameter.validFile(parameters, "phylip", true);
+                       if (phylipfile == "not found") { phylipfile = "";  }
+                       else if (phylipfile == "not open") { abort = true; }    
+                       
+                       columnfile = validParameter.validFile(parameters, "column", true);
+                       if (columnfile == "not found") { columnfile = ""; }
+                       else if (columnfile == "not open") { abort = true; }    
+                       
+                       if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a get.oturep command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
+                       else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
+               
+                       if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
 
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
-                       line = validParameter.validFile(parameters, "line", false);
-                       if (line == "not found") { line = "";  }
-                       else { 
-                               if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
-                               else { allLines = 1;  }
-                       }
-                       
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; }
                        else { 
@@ -67,13 +92,10 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                else { allLines = 1;  }
                        }
                        
-                       //make sure user did not use both the line and label parameters
-                       if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
-                       //if the user has not specified any line or labels use the ones from read.otu
-                       else if ((line == "") && (label == "")) {  
+                       //if the user has not specified any labels use the ones from read.otu
+                       if (label == "") {  
                                allLines = globaldata->allLines; 
                                labels = globaldata->labels; 
-                               lines = globaldata->lines;
                        }
                        
                        namesfile = validParameter.validFile(parameters, "name", true);
@@ -81,19 +103,26 @@ GetOTURepCommand::GetOTURepCommand(string option){
                        else if (namesfile == "not found") { namesfile = ""; }
 
                        groupfile = validParameter.validFile(parameters, "group", true);
-                       if (groupfile == "not open") { abort = true; }
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }
                        else if (groupfile == "not found") { groupfile = ""; }
                        else {
                                //read in group map info.
                                groupMap = new GroupMap(groupfile);
                                groupMap->readMap();
                        }
-               
-                       if (abort == false) {
                        
-                               if(globaldata->gSparseMatrix != NULL)   {
-                                       matrix = globaldata->gSparseMatrix;
-                               }
+                       sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
+                       if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
+                               mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); mothurOutEndLine();
+                               sorted = "";
+                       }
+                       
+                       if ((sorted == "group") && (groupfile == "")) {
+                               mothurOut("You must provide a groupfile to sort by group. I will not sort."); mothurOutEndLine();
+                               sorted = "";
+                       }
+                       
+                       if (abort == false) {
 
                                //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
                                if (globaldata->gListVector != NULL) {
@@ -112,15 +141,6 @@ GetOTURepCommand::GetOTURepCommand(string option){
                                        }
                                } else { mothurOut("error, no listvector."); mothurOutEndLine(); }
 
-                               // Create a data structure to quickly access the distance information.
-                               // It consists of a vector of distance maps, where each map contains
-                               // all distances of a certain sequence. Vector and maps are accessed
-                               // via the index of a sequence in the distance matrix
-                               seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
-                               for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
-                                       seqVec[currentCell->row][currentCell->column] = currentCell->dist;
-                               }
-
                                fasta = new FastaMap();
                        }
                }
@@ -136,11 +156,12 @@ GetOTURepCommand::GetOTURepCommand(string option){
 void GetOTURepCommand::help(){
        try {
                mothurOut("The get.oturep command can only be executed after a successful read.dist command.\n");
-               mothurOut("The get.oturep command parameters are list, fasta, name, group, line and label.  The fasta and list parameters are required, and you may not use line and label at the same time.\n");
-               mothurOut("The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes.\n");
-               mothurOut("The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, line=yourLines, label=yourLabels).\n");
-               mothurOut("Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, line=1-3-5, name=amazon.names).\n");
-               mothurOut("The default value for line and label are all lines in your inputfile.\n");
+               mothurOut("The get.oturep command parameters are list, fasta, name, group, sorted and label.  The fasta and list parameters are required.\n");
+               mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
+               mothurOut("The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
+               mothurOut("Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups, name=amazon.names).\n");
+               mothurOut("The default value for label is all labels in your inputfile.\n");
+               mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
                mothurOut("The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin.\n");
                mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
                mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
@@ -170,23 +191,21 @@ int GetOTURepCommand::execute(){
        try {
        
                if (abort == true) { return 0; }
-
-               int count = 1;
                int error;
                
                //read fastafile
                fasta->readFastaFile(fastafile);
                
-               in.close();
-               
-               //set format to list so input can get listvector
-               globaldata->setFormat("list");
+               //in.close();
+               //read distance file and generate indexed distance file that can be used by findrep function
+//....new reading class....//
                
                //if user gave a namesfile then use it
-               if (namesfile != "") {
-                       readNamesFile();
-               }
+               if (namesfile != "") {  readNamesFile();        }
                
+               //set format to list so input can get listvector
+               globaldata->setFormat("list");
+
                //read list file
                read = new ReadOTUFile(listfile);
                read->read(&*globaldata); 
@@ -198,37 +217,38 @@ int GetOTURepCommand::execute(){
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
-               set<int> userLines = lines;
-
                
-               while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
+               while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
-                       if (allLines == 1 || lines.count(count) == 1 || labels.count(list->getLabel()) == 1){
-                                       mothurOut(list->getLabel() + "\t" + toString(list->size()) + "\t" + toString(count)); mothurOutEndLine();
+                       if (allLines == 1 || labels.count(list->getLabel()) == 1){
+                                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
                                        error = process(list);
                                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                                        
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
-                                       userLines.erase(count);
                        }
                        
                        if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = list->getLabel();
+                                       
                                        delete list;
                                        list = input->getListVector(lastLabel);
-                                       mothurOut(list->getLabel() + "\t" + toString(list->size()) + "\t" + toString(count)); mothurOutEndLine();
+                                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
                                        error = process(list);
                                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                                        
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       list->setLabel(saveLabel);
                        }
                        
                        lastLabel = list->getLabel();
                        
                        delete list;
                        list = input->getListVector();
-                       count++;
                }
                
                //output error messages about any remaining user labels
@@ -243,18 +263,16 @@ int GetOTURepCommand::execute(){
                        }
                }
                
-               //run last line if you need to
+               //run last label if you need to
                if (needToRun == true)  {
-                       delete list;
+                       if (list != NULL) {     delete list;    }
                        list = input->getListVector(lastLabel);
-                       mothurOut(list->getLabel() + "\t" + toString(list->size()) + "\t" + toString(count)); mothurOutEndLine();
+                       mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
                        error = process(list);
                        delete list;
                        if (error == 1) { return 0; } //there is an error in hte input files, abort command
                }
                
-               delete matrix;
-               globaldata->gSparseMatrix = NULL;
                delete list;
                globaldata->gListVector = NULL;
 
@@ -330,9 +348,9 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        }
                        //rip off last dash
                        group = group.substr(0, group.length()-1);
-               }
+               }else{ group = ""; }
 
-               // if only 1 sequence in bin or processing the "unique" line, then 
+               // if only 1 sequence in bin or processing the "unique" label, then 
                // the first sequence of the OTU is the representative one
                if ((names.size() == 1) || (list->getLabel() == "unique")) {
                        return names[0];
@@ -350,7 +368,7 @@ string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, i
                        SeqMap::iterator it;
                        SeqMap currMap;
                        for (size_t i=0; i < seqIndex.size(); i++) {
-                               currMap = seqVec[seqIndex[i]];
+       //currMap = seqVec[seqIndex[i]];
                                for (size_t j=0; j < seqIndex.size(); j++) {
                                        it = currMap.find(seqIndex[j]);         
                                        if (it != currMap.end()) {
@@ -387,6 +405,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                //create output file
                string outputFileName = getRootName(listfile) + processList->getLabel() + ".rep.fasta";
                openOutputFile(outputFileName, out);
+               vector<repStruct> reps;
 
                //for each bin in the list vector
                for (int i = 0; i < processList->size(); i++) {
@@ -398,19 +417,43 @@ int GetOTURepCommand::process(ListVector* processList) {
                        sequence = fasta->getSequence(nameRep);
 
                        if (sequence != "not found") {
-                               nameRep = nameRep + "|" + toString(i+1);
-                               nameRep = nameRep + "|" + toString(binsize);
-                               if (groupfile != "") {
-                                       nameRep = nameRep + "|" + groups;
+                               if (sorted == "") { //print them out
+                                       nameRep = nameRep + "|" + toString(i+1);
+                                       nameRep = nameRep + "|" + toString(binsize);
+                                       if (groupfile != "") {
+                                               nameRep = nameRep + "|" + groups;
+                                       }
+                                       out << ">" << nameRep << endl;
+                                       out << sequence << endl;
+                               }else { //save them
+                                       repStruct newRep(nameRep, i+1, binsize, groups);
+                                       reps.push_back(newRep);
                                }
-                               out << ">" << nameRep << endl;
-                               out << sequence << endl;
-                       } else { 
+                       }else { 
                                mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine(); 
                                remove(outputFileName.c_str());
                                return 1;
                        }
                }
+               
+               if (sorted != "") { //then sort them and print them
+                       if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
+                       else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
+                       else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
+                       else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
+                       
+                       //print them
+                       for (int i = 0; i < reps.size(); i++) {
+                               string sequence = fasta->getSequence(reps[i].name);
+                               string outputName = reps[i].name + "|" + toString(reps[i].bin);
+                               outputName = outputName + "|" + toString(reps[i].size);
+                               if (groupfile != "") {
+                                       outputName = outputName + "|" + reps[i].group;
+                               }
+                               out << ">" << outputName << endl;
+                               out << sequence << endl;
+                       }
+               }
 
                out.close();
                return 0;