]> git.donarmstrong.com Git - mothur.git/blobdiff - sensspeccommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / sensspeccommand.cpp
index 7f5d9dc05ff88882f90680a4f9281908102b573c..f61232a26c133fb9d0cebfa9cdcca31fdfe2f0dc 100644 (file)
 //**********************************************************************************************************************
 vector<string> SensSpecCommand::setParameters(){       
        try {
-               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
-               CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
-               //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
-               CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
-               CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist);
+               CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip);
+               CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
+               CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
+               CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -36,25 +35,7 @@ vector<string> SensSpecCommand::setParameters(){
 string SensSpecCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name, unless you have valid current files.\n";
-               helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
-               helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n";
-               helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
-               helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
-               helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
-               helpString += "The default value for label is all labels in your inputfile.\n";
-               helpString += "The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n";
-               helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
-               helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false.\n";
-               helpString += "The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n";
-               helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n";
-               helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n";
-               helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n";
-               helpString += "The group parameter allows you provide a group file.\n";
-               helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n";
-               helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n";
-               helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n";
-               helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n";
+               helpString += "The sens.spec command....\n";
                return helpString;
        }
        catch(exception& e) {
@@ -63,6 +44,21 @@ string SensSpecCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string SensSpecCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "sensspec") {  pattern = "[filename],sensspec"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SensSpecCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
 SensSpecCommand::SensSpecCommand(){    
        try {
                abort = true; calledHelp = true; 
@@ -80,18 +76,17 @@ SensSpecCommand::SensSpecCommand(){
 SensSpecCommand::SensSpecCommand(string option)  {
        try {
                
-               abort = false; calledHelp = false;   
+               abort = false; calledHelp = false;  
+               allLines = 1;
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        string temp;
-
-                       //valid paramters for this command
-                       string AlignArray[] =  {"list", "phylip", "column", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
                        
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -135,16 +130,7 @@ SensSpecCommand::SensSpecCommand(string option)  {
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["column"] = inputDir + it->second;           }
-                               }
-                               
-                               //it = parameters.find("name");
-                               //user has given a template file
-                               //if(it != parameters.end()){ 
-                                       //path = m->hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       //if (path == "") {     parameters["name"] = inputDir + it->second;             }
-                               //}
-                               
+                               }                               
                        }
                        //check for required parameters
                        listFile = validParameter.validFile(parameters, "list", true);
@@ -154,16 +140,17 @@ SensSpecCommand::SensSpecCommand(string option)  {
                                else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }
                        else if (listFile == "not open") { abort = true; }      
+                       else { m->setListFile(listFile); }
                        
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
                        if (phylipfile == "not found") { phylipfile = "";  }
                        else if (phylipfile == "not open") { abort = true; }    
-                       else { distFile = phylipfile; format = "phylip";   }
+                       else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);  }
                        
                        columnfile = validParameter.validFile(parameters, "column", true);
                        if (columnfile == "not found") { columnfile = ""; }
                        else if (columnfile == "not open") { abort = true; }    
-                       else { distFile = columnfile; format = "column";   }
+                       else { distFile = columnfile; format = "column";   m->setColumnFile(columnfile); }
                        
                        if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
                                //give priority to column, then phylip
@@ -194,23 +181,24 @@ SensSpecCommand::SensSpecCommand(string option)  {
                        else if(!m->isTrue(temp))       {       hard = 0;       }
                        else if(m->isTrue(temp))        {       hard = 1;       }
                        
-//                     temp = validParameter.validFile(parameters, "name", true);
-//                     if (temp == "not found")        {       nameFile = "";          }
-//                     else if(temp == "not open")     {       abort = true;           }
-//                     else                                            {       nameFile = temp;        }
-//                     cout << "name:\t" << nameFile << endl;
-
                        temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
-                       convert(temp, cutoff);  
+                       m->mothurConvert(temp, cutoff);  
 //                     cout << cutoff << endl;
                        
                        temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
-                       convert(temp, precision);  
+                       m->mothurConvert(temp, precision);  
 //                     cout << precision << endl;
                        
-                       lineLabel = validParameter.validFile(parameters, "label", false);       if (lineLabel == "not found") { lineLabel = ""; }
+                       string label = validParameter.validFile(parameters, "label", false);                    
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
                        
-                       sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile));
+                       sensSpecFileName = getOutputFileName("sensspec",variables);
                }
        }
        catch(exception& e) {
@@ -223,12 +211,30 @@ SensSpecCommand::SensSpecCommand(string option)  {
 int SensSpecCommand::execute(){
        try{
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-
+        
+        int startTime = time(NULL);
+        
+        //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file.
+        string newListFile = preProcessList();
+        if (newListFile != "") { listFile = newListFile; }
+        
                setUpOutput();
                outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
                if(format == "phylip")          {       processPhylip();        }
                else if(format == "column")     {       processColumn();        }
                
+        //remove temp file if created
+        if (newListFile != "") { m->mothurRemove(newListFile); }
+        
+               if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
+        
+        m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine();
+        
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(sensSpecFileName); m->mothurOutEndLine();  
+               m->mothurOutEndLine();
+               
                
                return 0;       
        }
@@ -240,216 +246,383 @@ int SensSpecCommand::execute(){
 
 //***************************************************************************************************************
 
-void SensSpecCommand::processPhylip(){
+int SensSpecCommand::processPhylip(){
        try{
                //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
-               
-               ifstream inputListFile;
-               m->openInputFile(listFile, inputListFile);
-               
                string origCutoff = "";
                bool getCutoff = 0;
                if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
-               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
+               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }               
                
-               string label;
-               int numOTUs;
-
                map<string, int> seqMap;
                string seqList;
                
-               while(inputListFile){
-                       inputListFile >> label >> numOTUs;
-                       for(int i=0;i<numOTUs;i++){
-                               inputListFile >> seqList;
-                               int seqListLength = seqList.length();
-                               string seqName = "";
-                               for(int j=0;j<seqListLength;j++){
-                                       
-                                       if(seqList[j] == ','){
-                                               seqMap[seqName] = i;
-                                               seqName = "";
-                                       }
-                                       else{
-                                               seqName += seqList[j];
-                                       }
-                                       
-                               }
-                               seqMap[seqName] = i;
+               InputData input(listFile, "list");
+               ListVector* list = input.getListVector();
+               string lastLabel = list->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               
+               while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       
+                       if(m->control_pressed){
+                for (int i = 0; i < outputNames.size(); i++){  m->mothurRemove(outputNames[i]);  }  delete list;  return 0;
+            }
+                       
+                       if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //process
+                               fillSeqMap(seqMap, list);
+                               process(seqMap, list->getLabel(), getCutoff, origCutoff);
                        }
-                       m->gobble(inputListFile);
+                       
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input.getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //process
+                               fillSeqMap(seqMap, list);
+                               process(seqMap, list->getLabel(), getCutoff, origCutoff);
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                       }               
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       delete list;
+                       list = input.getListVector();
+               }
+               
                
-                       int lNumSeqs = seqMap.size();
-                       int pNumSeqs = 0;
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (list != NULL) {     delete list;    }
+                       list = input.getListVector(lastLabel);
+                       
+                       //process
+                       fillSeqMap(seqMap, list);
+                       process(seqMap, list->getLabel(), getCutoff, origCutoff);
+                       
+                       delete list;
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "processPhylip");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
 
-                       ifstream phylipFile;
-                       m->openInputFile(distFile, phylipFile);
-                       phylipFile >> pNumSeqs;
-                       if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
-                       
-                       string seqName;
-                       double distance;
-                       vector<int> otuIndices(lNumSeqs, -1);
+int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
+       try {
+               //for each otu
+               for(int i=0;i<list->getNumBins();i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       string seqList = list->get(i);
+                       int seqListLength = seqList.length();
+                       string seqName = "";
+                       
+                       //parse bin by name, mapping each name to its otu number
+                       for(int j=0;j<seqListLength;j++){
                                
-                       truePositives = 0;
-                       falsePositives = 0;
-                       trueNegatives = 0;
-                       falseNegatives = 0;
-                       
-                       if(getCutoff == 1){
-                               if(label != "unique"){
-                                       origCutoff = label;
-                                       convert(label, cutoff);
-                                       if(hard == 0){  cutoff += (0.49 / double(precision));   }               
+                               if(seqList[j] == ','){
+                                       seqMap[seqName] = i;
+                                       seqName = "";
                                }
                                else{
-                                       origCutoff = "unique";
-                                       cutoff = 0.0000;
+                                       seqName += seqList[j];
                                }
+                               
                        }
-                                  
-                       cout << label << endl;
+                       seqMap[seqName] = i;
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "fillSeqMap");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
+       try {
+               int numSeqs = 0;
+               
+               //for each otu
+               for(int i=0;i<list->getNumBins();i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       vector<string> seqNameVector;
+                       string bin = list->get(i);
+                       m->splitAtComma(bin, seqNameVector);
+                       
+                       numSeqs += seqNameVector.size();
+                       
+                       for(int j=0;j<seqNameVector.size();j++){
+                               string seqPairString = "";                              
+                               for(int k=0;k<j;k++){
+                                       if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
+                                       else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
+                                       seqPairSet.insert(seqPairString);
+                               }
+                       }
+               }
+               
+               return numSeqs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
+       try {
+                                               
+               int lNumSeqs = seqMap.size();
+               int pNumSeqs = 0;
+               
+               ifstream phylipFile;
+               m->openInputFile(distFile, phylipFile);
+               phylipFile >> pNumSeqs;
+               if(pNumSeqs != lNumSeqs){       m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
+               
+               string seqName;
+               double distance;
+               vector<int> otuIndices(lNumSeqs, -1);
+               
+               truePositives = 0;
+               falsePositives = 0;
+               trueNegatives = 0;
+               falseNegatives = 0;
+               
+               if(getCutoff == 1){
+                       if(label != "unique"){
+                               origCutoff = label;
+                               convert(label, cutoff);
+                               if(hard == 0){  cutoff += (0.49 / double(precision));   }               
+                       }
+                       else{
+                               origCutoff = "unique";
+                               cutoff = 0.0000;
+                       }
+               }
+               
+               m->mothurOut(label); m->mothurOutEndLine();
+               
+               for(int i=0;i<pNumSeqs;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       phylipFile >> seqName;
+                       otuIndices[i] = seqMap[seqName];
                        
-                       for(int i=0;i<lNumSeqs;i++){
-                               phylipFile >> seqName;
-                               otuIndices[i] = seqMap[seqName];
+                       for(int j=0;j<i;j++){
+                               phylipFile >> distance;
                                
-                               for(int j=0;j<i;j++){
-                                       phylipFile >> distance;
-                                       
-                                       if(distance <= cutoff){
-                                               if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
-                                               else                                                            {       falseNegatives++;       }
-                                       }
-                                       else{
-                                               if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
-                                               else                                                            {       trueNegatives++;        }
-                                       }
+                               if(distance <= cutoff){
+                                       if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
+                                       else                                                            {       falseNegatives++;       }
+                               }
+                               else{
+                                       if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
+                                       else                                                            {       trueNegatives++;        }
                                }
                        }
-                       phylipFile.close();
+               }
+               phylipFile.close();
+               
+               outputStatistics(label, origCutoff);
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "process");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
+       try {
+               int numDists = (numSeqs * (numSeqs-1) / 2);
+               
+               ifstream columnFile;
+               m->openInputFile(distFile, columnFile);
+               string seqNameA, seqNameB, seqPairString;
+               double distance;
+               
+               truePositives = 0;
+               falsePositives = 0;
+               trueNegatives = numDists;
+               falseNegatives = 0;
+               
+               if(getCutoff == 1){
+                       if(label != "unique"){
+                               origCutoff = label;
+                               convert(label, cutoff);
+                               if(hard == 0){  cutoff += (0.49 / double(precision));   }               
+                       }
+                       else{
+                               origCutoff = "unique";
+                               cutoff = 0.0000;
+                       }
+               }
+               
+               m->mothurOut(label); m->mothurOutEndLine();
+               
+               while(columnFile){
+                       columnFile >> seqNameA >> seqNameB >> distance;
+                       if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
+                       else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
                        
-                       outputStatistics(label, origCutoff);
+                       set<string>::iterator it = seqPairSet.find(seqPairString);
+                       
+                       if(distance <= cutoff){
+                               if(it != seqPairSet.end()){
+                                       truePositives++;
+                                       seqPairSet.erase(it);   
+                               }
+                               else{
+                                       falseNegatives++;
+                               }
+                               trueNegatives--;
+                       }
+                       else if(it != seqPairSet.end()){        
+                               falsePositives++;
+                               trueNegatives--;
+                               seqPairSet.erase(it);   
+                       }
+                       
+                       m->gobble(columnFile);
                }
-               inputListFile.close();
-
+               falsePositives += seqPairSet.size();
+               
+               outputStatistics(label, origCutoff);
+               
+                               
+               return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "SensSpecCommand", "processPhylip");
+               m->errorOut(e, "SensSpecCommand", "process");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
 
-void SensSpecCommand::processColumn(){
+int SensSpecCommand::processColumn(){
        try{            
-               ifstream inputListFile;
-               m->openInputFile(listFile, inputListFile);
-               
                string origCutoff = "";
                bool getCutoff = 0;
                if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
                else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
                
                set<string> seqPairSet;
+               int numSeqs = 0;
+               
+               InputData input(listFile, "list");
+               ListVector* list = input.getListVector();
+               string lastLabel = list->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
                
-               string label, seqList;
-               int numOTUs;
-               int numSeqs;
                
-               while(inputListFile){
-                       numSeqs = 0;
+               while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
-                       inputListFile >> label >> numOTUs;
-                       for(int i=0;i<numOTUs;i++){
-                               
-                               vector<string> seqNameVector;
+                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  delete list;  return 0;  }
+                       
+                       if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
                                
-                               inputListFile >> seqList;
-                               int seqListLength = seqList.length();
-                               string seqName = "";
-                               for(int j=0;j<seqListLength;j++){
-                                       
-                                       if(seqList[j] == ','){
-                                               seqNameVector.push_back(seqName);
-                                               seqName = "";
-                                       }
-                                       else{
-                                               seqName += seqList[j];
-                                       }
-                               }
-                               seqNameVector.push_back(seqName);
-
-                               numSeqs += seqNameVector.size();
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
                                
-                               int numSeqsInOTU = seqNameVector.size();
-                               for(int j=0;j<numSeqsInOTU;j++){
-                                       string seqPairString = "";                              
-                                       for(int k=0;k<j;k++){
-                                               if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
-                                               else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
-                                               seqPairSet.insert(seqPairString);
-                                       }
-                               }
-                       }
-                       m->gobble(inputListFile);
-                       
-                       int numDists = (numSeqs * (numSeqs-1) / 2);
-
-                       ifstream columnFile;
-                       m->openInputFile(distFile, columnFile);
-                       string seqNameA, seqNameB, seqPairString;
-                       double distance;
-                       
-                       truePositives = 0;
-                       falsePositives = 0;
-                       trueNegatives = numDists;
-                       falseNegatives = 0;
-                       
-                       if(getCutoff == 1){
-                               if(label != "unique"){
-                                       origCutoff = label;
-                                       convert(label, cutoff);
-                                       if(hard == 0){  cutoff += (0.49 / double(precision));   }               
-                               }
-                               else{
-                                       origCutoff = "unique";
-                                       cutoff = 0.0000;
-                               }
+                               //process
+                               numSeqs = fillSeqPairSet(seqPairSet, list);
+                               process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
                        }
                        
-                       cout << label << endl;
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input.getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //process
+                               numSeqs = fillSeqPairSet(seqPairSet, list);
+                               process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                       }               
                        
-                       while(columnFile){
-                               columnFile >> seqNameA >> seqNameB >> distance;
-                               if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
-                               else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
-
-                               set<string>::iterator it = seqPairSet.find(seqPairString);
+                       lastLabel = list->getLabel();                   
                        
-                               if(distance <= cutoff){
-                                       if(it != seqPairSet.end()){
-                                               truePositives++;
-                                               seqPairSet.erase(it);   
-                                       }
-                                       else{
-                                               falseNegatives++;
-                                       }
-                                       trueNegatives--;
-                               }
-                               else if(it != seqPairSet.end()){        
-                                       falsePositives++;
-                                       trueNegatives--;
-                                       seqPairSet.erase(it);   
-                               }
-                               
-                               m->gobble(columnFile);
+                       delete list;
+                       list = input.getListVector();
+               }
+               
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
                        }
-                       falsePositives += seqPairSet.size();
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (list != NULL) {     delete list;    }
+                       list = input.getListVector(lastLabel);
                        
-                       outputStatistics(label, origCutoff);
+                       //process
+                       numSeqs = fillSeqPairSet(seqPairSet, list);
+                       delete list;
+                       process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
                }
+               
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "SensSpecCommand", "processColumn");
@@ -522,6 +695,122 @@ void SensSpecCommand::outputStatistics(string label, string cutoff){
                exit(1);
        }
 }
+//***************************************************************************************************************
+
+string SensSpecCommand::preProcessList(){
+    try {
+        set<string> uniqueNames;
+        //get unique names from distance file
+        if (format == "phylip") {
+            
+            ifstream phylipFile;
+            m->openInputFile(distFile, phylipFile);
+            string numTest;
+            int pNumSeqs;
+                       phylipFile >> numTest;
+                       
+                       if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+            else {
+                m->mothurConvert(numTest, pNumSeqs);
+            }
+            phylipFile >> pNumSeqs; m->gobble(phylipFile);
+            
+            string seqName;
+            double distance;
+            
+            for(int i=0;i<pNumSeqs;i++){
+                
+                if (m->control_pressed) { return ""; }
+                
+                phylipFile >> seqName; 
+                uniqueNames.insert(seqName);
+                
+                for(int j=0;j<i;j++){
+                    phylipFile >> distance;
+                }
+                m->gobble(phylipFile);
+            }
+            phylipFile.close();
+        }else {
+            ifstream columnFile;
+            m->openInputFile(distFile, columnFile);
+            string seqNameA, seqNameB;
+            double distance;
+            
+            while(columnFile){
+                if (m->control_pressed) { return ""; }
+                columnFile >> seqNameA >> seqNameB >> distance;
+                uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
+                m->gobble(columnFile);
+            }
+            columnFile.close();
+        }
+        
+        //read list file, if numSeqs > unique names then remove redundant names
+        string newListFile = listFile + ".temp";
+        ofstream out;
+        m->openOutputFile(newListFile, out);
+        ifstream in;
+               m->openInputFile(listFile, in);
+               
+               bool wroteSomething = false;
+               
+               while(!in.eof()){
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
+            
+                       //read in list vector
+                       ListVector list(in);
+            
+            //listfile is already unique
+            if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
+                       
+                       //make a new list vector
+                       ListVector newList;
+                       newList.setLabel(list.getLabel());
+                       
+                       //for each bin
+                       for (int i = 0; i < list.getNumBins(); i++) {
+                
+                               //parse out names that are in accnos file
+                               string binnames = list.get(i);
+                vector<string> bnames;
+                m->splitAtComma(binnames, bnames);
+                               
+                               string newNames = "";
+                for (int i = 0; i < bnames.size(); i++) {
+                                       string name = bnames[i];
+                                       //if that name is in the .accnos file, add it
+                                       if (uniqueNames.count(name) != 0) {  newNames += name + ",";  }
+                               }
+                
+                               //if there are names in this bin add to new list
+                               if (newNames != "") { 
+                                       newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+                                       newList.push_back(newNames);    
+                               }
+                       }
+            
+                       //print new listvector
+                       if (newList.getNumBins() != 0) {
+                               wroteSomething = true;
+                               newList.print(out);
+                       }
+                       
+                       m->gobble(in);
+               }
+               in.close();     
+               out.close();
+
+        if (wroteSomething) { return newListFile; }
+        return ""; 
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SensSpecCommand", "preProcessList");
+        exit(1);
+    }
+}
+
 
 //***************************************************************************************************************