]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed subsample name file name issue. added count parameter to cluster command....
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 30 Jul 2012 19:25:39 +0000 (15:25 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 30 Jul 2012 19:25:39 +0000 (15:25 -0400)
23 files changed:
cluster.cpp
clustercommand.cpp
clustercommand.h
counttable.cpp
counttable.h
createdatabasecommand.cpp
getcurrentcommand.cpp
mgclustercommand.cpp
mothurout.cpp
nameassignment.cpp
nameassignment.hpp
optionparser.cpp
readcolumn.cpp
readcolumn.h
readmatrix.hpp
readphylip.cpp
readphylip.h
screenseqscommand.cpp
setcurrentcommand.cpp
setcurrentcommand.h
sparsedistancematrix.cpp
subsamplecommand.cpp
summarycommand.cpp

index 2a3e751c5897e34039e6b9d15f1e0a908ed60556..18133a29030cc47d10ab7a1c7f3572cec74e4c29 100644 (file)
@@ -64,7 +64,7 @@ void Cluster::update(double& cutOFF){
         nRowCells = dMatrix->seqVec[smallRow].size();
         
                vector<int> foundCol(nColCells, 0);
-        //cout << "small cell: " << smallRow << '\t' << smallCol << endl;  
+        //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;  
                int search;
                bool changed;
         
index 56ce8bb19e19d10b7181512b31d422d3d6b2a32f..19eaf85bc04fdce079f21a189c7d5bd4baa6eca0 100644 (file)
 #include "readmatrix.hpp"
 #include "clusterdoturcommand.h"
 
+
 //**********************************************************************************************************************
 vector<string> ClusterCommand::setParameters(){        
        try {
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
-               CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);              
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount);
+        CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);             
                CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
@@ -42,7 +44,7 @@ vector<string> ClusterCommand::setParameters(){
 string ClusterCommand::getHelpString(){        
        try {
                string helpString = "";
-               helpString += "The cluster command parameter options are phylip, column, name, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
+               helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
                helpString += "The cluster command should be in the following format: \n";
                helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
                helpString += "The acceptable cluster methods are furthest, nearest, average and weighted.  If no method is provided then average is assumed.\n";       
@@ -169,6 +171,11 @@ ClusterCommand::ClusterCommand(string option)  {
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
                        
                        if ((phylipfile == "") && (columnfile == "")) { 
                                //is there are current file available for either of these?
@@ -187,16 +194,22 @@ ClusterCommand::ClusterCommand(string option)  {
                        else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
                        
                        if (columnfile != "") {
-                               if (namefile == "") 
+                               if ((namefile == "") && (countfile == ""))
                                        namefile = m->getNameFile(); 
                                        if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                                               countfile = m->getCountTableFile();
+                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
                                }
                        }
                        
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        //get user cutoff and precision or use defaults
@@ -253,6 +266,7 @@ int ClusterCommand::execute(){
                        //run unique.seqs for deconvolute results
                        string inputString = "phylip=" + distfile;
                        if (namefile != "") { inputString += ", name=" + namefile; }
+            else if (countfile != "") { inputString += ", count=" + countfile; }
                        inputString += ", precision=" + toString(precision);
                        inputString += ", method=" + method;
                        if (hard)       { inputString += ", hard=T";    }
@@ -281,22 +295,30 @@ int ClusterCommand::execute(){
                read->setCutoff(cutoff);
                
                NameAssignment* nameMap = NULL;
+        CountTable* ct = NULL;
                if(namefile != ""){     
                        nameMap = new NameAssignment(namefile);
                        nameMap->readMap();
-               }
+            read->read(nameMap);
+               }else if (countfile != "") {
+            ct = new CountTable();
+            ct->readTable(countfile);
+            read->read(ct);
+        }
                
-               read->read(nameMap);
                list = read->getListVector();
                matrix = read->getDMatrix();
-               rabund = new RAbundVector(list->getRAbundVector());
+        
+               if(countfile != "") {
+            rabund = new RAbundVector();
+            createRabund(ct, list, rabund); //creates an rabund that includes the counts for the unique list
+            delete ct;
+        }else { rabund = new RAbundVector(list->getRAbundVector()); }
                delete read;
                
                if (m->control_pressed) { //clean up
-                       delete list; delete matrix; delete rabund;
-                       sabundFile.close();rabundFile.close();listFile.close();
-                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } outputTypes.clear();
-                       return 0;
+                       delete list; delete matrix; delete rabund; if(countfile == ""){rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
+                       listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0;
                }
                
                //create cluster
@@ -311,15 +333,19 @@ int ClusterCommand::execute(){
                
         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
-        string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+        string listFileName = fileroot+ tag + ".";
+        if (countfile != "") { listFileName += "unique_"; }
+        listFileName += getOutputFileNameTag("list");
         
-               m->openOutputFile(sabundFileName,       sabundFile);
-               m->openOutputFile(rabundFileName,       rabundFile);
+        if (countfile == "") {
+            m->openOutputFile(sabundFileName,  sabundFile);
+            m->openOutputFile(rabundFileName,  rabundFile);
+            outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+            outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+
+        }
                m->openOutputFile(listFileName, listFile);
-               
-               outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
-               outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
-               outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+        outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
                
                
                time_t estart = time(NULL);
@@ -337,9 +363,8 @@ int ClusterCommand::execute(){
                
                        if (m->control_pressed) { //clean up
                                delete list; delete matrix; delete rabund; delete cluster;
-                               sabundFile.close();rabundFile.close();listFile.close();
-                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } outputTypes.clear();
-                               return 0;
+                               if(countfile == "") {rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); }
+                listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0;
                        }
                
                        if (print_start && m->isTrue(timing)) {
@@ -393,9 +418,10 @@ int ClusterCommand::execute(){
                delete list;
                delete rabund;
                delete cluster;
-               
-               sabundFile.close();
-               rabundFile.close();
+        if (countfile == "") {
+            sabundFile.close();
+            rabundFile.close();
+        }
                listFile.close();
        
                if (saveCutoff != cutoff) { 
@@ -454,14 +480,17 @@ void ClusterCommand::printData(string label){
                print_start = true;
                loops = 0;
                start = time(NULL);
-
-               oldRAbund.setLabel(label);
-               if (m->isTrue(showabund)) {
-                       oldRAbund.getSAbundVector().print(cout);
-               }
-               oldRAbund.print(rabundFile);
-               oldRAbund.getSAbundVector().print(sabundFile);
-       
+        
+        if (countfile == "") {
+            oldRAbund.print(rabundFile);
+            oldRAbund.getSAbundVector().print(sabundFile);
+        }
+        
+        oldRAbund.setLabel(label);
+        if (m->isTrue(showabund)) {
+            oldRAbund.getSAbundVector().print(cout);
+        }
+        
                oldList.setLabel(label);
                oldList.print(listFile);
        }
@@ -473,3 +502,25 @@ void ClusterCommand::printData(string label){
 
 }
 //**********************************************************************************************************************
+
+int ClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
+    try {
+        rabund->setLabel(list->getLabel());        
+        for(int i = 0; i < list->getNumBins(); i++) { 
+            if (m->control_pressed) { break; }
+            vector<string> binNames;
+            string bin = list->get(i);
+            m->splitAtComma(bin, binNames);
+            int total = 0;
+            for (int j = 0; j < binNames.size(); j++) { total += ct->getNumSeqs(binNames[j]);  }
+            rabund->push_back(total);   
+        }
+        return 0;
+    }
+    catch(exception& e) {
+               m->errorOut(e, "ClusterCommand", "createRabund");
+               exit(1);
+       }
+    
+}
+//**********************************************************************************************************************
index e3dd214446d31d187148de2be7dbf2ffbfebe96e..f70bb322c76cd3bf897fd5498333e5da379557fd 100644 (file)
@@ -15,6 +15,7 @@
 #include "listvector.hpp"
 #include "cluster.hpp"
 #include "sparsedistancematrix.h"
+#include "counttable.h"
 
 /* The cluster() command:
        The cluster command outputs a .list , .rabund and .sabund files.  
@@ -52,7 +53,7 @@ private:
 
        bool abort, hard, sim;
 
-       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile;
+       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile, countfile;
        double cutoff;
        string showabund, timing;
        int precision, length;
@@ -64,6 +65,8 @@ private:
        
        void printData(string label);
        vector<string> outputNames;
+    
+    int createRabund(CountTable*&, ListVector*&, RAbundVector*&);
 };
 
 #endif
index f632731cd8892da29469b31fdefe80d9557b52a0..a664228b17c35fff4ce48ca0ea5255b5b99bb1f3 100644 (file)
@@ -158,7 +158,41 @@ int CountTable::getNumSeqs(string seqName) {
        }
 }
 /************************************************************/
-//returns names of seqs
+//returns unique index for sequence like get in NameAssignment
+int CountTable::get(string seqName) {
+    try {
+        
+        map<string, int>::iterator it = indexNameMap.find(seqName);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else { return it->second; }
+        
+        return -1;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "get");
+               exit(1);
+       }
+}
+/************************************************************/
+//create ListVector from uniques
+ListVector CountTable::getListVector() {
+    try {
+        ListVector list(indexNameMap.size());
+        for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { 
+            if (m->control_pressed) { break; }
+            list.set(it->second, it->first); 
+        }
+        return list;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getListVector");
+               exit(1);
+       }
+}
+
+/************************************************************/
+//returns the names of all unique sequences in file
 vector<string> CountTable::getNamesOfSeqs() {
     try {
         vector<string> names;
index ffc08167682bf8aa9f1a4a42267a90d666376250..8baff3080a5f5124214e0b6b6834a899b356f0f9 100644 (file)
@@ -37,6 +37,7 @@
 
 
 #include "mothurout.h"
+#include "listvector.hpp"
 
 class CountTable {
     
@@ -60,6 +61,9 @@ class CountTable {
         int getGroupIndex(string); //returns index in getGroupCounts vector of specific group
         vector<string> getNamesOfSeqs();
         int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
+        int get(string); //returns unique sequence index for reading distance matrices like NameAssignment
+        ListVector getListVector();
+        int size() { return indexNameMap.size(); }
     
     private:
         string filename;
index 5919f0c005888fba82da66d10c8dd657ab5c366b..4331c29249338ec1a9d9e57b0328dcd65945bdf6 100644 (file)
@@ -396,7 +396,7 @@ int CreateDatabaseCommand::execute(){
                 
                 //sanity check
                 if (totalAbund != classifyOtuSizes[index]) {
-                    m->mothurOut("[ERROR: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                    m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true;   break;
                 }
                 
                 //output repSeq
index c3b5f0af58ab6ef0627a6a42784c76c2813184bf..86046b64375ebd151c27e5909c0efdc9d0149bbf 100644 (file)
@@ -140,7 +140,7 @@ int GetCurrentCommand::execute(){
                                        m->setFlowFile("");
                 }else if (types[i] == "biom") {
                                        m->setBiomFile("");
-                }else if (types[i] == "counttable") {
+                }else if (types[i] == "count") {
                                        m->setCountTableFile("");
                                }else if (types[i] == "processors") {
                                        m->setProcessors("1");
index dd98b3764884f5c50009d8e9e7422a71681d94c1..477450475d479eb4308d9c2503086b9d7c03b051 100644 (file)
@@ -268,7 +268,9 @@ int MGClusterCommand::execute(){
                
         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
-        string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+        string listFileName = fileroot+ tag + ".";
+        if (countfile != "") { listFileName += "unique_"; }
+        listFileName += getOutputFileNameTag("list");
         
         if (countfile == "") {
             m->openOutputFile(sabundFileName,  sabundFile);
index dfcf25b4447d28362dae3f27a4e9cfd27e362d8e..9704464bf80326c87d4afa556e3353b95fcbf37e 100644 (file)
@@ -43,7 +43,7 @@ set<string> MothurOut::getCurrentTypes()  {
         types.insert("tree");
         types.insert("flow");
         types.insert("biom");
-        types.insert("counttable");
+        types.insert("count");
         types.insert("processors");
 
                return types;
@@ -79,7 +79,7 @@ void MothurOut::printCurrentFiles()  {
                if (treefile != "")                     {  mothurOut("tree=" + treefile); mothurOutEndLine();                           }
                if (flowfile != "")                     {  mothurOut("flow=" + flowfile); mothurOutEndLine();                           }
         if (biomfile != "")                    {  mothurOut("biom=" + biomfile); mothurOutEndLine();                           }
-        if (counttablefile != "")      {  mothurOut("counttable=" + counttablefile); mothurOutEndLine();       }
+        if (counttablefile != "")      {  mothurOut("count=" + counttablefile); mothurOutEndLine();    }
                if (processors != "1")          {  mothurOut("processors=" + processors); mothurOutEndLine();           }
                
        }
@@ -1052,6 +1052,9 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
 
 int MothurOut::renameFile(string oldName, string newName){
        try {
+        
+        if (oldName == newName) { return 0; }
+        
                ifstream inTest;
                int exist = openInputFile(newName, inTest, "");
                inTest.close();
index 126c4e0c594b6745ec6a7c4fcd5228c717d67677..1e42111e574baacf2eb2f5c3407416c9029f9041 100644 (file)
@@ -9,14 +9,15 @@ NameAssignment::NameAssignment(string nameMapFile){
        m->openInputFile(nameMapFile, fileHandle);
        
 }
-
+//**********************************************************************************************************************
+NameAssignment::NameAssignment(){ m = MothurOut::getInstance(); }
 //**********************************************************************************************************************
 
 void NameAssignment::readMap(){
        try{
                string firstCol, secondCol, skip;
        //      int index = 0;
-       
+        
                
                map<string, int>::iterator itData;
                int rowIndex = 0;
index 758a080b9d01ffea80e3b19076a7e0ce8121de0c..87f97771392c4eef78854053aa441f5c43e50e76 100644 (file)
@@ -7,7 +7,7 @@
 class NameAssignment : public map<string,int> {
 public:
        NameAssignment(string);
-       NameAssignment(){};
+       NameAssignment();
        void readMap();
        ListVector getListVector();
        int get(string);
index e3850e52c56f629a6e5395203f31cc1299a6193b..a6d5c0a668be5ebdc259a3108d4ff11341f7cd00 100644 (file)
@@ -124,7 +124,7 @@ map<string, string> OptionParser::getParameters() {
                         it->second = m->getTaxonomyFile();
                     }else if (it->first == "biom") {
                         it->second = m->getBiomFile();
-                    }else if (it->first == "counttable") {
+                    }else if (it->first == "count") {
                             it->second = m->getCountTableFile();
                     }else {
                         m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine();
index 665dcabd5db7fd17513d912bab974ae4087d449f..83d11c92221bcb3ffbb46ea097cff26b29cd0b6c 100644 (file)
@@ -54,7 +54,7 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
        
                        map<string,int>::iterator itA = nameMap->find(firstName);
                        map<string,int>::iterator itB = nameMap->find(secondName);
-                               
+
                        if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
                        if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
 
@@ -144,6 +144,124 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                exit(1);
        }
 }
+/***********************************************************************/
+
+int ReadColumnMatrix::read(CountTable* countTable){
+       try {           
+        
+               string firstName, secondName;
+               float distance;
+               int nseqs = countTable->size();
+        
+        DMatrix->resize(nseqs);
+               list = new ListVector(countTable->getListVector());
+        
+               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
+        
+               int lt = 1;
+               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
+               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
+        
+               //need to see if this is a square or a triangular matrix...
+               
+               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
+            
+            
+                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
+                       
+                       if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+            
+                       int itA = countTable->get(firstName);
+                       int itB = countTable->get(secondName);
+            
+            if (m->control_pressed) { exit(1); }
+            
+                       if (distance == -1) { distance = 1000000; }
+                       else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                       
+                       if(distance < cutoff && itA != itB){
+                               if(itA > itB){
+                    PDistCell value(itA, distance);
+                    
+                    
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
+                                               refRow = itA;
+                                               refCol = itB;
+                                               DMatrix->addCell(itB, value);
+                                       }
+                                       else if(refRow == itA && refCol == itB){
+                                               lt = 0;
+                                       }
+                                       else{
+                                               DMatrix->addCell(itB, value);
+                                       }
+                               }
+                               else if(itA < itB){
+                                       PDistCell value(itB, distance);
+                    
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
+                                               refRow = itA;
+                                               refCol = itB;
+                                               DMatrix->addCell(itA, value);
+                                       }
+                                       else if(refRow == itB && refCol == itA){
+                                               lt = 0;
+                                       }
+                                       else{
+                                               DMatrix->addCell(itA, value);
+                                       }
+                               }
+                               reading->update(itA * nseqs);
+                       }
+                       m->gobble(fileHandle);
+               }
+        
+               if(lt == 0){  // oops, it was square
+            
+                       fileHandle.close();  //let's start over
+                       DMatrix->clear();  //let's start over
+            
+                       m->openInputFile(distFile, fileHandle);  //let's start over
+            
+                       while(fileHandle){
+                               fileHandle >> firstName >> secondName >> distance;
+                               
+                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                
+                               int itA = countTable->get(firstName);
+                int itB = countTable->get(secondName);
+                
+                
+                if (m->control_pressed) { exit(1); }
+                               
+                               if (distance == -1) { distance = 1000000; }
+                               else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                               
+                               if(distance < cutoff && itA > itB){
+                    PDistCell value(itA, distance);
+                                       DMatrix->addCell(itB, value);
+                                       reading->update(itA * nseqs);
+                               }
+                
+                               m->gobble(fileHandle);
+                       }
+               }
+               
+               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+               
+               reading->finish();
+               fileHandle.close();
+        
+               list->setLabel("0");
+               
+               return 1;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadColumnMatrix", "read");
+               exit(1);
+       }
+}
 
 /***********************************************************************/
 ReadColumnMatrix::~ReadColumnMatrix(){}
index 415bef2fddf5a6272261cab2b0c364c64411fd8e..ff95aff3f8edfc6f85ece8860e78308f5bea191d 100644 (file)
@@ -20,6 +20,7 @@ public:
        ReadColumnMatrix(string, bool);
        ~ReadColumnMatrix();
        int read(NameAssignment*);
+    int read(CountTable*);
 private:
        ifstream fileHandle;
        string distFile;
index 5ef8afa9d7846a18957ac30cb483e0be98f6450f..90d5b430de4a1ce73e5a9c03880333c4b39febad 100644 (file)
@@ -13,6 +13,7 @@
 #include "mothur.h"
 #include "listvector.hpp"
 #include "nameassignment.hpp"
+#include "counttable.h"
 #include "sparsedistancematrix.h"
 
 class SparseMatrix;
@@ -23,6 +24,7 @@ public:
        ReadMatrix(){ DMatrix = new SparseDistanceMatrix(); m = MothurOut::getInstance();  }
        virtual ~ReadMatrix() {}
        virtual int read(NameAssignment*){ return 1; }
+    virtual int read(CountTable*){ return 1; }
        
        void setCutoff(float c)                 {       cutoff = c;             }
     SparseDistanceMatrix* getDMatrix()         {       return DMatrix;         }
index d256f92a816b03c101a01c12fdf19f4cace6678a..6de23e83279482127f5f261c53f94c345585455d 100644 (file)
@@ -200,7 +200,194 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){
                 exit(1);
         }
        }
+/***********************************************************************/
+
+int ReadPhylipMatrix::read(CountTable* countTable){
+    try {
+        
+        float distance;
+        int square, nseqs; 
+        string name;
+        vector<string> matrixNames;
+        
+        string numTest;
+        fileHandle >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+        
+        matrixNames.push_back(name);
+        
+        if(countTable == NULL){
+            list = new ListVector(nseqs);
+            list->set(0, name);
+        }
+        else{  list = new ListVector(countTable->getListVector()); }
+        
+        if (m->control_pressed) { return 0; }
+        
+        char d;
+        while((d=fileHandle.get()) != EOF){
+            
+            if(isalnum(d)){
+                square = 1;
+                fileHandle.putback(d);
+                for(int i=0;i<nseqs;i++){
+                    fileHandle >> distance;
+                }
+                break;
+            }
+            if(d == '\n'){
+                square = 0;
+                break;
+            }
+        }
+        
+        Progress* reading;
+        DMatrix->resize(nseqs);
+        
+        if(square == 0){
+            
+            reading = new Progress("Reading matrix:     ", nseqs * (nseqs - 1) / 2);
+            
+            int        index = 0;
+            
+            for(int i=1;i<nseqs;i++){
+                if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                
+                fileHandle >> name;
+                matrixNames.push_back(name);
+                
+                
+                //there's A LOT of repeated code throughout this method...
+                if(countTable == NULL){
+                    list->set(i, name);
+                    
+                    for(int j=0;j<i;j++){
+                        
+                        if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
+                        
+                        fileHandle >> distance;
+                        
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                        
+                        if(distance < cutoff){
+                            PDistCell value(i, distance);
+                            DMatrix->addCell(j, value);
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+                    
+                }
+                else{
+                    for(int j=0;j<i;j++){
+                        fileHandle >> distance;
+                        
+                        if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
+                        
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                        
+                        if(distance < cutoff){
+                            int iIndex = countTable->get(matrixNames[i]);
+                            int jIndex = countTable->get(matrixNames[j]);
+                            
+                            if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
+                            if (iIndex < jIndex) { 
+                                PDistCell value(jIndex, distance);
+                                DMatrix->addCell(iIndex, value);
+                            }else {
+                                PDistCell value(iIndex, distance);
+                                DMatrix->addCell(jIndex, value);
 
+                            }
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+                }
+            }
+        }
+        else{
+            
+            reading = new Progress("Reading matrix:     ", nseqs * nseqs);
+            
+            int index = nseqs;
+            
+            for(int i=1;i<nseqs;i++){
+                fileHandle >> name;                
+                matrixNames.push_back(name);
+                
+                
+                
+                if(countTable == NULL){
+                    list->set(i, name);
+                    for(int j=0;j<nseqs;j++){
+                        fileHandle >> distance;
+                        
+                        if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                        
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                        
+                        if(distance < cutoff && j < i){
+                            PDistCell value(i, distance);
+                            DMatrix->addCell(j, value);
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+                    
+                }
+                else{
+                    for(int j=0;j<nseqs;j++){
+                        fileHandle >> distance;
+                        
+                        if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                        
+                        if (distance == -1) { distance = 1000000; }
+                        else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.                                                        
+                        
+                        if(distance < cutoff && j < i){
+                            int iIndex = countTable->get(matrixNames[i]);
+                            int jIndex = countTable->get(matrixNames[j]);
+                            
+                            if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
+                            if (iIndex < jIndex) { 
+                                PDistCell value(jIndex, distance);
+                                DMatrix->addCell(iIndex, value);
+                            }else {
+                                PDistCell value(iIndex, distance);
+                                DMatrix->addCell(jIndex, value);
+                                
+                            }
+                        }
+                        index++;
+                        reading->update(index);
+                    }
+                }
+            }
+        }
+        
+        if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+        
+        reading->finish();
+        delete reading;
+        
+        list->setLabel("0");
+        fileHandle.close();
+        
+        
+        return 1;
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ReadPhylipMatrix", "read");
+        exit(1);
+    }
+}
 /***********************************************************************/
 ReadPhylipMatrix::~ReadPhylipMatrix(){}
 /***********************************************************************/
index 244cc47e4085f2820f4fef01adddf0317075e88b..c7720324ce261c475e2e83c99e2f0e0949083c81 100644 (file)
@@ -20,6 +20,7 @@ public:
        ReadPhylipMatrix(string, bool);
        ~ReadPhylipMatrix();
        int read(NameAssignment*);
+    int read(CountTable*);
 private:
        ifstream fileHandle;
        string distFile;
index 7f385d39c729e1acbc2d33325e9640c706f095e2..6a9a61323b50a4ea688ba9faa7ea855a233bbab9 100644 (file)
@@ -48,8 +48,8 @@ string ScreenSeqsCommand::getHelpString(){
                helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
-               helpString += "The start parameter .... The default is -1.\n";
-               helpString += "The end parameter .... The default is -1.\n";
+               helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
+               helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n";
                helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
@@ -1236,7 +1236,6 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File&
                        int length = MPIPos[start+i+1] - MPIPos[start+i];
 
                        char* buf4 = new char[length];
-                       memcpy(buf4, outputString.c_str(), length);
 
                        MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
                        
index 96735405c5e148159ab762a720a93f88ec2354ee..27d7b92764bda85599b2f18cbdafb3e486e704ef 100644 (file)
@@ -32,6 +32,7 @@ vector<string> SetCurrentCommand::setParameters(){
                CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptree);
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pshared);
                CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
+        CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter prelabund("relabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(prelabund);
                CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
@@ -53,7 +54,7 @@ string SetCurrentCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The set.current command allows you to set the current files saved by mothur.\n";
-               helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom and taxonomy.\n";
+               helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom, count and taxonomy.\n";
                helpString += "The clear paramter is used to indicate which file types you would like to clear values for, multiple types can be separated by dashes.\n";
                helpString += "The set.current command should be in the following format: \n";
                helpString += "set.current(fasta=yourFastaFile) or set.current(fasta=amazon.fasta, clear=name-accnos)\n";
@@ -210,6 +211,14 @@ SetCurrentCommand::SetCurrentCommand(string option)  {
                                        if (path == "") {       parameters["ordergroup"] = inputDir + it->second;               }
                                }
                                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
+                
                                it = parameters.find("relabund");
                                //user has given a template file
                                if(it != parameters.end()){ 
@@ -318,6 +327,11 @@ SetCurrentCommand::SetCurrentCommand(string option)  {
                        if (groupfile == "not open") { m->mothurOut("Ignoring: " + parameters["group"]); m->mothurOutEndLine(); groupfile = ""; }
                        else if (groupfile == "not found") {  groupfile = "";  }
                        if (groupfile != "") { m->setGroupFile(groupfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { m->mothurOut("Ignoring: " + parameters["count"]); m->mothurOutEndLine(); countfile = ""; }
+                       else if (countfile == "not found") {  countfile = "";  }
+                       if (countfile != "") { m->setCountTableFile(countfile); }
                        
                        designfile = validParameter.validFile(parameters, "design", true);
                        if (designfile == "not open") { m->mothurOut("Ignoring: " + parameters["design"]); m->mothurOutEndLine(); designfile = ""; }
@@ -460,6 +474,8 @@ int SetCurrentCommand::execute(){
                                        m->setFlowFile("");
                 }else if (types[i] == "biom") {
                                        m->setBiomFile("");
+                }else if (types[i] == "count") {
+                                       m->setCountTableFile("");
                                }else if (types[i] == "processors") {
                                        m->setProcessors("1");
                                }else if (types[i] == "all") {
index becc4d4712bba2257018378424e4f555e1a6f42a..e741399844347f3db0a936cd2581ce9ba45dd7b2 100644 (file)
@@ -39,7 +39,7 @@ private:
        string clearTypes;
        vector<string> types;
        
-       string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
+       string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, countfile;
        string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
 
        
index 8035e138c261ae427f02c17eeeb60a5db7e8d12f..7d505239d88b4d817b3b6c5fd3609c6c18e785b4 100644 (file)
@@ -118,7 +118,7 @@ ull SparseDistanceMatrix::getSmallestCell(ull& row){
                        }
                }
         
-               random_shuffle(mins.begin(), mins.end());  //randomize the order of the iterators in the mins vector
+               //random_shuffle(mins.begin(), mins.end());  //randomize the order of the iterators in the mins vector
         
         row = mins[0].row;
         ull col = mins[0].col;
index 35441013919fbfe55452532e735b5bd1d7673f2f..f9cb1e60e43d59d9d1a6ce760b3031de0e92c301 100644 (file)
@@ -569,8 +569,8 @@ int SubSampleCommand::getSubSampleFasta() {
                        delete uniqueCommand;
                        m->mothurCalling = false;
             
-            m->renameFile(filenames["name"][0], outputNameFileName);
-            m->renameFile(filenames["fasta"][0], outputFileName);
+            m->renameFile(filenames["name"][0], outputNameFileName); 
+            m->renameFile(filenames["fasta"][0], outputFileName);  
             
                        outputTypes["name"].push_back(outputNameFileName);  outputNames.push_back(outputNameFileName);
 
index 9a6aac883c7d13af87d735df4a7d484dd379479b..7a167a6432015b41cbd2b8304aaa7c3415542624 100644 (file)
@@ -777,7 +777,7 @@ vector<string> SummaryCommand::createGroupSummaryFile(int numLines, int numCols,
             }
                        
                        temp.close();
-                       //m->mothurRemove(outputNames[i]);
+                       m->mothurRemove(outputNames[i]);
                }