]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed trim.seqs bug with qtrim parameter and added num=1 special case to database...
authorwestcott <westcott>
Fri, 14 Jan 2011 16:09:49 +0000 (16:09 +0000)
committerwestcott <westcott>
Fri, 14 Jan 2011 16:09:49 +0000 (16:09 +0000)
17 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
database.hpp
distancedb.cpp
getseqscommand.cpp
kmerdb.cpp
linearalgebra.cpp
linearalgebra.h
nmdscommand.cpp [new file with mode: 0644]
nmdscommand.h [new file with mode: 0644]
pcoacommand.cpp
pcoacommand.h
qualityscores.cpp
readphylipvector.cpp [new file with mode: 0644]
readphylipvector.h [new file with mode: 0644]
treegroupscommand.cpp
trimseqscommand.cpp

index c59979fe15bb1ce671c189216fdd3d9776d10280..1b87a4f360376eb978f94c7a1e846ec2a18ffb7c 100644 (file)
@@ -9,6 +9,8 @@
 /* Begin PBXBuildFile section */
                8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* mothur.1 */; };
                A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
+               A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
+               A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
                A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
                A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
                A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
 /* Begin PBXFileReference section */
                8DD76FB20486AB0100D96B5E /* Mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Mothur; sourceTree = BUILT_PRODUCTS_DIR; };
                A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
+               A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
+               A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = "<group>"; };
+               A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = "<group>"; };
+               A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nmdscommand.cpp; sourceTree = "<group>"; };
                A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
                                A7E9B76912D37EC400DA6239 /* nocommands.cpp */,
                                A7E9B76B12D37EC400DA6239 /* normalizesharedcommand.cpp */,
                                A7E9B76C12D37EC400DA6239 /* normalizesharedcommand.h */,
+                               A713EBEB12DC7C5E000092AC /* nmdscommand.h */,
+                               A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */,
                                A7E9B77912D37EC400DA6239 /* otuhierarchycommand.cpp */,
                                A7E9B77A12D37EC400DA6239 /* otuhierarchycommand.h */,
                                A7E9B77D12D37EC400DA6239 /* pairwiseseqscommand.cpp */,
                                A7E9B7BE12D37EC400DA6239 /* readphylip.h */,
                                A7E9B7BF12D37EC400DA6239 /* readtree.cpp */,
                                A7E9B7C012D37EC400DA6239 /* readtree.h */,
+                               A713EBAA12DC7613000092AC /* readphylipvector.h */,
+                               A713EBAB12DC7613000092AC /* readphylipvector.cpp */,
                                A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */,
                                A7E9B84412D37EC400DA6239 /* splitmatrix.h */,
                        );
                                A70332B712D3A13400761E33 /* makefile in Sources */,
                                A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */,
                                A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */,
+                               A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */,
+                               A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index f581b6db86bb7e8727c76ba28abc57dfe98c06f5..ae59a8dcef9c43927b08759108c65f4c63d6a0cb 100644 (file)
 #include "corraxescommand.h"
 #include "shhhercommand.h"
 #include "pcacommand.h"
+#include "nmdscommand.h"
 
 /*******************************************************/
 
@@ -215,6 +216,7 @@ CommandFactory::CommandFactory(){
        commands["consensus.seqs"]              = "consensus.seqs";
        commands["corr.axes"]                   = "corr.axes";
        commands["pca"]                                 = "pca";
+       commands["nmds"]                                = "nmds";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -337,6 +339,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "pre.cluster")                   {       command = new PreClusterCommand(optionString);                          }
                else if(commandName == "pcoa")                                  {       command = new PCOACommand(optionString);                                        }
                else if(commandName == "pca")                                   {       command = new PCACommand(optionString);                                         }
+               else if(commandName == "nmds")                                  {       command = new NMDSCommand(optionString);                                        }
                else if(commandName == "otu.hierarchy")                 {       command = new OtuHierarchyCommand(optionString);                        }
                else if(commandName == "set.dir")                               {       command = new SetDirectoryCommand(optionString);                        }
                else if(commandName == "set.logfile")                   {       command = new SetLogFileCommand(optionString);                          }
@@ -462,6 +465,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "pre.cluster")                   {       pipecommand = new PreClusterCommand(optionString);                              }
                else if(commandName == "pcoa")                                  {       pipecommand = new PCOACommand(optionString);                                    }
                else if(commandName == "pca")                                   {       pipecommand = new PCACommand(optionString);                                             }
+               else if(commandName == "nmds")                                  {       pipecommand = new NMDSCommand(optionString);                                    }
                else if(commandName == "otu.hierarchy")                 {       pipecommand = new OtuHierarchyCommand(optionString);                    }
                else if(commandName == "set.dir")                               {       pipecommand = new SetDirectoryCommand(optionString);                    }
                else if(commandName == "set.logfile")                   {       pipecommand = new SetLogFileCommand(optionString);                              }
@@ -574,6 +578,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "pre.cluster")                   {       shellcommand = new PreClusterCommand();                         }
                else if(commandName == "pcoa")                                  {       shellcommand = new PCOACommand();                                       }
                else if(commandName == "pca")                                   {       shellcommand = new PCACommand();                                        }
+               else if(commandName == "nmds")                                  {       shellcommand = new NMDSCommand();                                       }
                else if(commandName == "otu.hierarchy")                 {       shellcommand = new OtuHierarchyCommand();                       }
                else if(commandName == "set.dir")                               {       shellcommand = new SetDirectoryCommand();                       }
                else if(commandName == "set.logfile")                   {       shellcommand = new SetLogFileCommand();                         }
index 9293f13d5f091251d77115d2f3dc5344868d2c5c..bbe01c456acbb9e66c56127d9bc23371e09b5ddf 100644 (file)
@@ -20,6 +20,7 @@
 struct seqMatch {  //used to select top n matches
                int seq;
                int match;
+               seqMatch() {}
                seqMatch(int s, int m) : seq(s), match(m) {}
 };
 /**************************************************************************************************/
index ca6ffe8ba61217ac0b66b353e8f33461f4916e52..b5c22b35bf7a58addcff79651e6841b9506ec8e2 100644 (file)
@@ -47,7 +47,7 @@ vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
                vector<int> topMatches;
                bool templateSameLength = true;
                string sequence = query->getAligned();
-               vector<seqDist> dists;
+               vector<seqDist> dists; 
                
                searchScore = -1.0;
        
@@ -56,24 +56,46 @@ vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
                if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
                
                if (templateSameLength && templateAligned) {
-                       //calc distance from this sequence to every sequence in the template
-                       for (int i = 0; i < data.size(); i++) {
-                               distCalculator->calcDist(*query, data[i]);
-                               float dist = distCalculator->getDist();
+                       if (numWanted != 1) {
                                
-                               //save distance to each template sequence
-                               seqDist temp(-1, i, dist);
-                               dists.push_back(temp);
-                       }
-                       
-                       sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
-                       
-                       //save distance of best match
-                       searchScore = dists[0].dist;
-                       
-                       //fill topmatches with numwanted closest sequences indexes
-                       for (int i = 0; i < numWanted; i++) {
-                               topMatches.push_back(dists[i].seq2);
+                               dists.resize(data.size());
+                               
+                               //calc distance from this sequence to every sequence in the template
+                               for (int i = 0; i < data.size(); i++) {
+                                       distCalculator->calcDist(*query, data[i]);
+                                       float dist = distCalculator->getDist();
+                                       
+                                       //save distance to each template sequence
+                                       dists[i].seq1 = -1;
+                                       dists[i].seq2 = i;
+                                       dists[i].dist = dist;
+                               }
+                               
+                               sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
+                               
+                               //save distance of best match
+                               searchScore = dists[0].dist;
+                               
+                               //fill topmatches with numwanted closest sequences indexes
+                               for (int i = 0; i < numWanted; i++) {
+                                       topMatches.push_back(dists[i].seq2);
+                               }
+                       }else {
+                               int bestIndex = 0;
+                               float smallDist = 100000;
+                               for (int i = 0; i < data.size(); i++) {
+                                       distCalculator->calcDist(*query, data[i]);
+                                       float dist = distCalculator->getDist();
+                                       
+                                       //are you smaller?
+                                       if (dist < smallDist) {
+                                               bestIndex = i;
+                                               smallDist = dist;
+                                       }
+                               }
+                               
+                               searchScore = smallDist;
+                               topMatches.push_back(bestIndex);
                        }
                
                }else{
index a16accbb8ac724b1fce6579a7bcad9b2f5b261c7..a191515cb601408b2c7d884b895f7e14d668d798 100644 (file)
@@ -190,6 +190,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        
                        accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
                        if (accnosfile2 == "not open") { abort = true; }
+                       if (accnosfile2 == "not found") { accnosfile2 = ""; }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
                        if (fastafile == "not open") { abort = true; }
index 108b207d87c469b9f95df83921332b1bf5a756ae..e7e8ab2acbbf04c8bc30c6adb84fbd3880f7b2d8 100644 (file)
@@ -75,24 +75,39 @@ vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
                        }
                        timesKmerFound[kmerNumber] = 1;                                         //      ok, we've seen the kmer now
                }
-       
-               vector<seqMatch> seqMatches;
-               for(int i=0;i<numSeqs;i++){             
-                       seqMatch temp(i, matches[i]);
-                       seqMatches.push_back(temp);
-               }
                
-               //sorts putting largest matches first
-               sort(seqMatches.begin(), seqMatches.end(), compareSeqMatches);
+               if (num != 1) {
+                       vector<seqMatch> seqMatches; seqMatches.resize(numSeqs);
+                       for(int i=0;i<numSeqs;i++){             
+                               seqMatches[i].seq = i;
+                               seqMatches[i].match = matches[i];
+                       }
+                       
+                       //sorts putting largest matches first
+                       sort(seqMatches.begin(), seqMatches.end(), compareSeqMatches);
+                       
+                       searchScore = seqMatches[0].match;
+                       searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
                
-               searchScore = seqMatches[0].match;
-               searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
-       
-               //save top matches
-               for (int i = 0; i < num; i++) {
-                       topMatches.push_back(seqMatches[i].seq);
+                       //save top matches
+                       for (int i = 0; i < num; i++) {
+                               topMatches.push_back(seqMatches[i].seq);
+                       }
+               }else{
+                       int bestIndex = 0;
+                       int bestMatch = -1;
+                       for(int i=0;i<numSeqs;i++){     
+                               
+                               if (matches[i] > bestMatch) {
+                                       bestIndex = i;
+                                       bestMatch = matches[i];
+                               }
+                       }
+                       
+                       searchScore = bestMatch;
+                       searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
+                       topMatches.push_back(bestIndex);
                }
-               
                return topMatches;              
        }
        catch(exception& e) {
index 192701af4738ffebc9190a54e5a4da1095679bfc..6b56597f10501b4720d77b28ca08961d2c35554e 100644 (file)
@@ -234,6 +234,7 @@ int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<doub
        }
 }
 /*********************************************************************************************************************************/
+//groups by dimension
 vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
        try {
                //make square matrix
@@ -252,38 +253,70 @@ vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vecto
                                }
                        }
                        
-               }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
+               }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
                        
                        for (int i = 0; i < dists.size(); i++) {
                                
                                if (m->control_pressed) { return dists; }
                                
                                for (int j = 0; j < i; j++) {
-                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
-                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
+                                       double sum = 0.0;
+                                       for (int k = 0; k < dimensions; k++) {
+                                               sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k]));
+                                       }
                                        
-                                       dists[i][j] = sqrt((firstDim + secondDim));
+                                       dists[i][j] = sqrt(sum);
                                        dists[j][i] = dists[i][j];
                                }
                        }
                        
-               }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
+               }
+               
+               return dists;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//returns groups by dimensions from dimensions by groups
+vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vector<double> >& axes){
+       try {
+               //make square matrix
+               vector< vector<double> > dists; dists.resize(axes[0].size());
+               for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes[0].size(), 0.0); }
+               
+               if (axes.size() == 1) { //one dimension calc = abs(x-y)
                        
                        for (int i = 0; i < dists.size(); i++) {
                                
                                if (m->control_pressed) { return dists; }
                                
                                for (int j = 0; j < i; j++) {
-                                       double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
-                                       double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
-                                       double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
+                                       dists[i][j] = abs(axes[0][i] - axes[0][j]);
+                                       dists[j][i] = dists[i][j];
+                               }
+                       }
+                       
+               }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)...
+                       
+                       for (int i = 0; i < dists[0].size(); i++) {
+                               
+                               if (m->control_pressed) { return dists; }
+                               
+                               for (int j = 0; j < i; j++) {
+                                       double sum = 0.0;
+                                       for (int k = 0; k < axes.size(); k++) {
+                                               sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j]));
+                                       }
                                        
-                                       dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
+                                       dists[i][j] = sqrt(sum);
                                        dists[j][i] = dists[i][j];
                                }
                        }
                        
-               }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+               }
                
                return dists;
        }
index 286649374e1bd5a4768c257f564e86fb8ca14567..70f7699b138e06a4fbf50bfde9e4c6df0e2e3831 100644 (file)
@@ -21,7 +21,8 @@ public:
        vector<vector<double> > matrix_mult(vector<vector<double> >, vector<vector<double> >);
        int tred2(vector<vector<double> >&, vector<double>&, vector<double>&);
        int qtli(vector<double>&, vector<double>&, vector<vector<double> >&);
-       vector< vector<double> > calculateEuclidianDistance(vector<vector<double> >&, int);
+       vector< vector<double> > calculateEuclidianDistance(vector<vector<double> >&, int); //pass in axes and number of dimensions
+       vector< vector<double> > calculateEuclidianDistance(vector<vector<double> >&); //pass in axes
        double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
        
 private:
diff --git a/nmdscommand.cpp b/nmdscommand.cpp
new file mode 100644 (file)
index 0000000..a9d361d
--- /dev/null
@@ -0,0 +1,657 @@
+/*
+ *  nmdscommand.cpp
+ *  mothur
+ *
+ *  Created by westcott on 1/11/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "nmdscommand.h"
+#include "readphylipvector.h"
+
+//**********************************************************************************************************************
+//returns the names of all parameters the nmds command accepts
+vector<string> NMDSCommand::getValidParameters(){
+       try {
+               //"cutoff" is listed because the option constructor reads it and help() advertises it
+               string Array[] =  {"phylip","axes","dimension","maxiters","step","cutoff","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               //report through the shared MothurOut handle, then terminate
+               m->errorOut(e, "NMDSCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//default constructor: sets abort so execute() returns immediately, and registers the
+//"nmds" and "stress" output types with empty file lists
+NMDSCommand::NMDSCommand(){
+       try {
+               abort = true;
+               
+               //each output type starts with no file names
+               outputTypes["nmds"] = vector<string>();
+               outputTypes["stress"] = vector<string>();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "NMDSCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//returns the parameters that must always be supplied; for nmds that is only "phylip"
+vector<string> NMDSCommand::getRequiredParameters(){
+       try {
+               vector<string> required;
+               required.push_back("phylip");
+               return required;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//returns the list of required files; always empty for nmds
+vector<string> NMDSCommand::getRequiredFiles(){
+       try {
+               return vector<string>();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+//option constructor: parses the option string for the nmds command.
+//  - "help" prints the usage text and sets abort
+//  - otherwise checks every supplied parameter name, prefixes inputdir onto the phylip and axes
+//    file names that carry no path, resolves the required phylip file and the optional axes file,
+//    and reads dimension, maxiters, step and cutoff, falling back to defaults when absent
+//abort is set whenever a parameter is invalid or a file could not be resolved.
+NMDSCommand::NMDSCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid parameters for this command; "cutoff" must be listed because it is read below and advertised by help()
+                       string Array[] =  {"phylip","axes","dimension","maxiters","step","cutoff","outputdir", "inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("phylip");
+                               //user has given a phylip file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("axes");
+                               //user has given an axes file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["axes"] = inputDir + it->second;             }
+                               }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["nmds"] = tempOutNames;
+                       outputTypes["stress"] = tempOutNames;
+                       
+                       //required parameters
+                       phylipfile = validParameter.validFile(parameters, "phylip", true);
+                       if (phylipfile == "not open") { phylipfile = ""; abort = true; }
+                       else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; }        
+                       
+                       //the axes file is optional: a missing one is not an error
+                       axesfile = validParameter.validFile(parameters, "axes", true);
+                       if (axesfile == "not open") { axesfile = ""; abort = true; }
+                       else if (axesfile == "not found") { axesfile = "";  }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);
+                       if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it      
+                       }
+                       
+                       //numeric settings, each with a default used when the parameter is absent
+                       string temp = validParameter.validFile(parameters, "dimension", false); if (temp == "not found") {      temp = "2";     }
+                       convert(temp, dimension);
+                       
+                       temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") {      temp = "1000";  }
+                       convert(temp, maxIters);
+                       
+                       temp = validParameter.validFile(parameters, "step", false);     if (temp == "not found") {      temp = "0.2";   }
+                       convert(temp, step);
+                       
+                       //cutoff is entered as a percentage and stored as a fraction
+                       temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found") {      temp = "2";     }
+                       convert(temp, cutoff); 
+                       cutoff /= 100.0;
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "NMDSCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//prints the usage text for the nmds command through the shared MothurOut handle
+void NMDSCommand::help(){
+       try {
+               
+               //parameter descriptions; each one is followed by mothurOutEndLine()
+               string descriptions[] = {
+                       "The nmds command parameters are phylip, axes, dimension, maxiters, cutoff and step.",
+                       "The phylip parameter allows you to enter your distance file.",
+                       "The axes parameter allows you to enter a file containing a starting configuration.",
+                       "The dimension parameter allows you to select how many dimensions to use. Default=2",
+                       "The maxiters parameter allows you to select the maximum number of iters to try. Default=1000",
+                       "The cutoff parameter allows you to select set an acceptable percentage of magnitude. Default=2, meaning when magnitude of g reaches 2% of it's starting value the process will stop.",
+                       "The step parameter allows you to set a starting step. Default=0.2"
+               };
+               int numDescriptions = sizeof(descriptions)/sizeof(string);
+               
+               for (int i = 0; i < numDescriptions; i++) {
+                       m->mothurOut(descriptions[i]);
+                       m->mothurOutEndLine();
+               }
+               
+               //the closing messages carry their own newlines, so no mothurOutEndLine() here
+               m->mothurOut("Example nmds(phylip=yourDistanceFile).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "help");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//no explicit cleanup is performed here
+NMDSCommand::~NMDSCommand() {
+}
+//**********************************************************************************************************************
+//run nonmetric multidimensional scaling on the phylip distance matrix.
+//each pass normalizes the configuration, measures its stress against the rank order of the input
+//distances, and moves the points using the stress gradient. the loop ends when the gradient
+//magnitude drops below cutoff (as a fraction of the first pass), when stress reaches zero, or
+//after maxIters passes. the final configuration and the stress of every pass are then written out.
+//returns 0 on completion, on abort, and on user interrupt (m->control_pressed).
+int NMDSCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               cout.setf(ios::fixed, ios::floatfield);
+               cout.setf(ios::showpoint);
+               cerr.setf(ios::fixed, ios::floatfield);
+               cerr.setf(ios::showpoint);
+               
+               vector<string> names;
+               vector<seqDist> matrix; //seqDist = int, int, float - index of seq1 in names, index of seq2 in names, their distance
+               
+               //read in phylip file
+               ReadPhylipVector readFile(phylipfile);
+               names = readFile.read(matrix);
+               if (m->control_pressed) { return 0; }
+       
+               //starting configuration - step 2: random unless the user supplied an axes file
+               vector< vector<double> > axes;
+               if (axesfile == "") {   axes = generateStartingConfiguration(names.size());     }
+               else                {   axes = readAxes(names);                                 }
+               if (m->control_pressed) { return 0; }
+               
+               //sort matrix from smallest distance to largest - step 5
+               sort(matrix.begin(), matrix.end(), compareSequenceDistance);
+               
+               bool stable = false;
+               int count = 0;
+               vector<double> previousStresses;
+               vector< vector<double> > previousGradient; //gradient of the pass before the current one
+               double initialMagnitude = 0.0;
+               m->mothurOutEndLine(); m->mothurOut("Iter\tStress\tMagnitude"); m->mothurOutEndLine();
+               while ((count != maxIters) && (!stable)) {
+                       count++;
+                       
+                       //normalize axes - step 3
+                       normalizeConfiguration(axes, names.size());
+                       if (m->control_pressed) { return 0; }
+                       
+                       //calculate Euclidean distances - step 4
+                       vector< vector<double> > euclid = linearCalc.calculateEuclidianDistance(axes);
+                       if (m->control_pressed) { return 0; }
+                       
+                       //order euclid elements in same order as matrix - step 6
+                       //if there are ties in the matrix we want to arrange the euclid distances in the best way so we do not add unnecessary stress
+                       vector<seqDist> eDists;
+                       vector<seqDist> ties;
+                       for (size_t i = 0; i < matrix.size(); i++) {
+                               
+                               seqDist temp(matrix[i].seq1, matrix[i].seq2, euclid[matrix[i].seq1][matrix[i].seq2]);
+                               ties.push_back(temp);
+                               
+                               //a run of tied input distances ends at the last element or when the next distance differs
+                               bool endOfTies = ((i + 1) == matrix.size()) || (matrix[i].dist != matrix[i+1].dist);
+                               if (endOfTies) {
+                                       sort(ties.begin(), ties.end(), compareSequenceDistance);
+                                       for (size_t k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]);   }
+                                       ties.clear();
+                               }
+                       }
+                       
+                       //release the distance matrix before the remaining steps of this pass
+                       for (size_t i = 0; i < euclid.size(); i++) {  euclid[i].clear(); } euclid.clear();
+                       if (m->control_pressed) { return 0; }
+                       
+                       //find D - from step 7
+                       vector<seqDist> D = satisfyMonotonicity(eDists);
+                       if (m->control_pressed) { return 0; }
+                       
+                       //calculate the raw stress and normalize it - steps 8 and 9
+                       double rawStress;
+                       double stress = calculateStress(eDists, D, rawStress);
+                       previousStresses.push_back(stress);
+                       if (stress == 0) { m->mothurOut("Stress reached zero after " + toString(count) + " iters, stopping."); m->mothurOutEndLine(); break; }
+                       if (m->control_pressed) { return 0; }
+                       
+                       //calculate stress gradient - step 10
+                       vector< vector<double> > stressGradient = calculateStressGradientVector(eDists, D, rawStress, stress, axes);
+                       if (m->control_pressed) { return 0; }
+                       
+                       //calculate magnitude
+                       double magnitude = calculateMagnitude(stressGradient);
+                       if (count == 1) {
+                               initialMagnitude = magnitude;
+                               //there is no earlier gradient on the first pass, so the step calculation compares the gradient with itself
+                               previousGradient = stressGradient;
+                       }
+                       if (m->control_pressed) { return 0; }
+                       
+                       if ((count % 100) == 0) { m->mothurOut(toString(count) + "\t" + toString(stress) + "\t" + toString(magnitude)); m->mothurOutEndLine(); }
+
+                       //are we done - we are done if the gradient has vanished, or if the percentage of magnitude compared to initial magnitude is less than cutoff
+                       if (magnitude == 0.0) { stable = true; } //nothing left to follow; also avoids 0/0 below
+                       else if ((magnitude / initialMagnitude) < cutoff) { stable = true; }
+                       else {
+                       
+                               //calculate new step size from the previous pass's gradient and this one
+                               step = calculateStep(previousGradient, stressGradient, previousStresses);
+                               if (m->control_pressed) { return 0; }
+                               
+                               //only now does the current gradient become the "previous" one for the next pass
+                               previousGradient = stressGradient;
+                       
+                               //find new config.
+                               axes = calculateNewConfiguration(magnitude, axes, stressGradient);
+                               if (m->control_pressed) { return 0; }
+                       }
+               }
+               
+               if (m->control_pressed) { return 0; }
+               
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds";
+               string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
+               outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName);
+               outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
+               
+               //write the configuration itself (one coordinate per axis for every name), not the gradient
+               output(outputFileName, stressFileName, axes, previousStresses, names);
+               
+               if (m->control_pressed) { for (size_t i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (size_t i = 0; i < outputNames.size(); i++) {       m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//build a random starting configuration with one row per dimension and one column per name.
+//every coordinate is a randomly signed value whose size is a random integer divided by 100000.
+vector< vector<double> > NMDSCommand::generateStartingConfiguration(int numNames) {
+       try {
+               //allocate the whole dimension x numNames grid before filling it
+               vector< vector<double> > config(dimension);
+               for (int row = 0; row < config.size(); row++) {  config[row].resize(numNames); }
+               
+               //scales rand() down to the wanted integer range
+               const int divisor = (RAND_MAX / 99998) + 1;
+               
+               for (int row = 0; row < config.size(); row++) {
+                       for (int col = 0; col < config[row].size(); col++) {
+                               
+                               if (m->control_pressed) { return config; }
+                               
+                               //first draw gives the size of the coordinate
+                               int size = (int)((float)(rand()) / divisor);
+                               
+                               //second draw decides the sign: even means positive, odd means negative
+                               int draw = (int)((float)(rand()) / divisor);
+                               int sign = ((draw % 2) == 0) ? 1 : -1;
+                               
+                               config[row][col] = sign * size / (float) 100000;
+                       }
+               }
+
+               return config;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "generateStartingConfiguration");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//normalize the configuration in place: center every axis on its own mean, then divide all
+//coordinates by the root mean square deviation taken over the whole configuration. always returns 0.
+int NMDSCommand::normalizeConfiguration(vector< vector<double> >& axes, int numNames) {
+       try {
+               vector<double> axisMeans(dimension, 0.0);
+               double squaredDeviations = 0.0;
+               
+               for (int a = 0; a < axes.size(); a++) {
+                       //mean of this axis
+                       for (int p = 0; p < axes[a].size(); p++) {      axisMeans[a] += axes[a][p];     }
+                       axisMeans[a] /= (float) numNames;
+                       
+                       //add this axis to the running total of squared deviations from the mean
+                       for (int p = 0; p < axes[a].size(); p++) {
+                               squaredDeviations += ((axes[a][p] - axisMeans[a]) * (axes[a][p] - axisMeans[a]));
+                       }
+               }
+               
+               //root mean square deviation over every coordinate
+               double scale = sqrt((squaredDeviations / (float) (axes.size() * numNames)));
+               
+               //center and rescale
+               for (int a = 0; a < axes.size(); a++) {
+                       for (int p = 0; p < axes[a].size(); p++) {
+                               axes[a][p] = (axes[a][p] - axisMeans[a]) / scale;
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "normalizeConfiguration");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//adjust eDists so that it forms a monotonically non-decreasing series: successive values increase or stay the same, but never decrease.
+//eDists is received by value, so the caller's vector is left untouched; the adjusted copy is returned.
+//on user interrupt (m->control_pressed) the partially adjusted copy is returned.
+vector<seqDist> NMDSCommand::satisfyMonotonicity(vector<seqDist> eDists) {
+       try {
+               
+               //start at 1 and compare with the element before, so an empty vector is never indexed
+               for (size_t i = 1; i < eDists.size(); i++) {
+                       
+                       if (m->control_pressed) { return eDists; }
+                       
+                       //is this distance smaller than the one before it, if yes then raise it to match
+                       if (eDists[i].dist < eDists[i-1].dist) {  eDists[i].dist = eDists[i-1].dist;  }
+               }
+               
+               return eDists;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "satisfyMonotonicity");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//find the raw stress (sum of squared differences between eDists and D) and store it in rawStress,
+//then return the normalized stress: 100 * sqrt(raw stress / sum of squared eDists).
+//returns 0.0 when the raw stress is zero or when the user interrupts (rawStress then holds a partial sum).
+double NMDSCommand::calculateStress(vector<seqDist>& eDists, vector<seqDist>& D, double& rawStress) {
+       try {
+               rawStress = 0.0;
+               double sumSquaredDists = 0.0;
+               
+               //accumulate numerator and denominator in one pass over the pairs
+               for (int pair = 0; pair < D.size(); pair++) {
+                       
+                       if (m->control_pressed) { return 0.0; }
+                       
+                       rawStress += ((eDists[pair].dist - D[pair].dist) * (eDists[pair].dist - D[pair].dist));
+                       sumSquaredDists += (eDists[pair].dist * eDists[pair].dist);
+               }
+               
+               //a perfect fit needs no normalizing
+               if (rawStress == 0.0) { return 0.0; }
+               
+               return 100 * sqrt((rawStress / sumSquaredDists));
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "calculateStress");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//build the stress gradient for the current configuration.
+//eDists holds the configuration distances and D their monotone-adjusted counterparts, pair by pair;
+//rawStress and stress are the values produced by calculateStress for the same pairs;
+//axes is the configuration, indexed [dimension][name].
+//returns a matrix shaped like axes. every pair adds its term to seq1's entry and subtracts it from seq2's.
+//rawStress must not be zero (execute stops iterating before calling this when stress is zero).
+vector< vector<double> > NMDSCommand::calculateStressGradientVector(vector<seqDist>& eDists, vector<seqDist>& D, double rawStress, double stress, vector< vector<double> >& axes) {
+       try {
+               vector< vector<double> > gradient; gradient.resize(dimension);
+               for (int i = 0; i < gradient.size(); i++) { gradient[i].resize(axes[0].size(), 0.0); }
+       
+               double sumDij = 0.0;
+               for (int i = 0; i < eDists.size(); i++) {  sumDij += (eDists[i].dist * eDists[i].dist); }
+               
+               //these ratios are the same for every pair, so work them out once
+               double stressOverRaw = stress / rawStress;
+               double stressOverSum = stress / sumDij;
+               
+               for (int i = 0; i < eDists.size(); i++) {
+                       
+                       if (m->control_pressed) { return gradient; }
+                       
+                       //two points in the same place have no direction between them; dividing by their zero distance would put NaN into the gradient
+                       if (eDists[i].dist == 0) { continue; }
+                       
+                       //this part depends only on the pair, not on the dimension
+                       double firstTerm1 = stressOverRaw * (eDists[i].dist - D[i].dist);
+                       double firstTerm2 = eDists[i].dist * stressOverSum;
+                       double firstTerm = firstTerm1 - firstTerm2;
+                       
+                       for (int j = 0; j < dimension; j++) {
+                               
+                               //coordinate difference along dimension j, relative to the pair's distance
+                               double secondTerm = (axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) / eDists[i].dist; 
+                               
+                               double results = (firstTerm * secondTerm);
+                               
+                               gradient[j][eDists[i].seq1] += results;
+                               gradient[j][eDists[i].seq2] -= results;
+                       }
+               }
+               
+               return gradient;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "calculateStressGradientVector");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//magnitude of the gradient: square root of (sum of all squared entries / number of entries in the first row)
+double NMDSCommand::calculateMagnitude(vector< vector<double> >& gradient) {
+       try {
+               //sum of squares over every entry
+               double squaredTotal = 0.0;
+               for (vector< vector<double> >::iterator row = gradient.begin(); row != gradient.end(); ++row) {
+                       for (vector<double>::iterator value = row->begin(); value != row->end(); ++value) {
+                               squaredTotal += ((*value) * (*value));
+                       }
+               }
+               
+               return sqrt(((1.0/(float)gradient[0].size()) * squaredTotal));
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "calculateMagnitude");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//described in Kruskal paper page 121 + 122
+//returns the next step size: the current step multiplied by the angle, relaxation and good luck factors.
+//prevGrad and grad are the gradients to compare and are indexed with the same subscripts;
+//prevStress holds the stress of every iteration so far, most recent last, and must not be empty.
+double NMDSCommand::calculateStep(vector< vector<double> >& prevGrad, vector< vector<double> >& grad, vector<double>& prevStress) {
+       try {
+               double newStep = step;
+               
+               //calc the cos theta between the two gradients
+               double sumNum = 0.0;
+               double sumDenom1 = 0.0;
+               double sumDenom2 = 0.0;
+               for (int i = 0; i < prevGrad.size(); i++) {
+                       for (int j = 0; j < prevGrad[i].size(); j++) {
+                               sumDenom1 += (grad[i][j] * grad[i][j]);
+                               sumDenom2 += (prevGrad[i][j] * prevGrad[i][j]);
+                               sumNum += (grad[i][j] * prevGrad[i][j]);
+                       }
+               }
+               
+               //a zero length gradient has no direction; treat the angle as neutral rather than divide by zero
+               double cosTheta = 0.0;
+               double lengths = sqrt(sumDenom1) * sqrt(sumDenom2);
+               if (lengths != 0.0) { cosTheta = sumNum / lengths; }
+       
+               //calc angle factor = 4.0 ^ (cos theta)^3
+               //the odd power keeps the sign, so gradients pointing in opposite directions shrink the step instead of growing it
+               double angle = pow(4.0, (cosTheta * cosTheta * cosTheta));
+       
+               //calc 5 step ratio - current stress against the stress 5 iterations ago, or the first one while there is less history
+               double currentStress = prevStress[prevStress.size()-1];
+               double lastStress = prevStress[0];
+               if (prevStress.size() > 1) {  lastStress = prevStress[prevStress.size()-2];             }
+               double fivePrevStress = prevStress[0];
+               if (prevStress.size() > 5) {  fivePrevStress = prevStress[prevStress.size()-6]; }
+                       
+               double fiveStepRatio = min(1.0, (currentStress / fivePrevStress));
+               
+               //calc relaxation factor
+               double relaxation = 1.3 / (1.0 + pow(fiveStepRatio, 5.0));
+               
+               //calc good luck factor
+               double goodLuck = min(1.0, (currentStress / lastStress));
+               
+               //calc newStep
+               newStep = step * angle * relaxation * goodLuck;
+               
+               return newStep;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "calculateStep");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//move the configuration one step against the stress gradient and return the result; axes itself is not modified.
+//magnitude is the gradient's magnitude, used to scale the move so its size is governed by step.
+//calculateStressGradientVector adds (x_seq1 - x_seq2)/dist terms to seq1, which is the direction in which
+//stress grows, so the step has to be subtracted for stress to go down.
+vector< vector<double> > NMDSCommand::calculateNewConfiguration(double magnitude, vector< vector<double> >& axes, vector< vector<double> >& gradient) {
+       try {
+               
+               vector< vector<double> > newAxes = axes;
+               
+               //a gradient of zero magnitude gives no direction to move in, and dividing by it would produce NaN
+               if (magnitude == 0.0) { return newAxes; }
+               
+               double scale = step / magnitude;
+               
+               for (int i = 0; i < newAxes.size(); i++) {
+                       
+                       if (m->control_pressed) { return newAxes; }
+                       
+                       for (int j = 0; j < newAxes[i].size(); j++) {
+                               newAxes[i][j] = axes[i][j] - (scale * gradient[i][j]);
+                       }
+               }
+               
+               return newAxes;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "calculateNewConfiguration");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//write the two result files.
+//outputFileName receives a header row followed by one row per name holding that name's value on every axis of config;
+//stressFileName receives a header row followed by one numbered row per entry of stresses.
+//stops early, closing any file still open, when the user interrupts. always returns 0.
+int NMDSCommand::output(string outputFileName, string stressFileName, vector< vector<double> >& config, vector<double>& stresses, vector<string>& names) {
+       try {
+               
+               ofstream configOut, stressOut;
+               m->openOutputFile(outputFileName, configOut);
+               m->openOutputFile(stressFileName, stressOut);
+               
+               //header rows of both files
+               configOut << "group\t";
+               int axisNumber = 1;
+               while (axisNumber <= dimension) {
+                       configOut << "axis" << axisNumber << '\t';
+                       axisNumber++;
+               }
+               configOut << endl;
+               
+               stressOut << "Iter\tStress" << endl;
+               
+               //configuration rows: config is indexed [axis][name], the file is written name by name
+               for (int n = 0; n < config[0].size(); n++) {
+                       
+                       if (m->control_pressed) { configOut.close(); stressOut.close(); return 0; }
+                       
+                       configOut << names[n] << '\t';
+                       for (int a = 0; a < config.size(); a++) {
+                               configOut << config[a][n] << '\t';
+                       }
+                       configOut << endl;
+               }
+               configOut.close();
+               
+               //stress rows, numbered from 1
+               for (int iter = 0; iter < stresses.size(); iter++) {
+                       
+                       if (m->control_pressed) { stressOut.close(); return 0; }
+                       
+                       stressOut << (iter+1) << '\t' << stresses[iter] << endl;
+               }
+               stressOut.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "output");
+               exit(1);
+       }
+}
+/*****************************************************************/
+//read a starting configuration from axesfile.
+//the header line is scanned for "axis" labels to learn how many values follow each group name;
+//dimension is lowered when the file offers fewer axes than requested.
+//returns axes indexed [dimension][position of the group in names], the same layout that
+//generateStartingConfiguration produces. groups that are not in names are reported and skipped.
+//sets m->control_pressed when the groups read do not match names one to one.
+vector< vector<double> > NMDSCommand::readAxes(vector<string> names){
+       try {
+               ifstream in;
+               m->openInputFile(axesfile, in);
+               
+               string headerLine = m->getline(in); m->gobble(in);
+               
+               //count the number of axis you are reading
+               int count = 0;
+               string::size_type pos = headerLine.find("axis");
+               while (pos != string::npos) {
+                       count++;
+                       pos = headerLine.find("axis", pos+4);
+               }
+               
+               if (dimension > count) { m->mothurOut("You requested " + toString(dimension) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); dimension = count; }
+               
+               //one row per axis, one column per name
+               vector< vector<double> > axes(dimension, vector<double>(names.size(), 0.0));
+               
+               //how many times each name turned up in the file
+               vector<int> timesSeen(names.size(), 0);
+               
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); return axes; }
+                       
+                       string group = "";
+                       in >> group; m->gobble(in);
+                       
+                       bool ignore = false;
+                       if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); }
+                       
+                       //find where this group sits in names so its values land in the matching column
+                       int column = -1;
+                       if (!ignore) {
+                               for (int i = 0; i < names.size(); i++) {
+                                       if (names[i] == group) { column = i; break; }
+                               }
+                               if (column == -1) { ignore = true; }
+                       }
+                       
+                       for (int i = 0; i < count; i++) {
+                               float temp = 0.0;
+                               in >> temp; 
+                               
+                               //only save the axis we want
+                               if ((!ignore) && (i < dimension)) {  axes[i][column] = temp; }
+                       }
+                       
+                       //a value that cannot be read leaves the stream failed without reaching eof, which would loop forever;
+                       //stop here and let the check below report the incomplete file
+                       if (in.fail()) { break; }
+                       
+                       if (!ignore) {  timesSeen[column]++; }
+                       
+                       m->gobble(in);
+               }
+               in.close();
+               
+               //sanity check - every name must have been read exactly once
+               bool mismatch = false;
+               for (int i = 0; i < timesSeen.size(); i++) {
+                       if (timesSeen[i] != 1) { mismatch = true; break; }
+               }
+               if (mismatch) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+               
+               return axes;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NMDSCommand", "readAxes");      
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/nmdscommand.h b/nmdscommand.h
new file mode 100644 (file)
index 0000000..5e4e655
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef NMDSCOMMAND_H
+#define NMDSCOMMAND_H
+
+/*
+ *  nmdscommand.h
+ *  mothur
+ *
+ *  Created by westcott on 1/11/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "linearalgebra.h"
+
+
+/*  references used to make this command: "Nonmetric Multidimensional Scaling: A Numerical Method"
+ by J. B. Kruskal  Psychometrika - Vol 29, No. 2 June 1964 */
+
+/*****************************************************************/
+class NMDSCommand : public Command {
+       // the nmds command: reads a phylip distance file and iterates a point configuration; see execute()
+public:
+       NMDSCommand(string);    
+       NMDSCommand();
+       ~NMDSCommand();
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; } // output file names grouped by type
+       int execute();  // runs the command; returns 0
+       void help(); // prints the parameter descriptions
+       // ---- state filled in from the command's parameters ----
+private:
+       
+       bool abort; // when true execute() returns immediately
+       string phylipfile, outputDir, axesfile; // distance file, output directory, optional starting configuration file ("" when not given)
+       int dimension, maxIters; // "dimension" parameter (default 2) and "maxiters" parameter (default 1000)
+       double step, cutoff; // "step" parameter (default 0.2), replaced each iteration by calculateStep; "cutoff" parameter (default 2) divided by 100
+       vector<string> outputNames; // every file written, listed at the end of execute()
+       map<string, vector<string> > outputTypes; // the same files keyed by "nmds" and "stress"
+       LinearAlgebra linearCalc; // supplies calculateEuclidianDistance
+       // ---- helpers called by execute() ----
+       vector< vector<double> > generateStartingConfiguration(int); //pass in numNames, return axes
+       int normalizeConfiguration(vector< vector<double> >&, int); // centers and rescales the axes in place
+       vector<seqDist> satisfyMonotonicity(vector<seqDist>); // returns a copy whose distances never decrease
+       double calculateStress(vector<seqDist>&, vector<seqDist>&, double&); // returns normalized stress, stores raw stress in the last argument
+       vector< vector<double> > calculateStressGradientVector(vector<seqDist>&, vector<seqDist>&, double, double, vector< vector<double> >&); // arguments: eDists, D, rawStress, stress, axes
+       double calculateMagnitude(vector< vector<double> >&); // magnitude of a gradient
+       double calculateStep(vector< vector<double> >&, vector< vector<double> >&, vector<double>&); // arguments: previous gradient, current gradient, stresses so far
+       vector< vector<double> > calculateNewConfiguration(double, vector< vector<double> >&, vector< vector<double> >&); // arguments: magnitude, axes, gradient
+       vector< vector<double> > readAxes(vector<string>); // reads the configuration in axesfile for the given names
+       int output(string, string, vector< vector<double> >&, vector<double>&, vector<string>&); // arguments: nmds file name, stress file name, configuration, stresses, names
+};
+
+/*****************************************************************/
+
+#endif
+
+
index d1d919c93d5f43086658a86cf79f11a09de8ae9f..6777b5a56cb1182654c66bf6d065f9f973a10525 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include "pcoacommand.h"
+#include "readphylipvector.h"
 
 //**********************************************************************************************************************
 vector<string> PCOACommand::getValidParameters(){      
@@ -160,7 +161,8 @@ int PCOACommand::execute(){
        
                fbase = outputDir + m->getRootName(m->getSimpleName(filename));
                
-               read(filename, names, D);
+               ReadPhylipVector readFile(filename);
+               names = readFile.read(D);
                
                if (m->control_pressed) { return 0; }
        
@@ -227,103 +229,6 @@ void PCOACommand::get_comment(istream& f, char begin, char end){
                exit(1);
        }
 }      
-
-/*********************************************************************************************************************************/
-
-int PCOACommand::read_phylip(istream& f, int square_m, vector<string>& name_list, vector<vector<double> >& d){
-       try {
-               //     int count1=0;
-               //     int count2=0;
-               
-               int rank;
-               f >> rank;
-               
-               name_list.resize(rank);
-               d.resize(rank);
-               if(square_m == 1){
-                       for(int i=0;i<rank;i++)
-                               d[i].resize(rank);
-                       for(int i=0;i<rank;i++) {
-                               f >> name_list[i];
-                               //                      cout << i << "\t" << name_list[i] << endl;
-                               for(int j=0;j<rank;j++) {
-                                       if (m->control_pressed) { return 0; }
-                                       
-                                       f >> d[i][j];
-                                       if (d[i][j] == -0.0000)
-                                               d[i][j] = 0.0000;
-                               }
-                       }
-               }
-               else if(square_m == 2){
-                       for(int i=0;i<rank;i++){
-                               d[i].resize(rank);
-                       }
-                       d[0][0] = 0.0000;
-                       f >> name_list[0];
-                       for(int i=1;i<rank;i++){
-                               f >> name_list[i];
-                               d[i][i]=0.0000;
-                               for(int j=0;j<i;j++){
-                                       if (m->control_pressed) { return 0; }
-                                       f >> d[i][j];
-                                       if (d[i][j] == -0.0000)
-                                               d[i][j] = 0.0000;
-                                       d[j][i]=d[i][j];
-                               }
-                       }
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "read_phylip");
-               exit(1);
-       }
-
-}
-
-/*********************************************************************************************************************************/
-
-void PCOACommand::read(string fname, vector<string>& names, vector<vector<double> >& D){
-       try {
-               ifstream f;
-               m->openInputFile(fname, f);
-                       
-               //check whether matrix is square
-               char d;
-               int q = 1;
-               int numSeqs;
-               string name;
-               
-               f >> numSeqs >> name; 
-               
-               while((d=f.get()) != EOF){
-                       
-                       //is d a number meaning its square
-                       if(isalnum(d)){ 
-                               q = 1; 
-                               break; 
-                       }
-                       
-                       //is d a line return meaning its lower triangle
-                       if(d == '\n'){
-                               q = 2;
-                               break;
-                       }
-               }
-               f.close();
-               
-               //reopen to get back to beginning
-               m->openInputFile(fname, f);                     
-               read_phylip(f, q, names, D);
-       }
-               catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "read");
-               exit(1);
-       }
-}
-
 /*********************************************************************************************************************************/
 
 void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
index c62b3d668c9a38287769476a598033544d9790a3..02e562ec23f97d1fdbd0e6e18c949ae3ffc44c54 100644 (file)
@@ -31,15 +31,12 @@ public:
 private:
 
        bool abort, metric;
-       string phylipfile, columnfile, namefile, format, filename, fbase, outputDir;
-       float cutoff, precision;
+       string phylipfile, filename, fbase, outputDir;
        vector<string> outputNames;
        map<string, vector<string> > outputTypes;
        LinearAlgebra linearCalc;
        
        void get_comment(istream&, char, char);
-       int read_phylip(istream&, int, vector<string>&, vector<vector<double> >&);
-       void read(string, vector<string>&, vector<vector<double> >&);
        void recenter(double, vector<vector<double> >, vector<vector<double> >&);
        void output(string, vector<string>, vector<vector<double> >&, vector<double>);
        
index aafa5794e206b936793cf6ffce6109f66eebaf86..4dd5b38d4f7ab3895d7fa305bdb57f2e9ab5a628 100644 (file)
@@ -16,6 +16,7 @@ QualityScores::QualityScores(){
                m = MothurOut::getInstance();
                seqName = "";
                seqLength = -1;
+               
        }
        catch(exception& e) {
                m->errorOut(e, "QualityScores", "QualityScores");
@@ -197,9 +198,12 @@ bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
                        }
                }
                
+               //every score passed
+               if (end == (seqLength-1)) { end = seqLength; }
+               
                sequence.setUnaligned(rawSequence.substr(0,end));
                trimQScores(-1, end);
-       
+               
                return 1;
        }
        catch(exception& e) {
@@ -237,9 +241,11 @@ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshol
                
                if(end == -1){  end = seqLength;        }
                
+               
                sequence.setUnaligned(rawSequence.substr(0,end));
                trimQScores(-1, end);
                
+               
                return 1;
        }
        catch(exception& e) {
@@ -286,6 +292,7 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int
                
                if(end == -1){  end = seqLength;        }
                
+               
                sequence.setUnaligned(rawSequence.substr(0,end));
                trimQScores(-1, end);
                
diff --git a/readphylipvector.cpp b/readphylipvector.cpp
new file mode 100644 (file)
index 0000000..f8907cc
--- /dev/null
@@ -0,0 +1,184 @@
+/*
+ *  readphylipvector.cpp
+ *  mothur
+ *
+ *  Created by westcott on 1/11/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "readphylipvector.h"
+
+/***********************************************************************/
+ReadPhylipVector::ReadPhylipVector(string phylipFile) { //just remembers which file to parse; the file is not opened here
+       try {
+               this->m = MothurOut::getInstance();
+               this->distFile = phylipFile;
+       }
+       catch(exception& e) {
+               this->m->errorOut(e, "ReadPhylipVector", "ReadPhylipVector");
+               exit(1);
+       }
+}
+/***********************************************************************/
+vector<string> ReadPhylipVector::read(vector< vector<double> >& matrix) {
+       //Loads the phylip distance file named by distFile into matrix as a full rank x rank table and
+       //returns the row names in file order. Lower-triangle input is mirrored across a zero diagonal.
+       //If m->control_pressed becomes set while reading, returns early with matrix only partly filled.
+       try {
+               vector<string> names;
+               
+               //pass 1: peek at the first row to learn the layout
+               ifstream in;
+               m->openInputFile(distFile, in);
+               
+               int d; //int, not char: get() returns an int so that EOF stays distinct from every real character
+               int square = 1; //1 = square matrix, 2 = lower triangle
+               int numSeqs;
+               string name;
+               
+               in >> numSeqs >> name; 
+               
+               while((d=in.get()) != EOF){
+                       //an alphanumeric character after the first name means row one holds distances, so the file is square
+                       if(isalnum(d)){ 
+                               square = 1; 
+                               break; 
+                       }
+                       //reaching the line return first means row one is only a name, so the file is a lower triangle
+                       if(d == '\n'){
+                               square = 2;
+                               break;
+                       }
+               }
+               in.close();
+               
+               //pass 2: reopen and read now that you know whether you are square
+               ifstream f;
+               m->openInputFile(distFile, f);
+               
+               int rank = 0; //stays 0, so nothing is read, if the header holds no sequence count
+               f >> rank;
+               
+               names.resize(rank);
+               matrix.resize(rank);
+               for(int i=0;i<rank;i++){
+                       matrix[i].resize(rank);
+               }
+               
+               if(square == 1){
+                       for(int i=0;i<rank;i++) {
+                               f >> names[i];
+                               for(int j=0;j<rank;j++) {
+                                       if (m->control_pressed) { return names; }
+                                       
+                                       f >> matrix[i][j];
+                                       if (matrix[i][j] == -0.0000) //true for +0 and -0 alike; stores a plain positive zero
+                                               matrix[i][j] = 0.0000;
+                               }
+                       }
+               }
+               else if(square == 2 && rank > 0){ //rank > 0: the statements below touch row 0 unconditionally
+                       matrix[0][0] = 0.0000;
+                       f >> names[0];
+                       for(int i=1;i<rank;i++){
+                               f >> names[i];
+                               matrix[i][i]=0.0000;
+                               for(int j=0;j<i;j++){
+                                       if (m->control_pressed) { return names; }
+                                       
+                                       f >> matrix[i][j];
+                                       if (matrix[i][j] == -0.0000)
+                                               matrix[i][j] = 0.0000;
+                                       matrix[j][i]=matrix[i][j]; //mirror into the upper triangle
+                               }
+                       }
+               }
+               
+               return names;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadPhylipVector", "read");
+               exit(1);
+       }
+}
+/***********************************************************************/
+vector<string> ReadPhylipVector::read(vector<seqDist>& matrix) {
+       //Reads the phylip distance file named by distFile and appends one seqDist(i, j, distance) to matrix
+       //for every cell below the diagonal (j < i); returns the row names in file order.
+       //If m->control_pressed becomes set while reading, returns early with only part of the file appended.
+       try {
+               vector<string> names;
+               
+               //pass 1: peek at the first row to learn the layout
+               ifstream in;
+               m->openInputFile(distFile, in);
+               
+               int d; //int, not char: get() returns an int so that EOF stays distinct from every real character
+               int square = 1; //1 = square matrix, 2 = lower triangle
+               int numSeqs;
+               string name;
+               
+               in >> numSeqs >> name; 
+               
+               while((d=in.get()) != EOF){
+                       //an alphanumeric character after the first name means row one holds distances, so the file is square
+                       if(isalnum(d)){ 
+                               square = 1; 
+                               break; 
+                       }
+                       //reaching the line return first means row one is only a name, so the file is a lower triangle
+                       if(d == '\n'){
+                               square = 2;
+                               break;
+                       }
+               }
+               in.close();
+               
+               //pass 2: reopen and read now that you know whether you are square
+               ifstream f;
+               m->openInputFile(distFile, f);
+               
+               int rank = 0; //stays 0, so nothing is read, if the header holds no sequence count
+               float temp;
+               f >> rank;
+               
+               names.resize(rank);
+               if(square == 1){
+                       for(int i=0;i<rank;i++) {
+                               f >> names[i];
+                               for(int j=0;j<rank;j++) {
+                                       if (m->control_pressed) { return names; }
+                                       
+                                       f >> temp;
+                                       
+                                       if (j < i) { //only save lt
+                                               seqDist dist(i, j, temp);
+                                               matrix.push_back(dist);
+                                       }
+                               }
+                       }
+               }
+               else if(square == 2 && rank > 0){ //rank > 0: names[0] below is read unconditionally
+                       f >> names[0];
+                       for(int i=1;i<rank;i++){
+                               f >> names[i];
+                               for(int j=0;j<i;j++){
+                                       if (m->control_pressed) { return names; }
+                                       
+                                       f >> temp;
+                                       seqDist dist(i, j, temp);
+                                       matrix.push_back(dist);
+                               }
+                       }
+               }
+               
+               return names;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadPhylipVector", "read");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+
diff --git a/readphylipvector.h b/readphylipvector.h
new file mode 100644 (file)
index 0000000..aa056c7
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef READPHYLIPVECTOR_H
+#define READPHYLIPVECTOR_H
+
+/*
+ *  readphylipvector.h
+ *  mothur
+ *
+ *  Created by westcott on 1/11/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "mothurout.h"
+
+/******************************************************/
+
+class ReadPhylipVector {
+       //reads a phylip-formatted distance file, lower triangle or square, into an in-memory container
+public:
+       ReadPhylipVector(string phylipFile); //stores the file name only; nothing is opened until read() is called
+       ~ReadPhylipVector() {}
+       vector<string> read(vector< vector<double> >& matrix); //fills matrix as a full rank x rank table, returns the names in file order
+       vector<string> read(vector<seqDist>& matrix); //appends one seqDist per below-diagonal cell, returns the names in file order
+       
+private:
+       string distFile;
+       MothurOut* m;
+};
+
+/******************************************************/
+
+#endif
index 64bcd36764e044a911295870025095016be7fcc3..626659501a531db80f5193bef33e88793414a874 100644 (file)
@@ -669,6 +669,21 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                                                }
                                        }
                                        
+                                       //create distance file from simMatrix
+                                       /*string o = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+                                       ofstream outDist;
+                                       m->openOutputFile(o, outDist);
+                                       outDist << simMatrix.size() << endl;
+                                       for (int k = 0; k < simMatrix.size(); k++) {
+                                               outDist << thisLookup[k]->getGroup() << '\t';
+                                               for (int l = 0; l < k; l++) {
+                                                       outDist << (1.0-simMatrix[k][l]) << '\t';
+                                               }
+                                               outDist << endl;
+                                       }
+                                       outDist.close();*/
+
+                                       
                                        if (m->control_pressed) { return 1; }
                                        //creates tree from similarity matrix and write out file
                                        createTree();
index d5df4730c9f118966de5f3514b4ec276faffce05..2dc07794be5abb2f239e4b228c3aacbf51c51b6e 100644 (file)
@@ -556,16 +556,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
                                
                                if(qFileName != ""){
+                                       int origLength = currSeq.getNumBases();
                                        
                                        if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
                                        else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
                                        else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
                                        else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
                                        else                                            {       success = 1;                            }
-
-//                                     if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
-//                                             success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
-//                                     }
+                                       
+                                       //qtrim is off: a sequence the quality check shortened is scrapped rather than kept trimmed
+                                       if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
                                        
                                        if(!success)                            {       trashCode += 'q';       }
                                }