From: westcott Date: Fri, 14 Jan 2011 16:09:49 +0000 (+0000) Subject: fixed trim.seqs bug with qtrim parameter and added num=1 special case to database... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=d04f948b1a2a1a2984fc4a45d04403b8c121c5bc fixed trim.seqs bug with qtrim parameter and added num=1 special case to database search classes --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index c59979f..1b87a4f 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -9,6 +9,8 @@ /* Begin PBXBuildFile section */ 8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* mothur.1 */; }; A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; }; + A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; + A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; }; @@ -300,6 +302,10 @@ /* Begin PBXFileReference section */ 8DD76FB20486AB0100D96B5E /* Mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Mothur; sourceTree = BUILT_PRODUCTS_DIR; }; A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = ""; }; + A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = ""; }; + A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = ""; }; + A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = ""; }; + A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = nmdscommand.cpp; sourceTree = ""; }; A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = ""; }; @@ -1108,6 +1114,8 @@ A7E9B76912D37EC400DA6239 /* nocommands.cpp */, A7E9B76B12D37EC400DA6239 /* normalizesharedcommand.cpp */, A7E9B76C12D37EC400DA6239 /* normalizesharedcommand.h */, + A713EBEB12DC7C5E000092AC /* nmdscommand.h */, + A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */, A7E9B77912D37EC400DA6239 /* otuhierarchycommand.cpp */, A7E9B77A12D37EC400DA6239 /* otuhierarchycommand.h */, A7E9B77D12D37EC400DA6239 /* pairwiseseqscommand.cpp */, @@ -1521,6 +1529,8 @@ A7E9B7BE12D37EC400DA6239 /* readphylip.h */, A7E9B7BF12D37EC400DA6239 /* readtree.cpp */, A7E9B7C012D37EC400DA6239 /* readtree.h */, + A713EBAA12DC7613000092AC /* readphylipvector.h */, + A713EBAB12DC7613000092AC /* readphylipvector.cpp */, A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */, A7E9B84412D37EC400DA6239 /* splitmatrix.h */, ); @@ -1868,6 +1878,8 @@ A70332B712D3A13400761E33 /* makefile in Sources */, A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */, A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */, + A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */, + A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index f581b6d..ae59a8d 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -106,6 +106,7 @@ #include "corraxescommand.h" #include "shhhercommand.h" #include "pcacommand.h" +#include "nmdscommand.h" /*******************************************************/ @@ -215,6 +216,7 @@ CommandFactory::CommandFactory(){ commands["consensus.seqs"] = "consensus.seqs"; commands["corr.axes"] = "corr.axes"; commands["pca"] = "pca"; + commands["nmds"] = "nmds"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -337,6 +339,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); } else if(commandName == "pcoa") { command = new PCOACommand(optionString); } else if(commandName == "pca") { command = new PCACommand(optionString); } + else if(commandName == "nmds") { command = new NMDSCommand(optionString); } else if(commandName == "otu.hierarchy") { command = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); } @@ -462,6 +465,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "pre.cluster") { pipecommand = new PreClusterCommand(optionString); } else if(commandName == "pcoa") { pipecommand = new PCOACommand(optionString); } else if(commandName == "pca") { pipecommand = new PCACommand(optionString); } + else if(commandName == "nmds") { pipecommand = new NMDSCommand(optionString); } else if(commandName == "otu.hierarchy") { pipecommand = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { pipecommand = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { pipecommand = new SetLogFileCommand(optionString); } @@ -574,6 +578,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "pre.cluster") { shellcommand = new PreClusterCommand(); } else if(commandName == "pcoa") { shellcommand = new PCOACommand(); } else if(commandName == "pca") { shellcommand = new PCACommand(); } + else if(commandName == "nmds") { shellcommand = new NMDSCommand(); } else if(commandName == "otu.hierarchy") { shellcommand = new OtuHierarchyCommand(); } else if(commandName == "set.dir") { shellcommand = new SetDirectoryCommand(); } else if(commandName == "set.logfile") { shellcommand = new SetLogFileCommand(); } diff --git a/database.hpp b/database.hpp index 9293f13..bbe01c4 100644 --- a/database.hpp +++ b/database.hpp @@ -20,6 +20,7 @@ struct seqMatch { //used to select top n matches int seq; int match; + seqMatch() {} seqMatch(int s, int m) : seq(s), match(m) {} }; /**************************************************************************************************/ diff --git a/distancedb.cpp b/distancedb.cpp index ca6ffe8..b5c22b3 100644 --- a/distancedb.cpp +++ b/distancedb.cpp @@ -47,7 +47,7 @@ vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ vector topMatches; bool templateSameLength = true; string sequence = query->getAligned(); - vector dists; + vector dists; searchScore = -1.0; @@ -56,24 +56,46 @@ vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ if (sequence.length() != templateSeqsLength) { templateSameLength = false; } if (templateSameLength && templateAligned) { - //calc distance from this sequence to every sequence in the template - for (int i = 0; i < data.size(); i++) { - distCalculator->calcDist(*query, data[i]); - float dist = distCalculator->getDist(); + if (numWanted != 1) { - //save distance to each template sequence - seqDist temp(-1, i, dist); - dists.push_back(temp); - } - - sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest - - //save distance of best match - searchScore = dists[0].dist; - - //fill topmatches with numwanted closest sequences indexes - for (int i = 0; i < numWanted; i++) { - topMatches.push_back(dists[i].seq2); + dists.resize(data.size()); + + //calc distance from this sequence to every sequence in the template + for (int i = 0; i < data.size(); i++) { + distCalculator->calcDist(*query, data[i]); + float dist = distCalculator->getDist(); + + //save distance to each template sequence + dists[i].seq1 = -1; + dists[i].seq2 = i; + dists[i].dist = dist; + } + + sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest + + //save distance of best match + searchScore = dists[0].dist; + + //fill topmatches with numwanted closest sequences indexes + for (int i = 0; i < numWanted; i++) { + topMatches.push_back(dists[i].seq2); + } + }else { + int bestIndex = 0; + float smallDist = 100000; + for (int i = 0; i < data.size(); i++) { + distCalculator->calcDist(*query, data[i]); + float dist = distCalculator->getDist(); + + //are you smaller? + if (dist < smallDist) { + bestIndex = i; + smallDist = dist; + } + } + + searchScore = smallDist; + topMatches.push_back(bestIndex); } }else{ diff --git a/getseqscommand.cpp b/getseqscommand.cpp index a16accb..a191515 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -190,6 +190,7 @@ GetSeqsCommand::GetSeqsCommand(string option) { accnosfile2 = validParameter.validFile(parameters, "accnos2", true); if (accnosfile2 == "not open") { abort = true; } + if (accnosfile2 == "not found") { accnosfile2 = ""; } fastafile = validParameter.validFile(parameters, "fasta", true); if (fastafile == "not open") { abort = true; } diff --git a/kmerdb.cpp b/kmerdb.cpp index 108b207..e7e8ab2 100644 --- a/kmerdb.cpp +++ b/kmerdb.cpp @@ -75,24 +75,39 @@ vector KmerDB::findClosestSequences(Sequence* candidateSeq, int num){ } timesKmerFound[kmerNumber] = 1; // ok, we've seen the kmer now } - - vector seqMatches; - for(int i=0;i seqMatches; seqMatches.resize(numSeqs); + for(int i=0;i bestMatch) { + bestIndex = i; + bestMatch = matches[i]; + } + } + + searchScore = bestMatch; + searchScore = 100 * searchScore / (float) numKmers; // return the Sequence object corresponding to the db + topMatches.push_back(bestIndex); } - return topMatches; } catch(exception& e) { diff --git a/linearalgebra.cpp b/linearalgebra.cpp index 192701a..6b56597 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -234,6 +234,7 @@ int LinearAlgebra::qtli(vector& d, vector& e, vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes, int dimensions){ try { //make square matrix @@ -252,38 +253,70 @@ vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vecto } } - }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2) + }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)... for (int i = 0; i < dists.size(); i++) { if (m->control_pressed) { return dists; } for (int j = 0; j < i; j++) { - double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); - double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); + double sum = 0.0; + for (int k = 0; k < dimensions; k++) { + sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k])); + } - dists[i][j] = sqrt((firstDim + secondDim)); + dists[i][j] = sqrt(sum); dists[j][i] = dists[i][j]; } } - }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2) + } + + return dists; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//returns groups by dimensions from dimensions by groups +vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes){ + try { + //make square matrix + vector< vector > dists; dists.resize(axes[0].size()); + for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes[0].size(), 0.0); } + + if (axes.size() == 1) { //one dimension calc = abs(x-y) for (int i = 0; i < dists.size(); i++) { if (m->control_pressed) { return dists; } for (int j = 0; j < i; j++) { - double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); - double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); - double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2])); + dists[i][j] = abs(axes[0][i] - axes[0][j]); + dists[j][i] = dists[i][j]; + } + } + + }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)... + + for (int i = 0; i < dists[0].size(); i++) { + + if (m->control_pressed) { return dists; } + + for (int j = 0; j < i; j++) { + double sum = 0.0; + for (int k = 0; k < axes.size(); k++) { + sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j])); + } - dists[i][j] = sqrt((firstDim + secondDim + thirdDim)); + dists[i][j] = sqrt(sum); dists[j][i] = dists[i][j]; } } - }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + } return dists; } diff --git a/linearalgebra.h b/linearalgebra.h index 2866493..70f7699 100644 --- a/linearalgebra.h +++ b/linearalgebra.h @@ -21,7 +21,8 @@ public: vector > matrix_mult(vector >, vector >); int tred2(vector >&, vector&, vector&); int qtli(vector&, vector&, vector >&); - vector< vector > calculateEuclidianDistance(vector >&, int); + vector< vector > calculateEuclidianDistance(vector >&, int); //pass in axes and number of dimensions + vector< vector > calculateEuclidianDistance(vector >&); //pass in axes double calcPearson(vector >&, vector >&); private: diff --git a/nmdscommand.cpp b/nmdscommand.cpp new file mode 100644 index 0000000..a9d361d --- /dev/null +++ b/nmdscommand.cpp @@ -0,0 +1,657 @@ +/* + * nmdscommand.cpp + * mothur + * + * Created by westcott on 1/11/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "nmdscommand.h" +#include "readphylipvector.h" + +//********************************************************************************************************************** +vector NMDSCommand::getValidParameters(){ + try { + string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +NMDSCommand::NMDSCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["nmds"] = tempOutNames; + outputTypes["stress"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "NMDSCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector NMDSCommand::getRequiredParameters(){ + try { + string Array[] = {"phylip"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector NMDSCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** + +NMDSCommand::NMDSCommand(string option) { + try { + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir", "inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser. getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip"] = inputDir + it->second; } + } + + it = parameters.find("axes"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["axes"] = inputDir + it->second; } + } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["nmds"] = tempOutNames; + outputTypes["stress"] = tempOutNames; + + //required parameters + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not open") { phylipfile = ""; abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; } + + axesfile = validParameter.validFile(parameters, "axes", true); + if (axesfile == "not open") { axesfile = ""; abort = true; } + else if (axesfile == "not found") { axesfile = ""; } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it + } + + string temp = validParameter.validFile(parameters, "dimension", false); if (temp == "not found") { temp = "2"; } + convert(temp, dimension); + + temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, maxIters); + + temp = validParameter.validFile(parameters, "step", false); if (temp == "not found") { temp = "0.2"; } + convert(temp, step); + + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "2"; } + convert(temp, cutoff); + cutoff /= 100.0; + } + + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "NMDSCommand"); + exit(1); + } +} +//********************************************************************************************************************** +void NMDSCommand::help(){ + try { + + m->mothurOut("The nmds command parameters are phylip, axes, dimension, maxiters, cutoff and step."); m->mothurOutEndLine(); + m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine(); + m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine(); + m->mothurOut("The dimension parameter allows you to select how many dimensions to use. Default=2"); m->mothurOutEndLine(); + m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try. Default=1000"); m->mothurOutEndLine(); + m->mothurOut("The cutoff parameter allows you to select set an acceptable percentage of magnitude. Default=2, meaning when magnitude of g reaches 2% of it's starting value the process will stop."); m->mothurOutEndLine(); + m->mothurOut("The step parameter allows you to set a starting step. Default=0.2"); m->mothurOutEndLine(); + m->mothurOut("Example nmds(phylip=yourDistanceFile).\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "help"); + exit(1); + } +} +//********************************************************************************************************************** +NMDSCommand::~NMDSCommand(){} +//********************************************************************************************************************** +int NMDSCommand::execute(){ + try { + + if (abort == true) { return 0; } + + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + cerr.setf(ios::fixed, ios::floatfield); + cerr.setf(ios::showpoint); + + vector names; + vector matrix; //seqDist = int, int, float - index of seq1 in names, index of seq2 in names, their distance + + //read in phylip file + ReadPhylipVector readFile(phylipfile); + names = readFile.read(matrix); + if (m->control_pressed) { return 0; } + + //randomly generate the starting configuration - step 2 + vector< vector > axes; + if (axesfile == "") { axes = generateStartingConfiguration(names.size()); } + else { axes = readAxes(names); } + if (m->control_pressed) { return 0; } + + //sort matrix from smallest distance to largest - step 5 + sort(matrix.begin(), matrix.end(), compareSequenceDistance); + + bool stable = false; + int count = 0; + vector previousStresses; + vector< vector > previousGradient = axes; + double initialMagnitude; + m->mothurOutEndLine(); m->mothurOut("Iter\tStress\tMagnitude"); m->mothurOutEndLine(); + while ((count != maxIters) && (!stable)) { + count++; + + //normalize axes - step 3 + normalizeConfiguration(axes, names.size()); + if (m->control_pressed) { return 0; } + + //calculate Euclidean distances - step 4 + vector< vector > euclid = linearCalc.calculateEuclidianDistance(axes); + if (m->control_pressed) { return 0; } + + //order euclid elements in same order as matrix - step 6 + //if there are ties in the matrix we want to arrange the euclid distances in the best way so we do not to add unnecessary stress + vector eDists; + vector ties; + for (int i = 0; i < matrix.size(); i++) { + + seqDist temp(matrix[i].seq1, matrix[i].seq2, euclid[matrix[i].seq1][matrix[i].seq2]); + ties.push_back(temp); + + if (i != matrix.size()-1) { // you are not the last so you can look ahead + if (matrix[i].dist != matrix[i+1].dist) { // you are done with ties, sort and save them, then continue + sort(ties.begin(), ties.end(), compareSequenceDistance); + for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); } + ties.clear(); + } + }else { // you are the last one + sort(ties.begin(), ties.end(), compareSequenceDistance); + for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); } + } + } + + for (int i = 0; i < euclid.size(); i++) { euclid[i].clear(); } euclid.clear(); + if (m->control_pressed) { return 0; } + + //find D - from step 7 + vector D = satisfyMonotonicity(eDists); + if (m->control_pressed) { return 0; } + + //calculate the raw stress and normalize it - steps 8 and 9 + double rawStress; + double stress = calculateStress(eDists, D, rawStress); + previousStresses.push_back(stress); + if (stress == 0) { m->mothurOut("Stress reached zero after " + toString(count) + " iters, stopping."); m->mothurOutEndLine(); break; } + if (m->control_pressed) { return 0; } + + //calculate stress gradient - step 10 + vector< vector > stressGradient = calculateStressGradientVector(eDists, D, rawStress, stress, axes); + if (m->control_pressed) { return 0; } + + //calculate magnitude + double magnitude = calculateMagnitude(stressGradient); + if (count == 1) { initialMagnitude = magnitude; } + if (m->control_pressed) { return 0; } + + //save gradient before adjusting config. + previousGradient = stressGradient; + + if ((count % 100) == 0) { m->mothurOut(toString(count) + "\t" + toString(previousStresses[previousStresses.size()-1]) + "\t" + toString(magnitude)); m->mothurOutEndLine(); } + + //are we done - we are done if percentage of magnitude compared to initial magnitude is less than cutoff + double percentage = magnitude / initialMagnitude; + if (percentage < cutoff) { stable = true; } + else { + + //calculate new step size + step = calculateStep(previousGradient, stressGradient, previousStresses); + cout << "count = " << count << '\t' << step << endl; + if (m->control_pressed) { return 0; } + + //find new config. + axes = calculateNewConfiguration(magnitude, axes, stressGradient); + if (m->control_pressed) { return 0; } + } + } + + if (m->control_pressed) { return 0; } + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds"; + string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds"; + outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName); + outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); + + output(outputFileName, stressFileName, previousGradient, previousStresses, names); + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +//generate random config +vector< vector > NMDSCommand::generateStartingConfiguration(int numNames) { + try { + vector< vector > axes; axes.resize(dimension); + for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); } + + //generate random number between -1 and 1, precision 6 + for (int i = 0; i < axes.size(); i++) { + for (int j = 0; j < axes[i].size(); j++) { + + if (m->control_pressed) { return axes; } + + //generate random int between 0 and 99999 + int myrand = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1)); + + //generate random sign + int mysign = (int)((float)(rand()) / ((RAND_MAX / 99998) + 1)); + + //if mysign is even then sign = positive, else sign = negative + if ((mysign % 2) == 0) { mysign = 1.0; } + else { mysign = -1.0; } + + axes[i][j] = mysign * myrand / (float) 100000; + } + } + + return axes; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "generateStartingConfiguration"); + exit(1); + } +} +//********************************************************************************************************************** +//normalize configuration +int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numNames) { + try { + vector averageAxes; averageAxes.resize(dimension, 0.0); + + //find average + for (int i = 0; i < axes.size(); i++) { + for (int j = 0; j < axes[i].size(); j++) { averageAxes[i] += axes[i][j]; } + + averageAxes[i] /= (float) numNames; + } + + //normalize axes + double sumDenom = 0.0; + for (int i = 0; i < axes.size(); i++) { + for (int j = 0; j < axes[i].size(); j++) { + sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i])); + } + } + + double denom = sqrt((sumDenom / (float) (axes.size() * numNames))); + + for (int i = 0; i < axes.size(); i++) { + for (int j = 0; j < axes[i].size(); j++) { + axes[i][j] = (axes[i][j] - averageAxes[i]) / denom; + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "normalizeConfiguration"); + exit(1); + } +} +//********************************************************************************************************************** +//adjust eDists so that it creates monotonically increasing series of succesive values that increase or stay the same, but never decrease +vector NMDSCommand::satisfyMonotonicity(vector eDists) { + try { + + vector D = eDists; + + for (int i = 0; i < (D.size()-1); i++) { + + if (m->control_pressed) { return D; } + + //is the distance in i+1 smaller than i, if yes then adjust + if (D[i+1].dist < D[i].dist) { D[i+1].dist = D[i].dist; } + } + + return D; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "satisfyMonotonicity"); + exit(1); + } +} +//********************************************************************************************************************** +//find raw stress, and normalize using +double NMDSCommand::calculateStress(vector& eDists, vector& D, double& rawStress) { + try { + double normStress = 0.0; + double denom = 0.0; + rawStress = 0.0; + + //find raw stress + for (int i = 0; i < D.size(); i++) { + + if (m->control_pressed) { return normStress; } + + rawStress += ((eDists[i].dist - D[i].dist) * (eDists[i].dist - D[i].dist)); + denom += (eDists[i].dist * eDists[i].dist); + } + + //normalize stress + if (rawStress != 0.0) { + normStress = 100 * sqrt((rawStress / denom)); + } + + return normStress; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateStress"); + exit(1); + } +} +//********************************************************************************************************************** +vector< vector > NMDSCommand::calculateStressGradientVector(vector& eDists, vector& D, double rawStress, double stress, vector< vector >& axes) { + try { + vector< vector > gradient; gradient.resize(dimension); + for (int i = 0; i < gradient.size(); i++) { gradient[i].resize(axes[0].size(), 0.0); } + + double sumDij = 0.0; + for (int i = 0; i < eDists.size(); i++) { sumDij += (eDists[i].dist * eDists[i].dist); } + + for (int i = 0; i < eDists.size(); i++) { + + for (int j = 0; j < dimension; j++) { + + if (m->control_pressed) { return gradient; } + + double firstTerm1 = (stress / rawStress) * (eDists[i].dist - D[i].dist); + double firstTerm2 = eDists[i].dist * (stress / sumDij); + double firstTerm = firstTerm1 - firstTerm2; + + double secondTerm = (axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) / eDists[i].dist; + + double results = (firstTerm * secondTerm); + + gradient[j][eDists[i].seq1] += results; + gradient[j][eDists[i].seq2] -= results; + } + } + + return gradient; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateStressGradientVector"); + exit(1); + } +} +//********************************************************************************************************************** +double NMDSCommand::calculateMagnitude(vector< vector >& gradient) { + try { + double magnitude = 0.0; + + double sum = 0.0; + for (int i = 0; i < gradient.size(); i++) { + for (int j = 0; j < gradient[i].size(); j++) { + sum += (gradient[i][j] * gradient[i][j]); + } + } + + magnitude = sqrt(((1.0/(float)gradient[0].size()) * sum)); + + return magnitude; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateMagnitude"); + exit(1); + } +} +//********************************************************************************************************************** +//described in Kruskal paper page 121 + 122 +double NMDSCommand::calculateStep(vector< vector >& prevGrad, vector< vector >& grad, vector& prevStress) { + try { + double newStep = step; + + //calc the cos theta + double sumNum = 0.0; + double sumDenom1 = 0.0; + double sumDenom2 = 0.0; + for (int i = 0; i < prevGrad.size(); i++) { + for (int j = 0; j < prevGrad[i].size(); j++) { + sumDenom1 += (grad[i][j] * grad[i][j]); + sumDenom2 += (prevGrad[i][j] * prevGrad[i][j]); + sumNum += (grad[i][j] * prevGrad[i][j]); + } + } + + double cosTheta = sumNum / (sqrt(sumDenom1) * sqrt(sumDenom2)); + cosTheta *= cosTheta; + + //calc angle factor + double angle = pow(4.0, cosTheta); + + //calc 5 step ratio + double currentStress = prevStress[prevStress.size()-1]; + double lastStress = prevStress[0]; + if (prevStress.size() > 1) { lastStress = prevStress[prevStress.size()-2]; } + double fivePrevStress = prevStress[0]; + if (prevStress.size() > 5) { fivePrevStress = prevStress[prevStress.size()-6]; } + + double fiveStepRatio = min(1.0, (currentStress / fivePrevStress)); + + //calc relaxation factor + double relaxation = 1.3 / (1.0 + pow(fiveStepRatio, 5.0)); + + //calc good luck factor + double goodLuck = min(1.0, (currentStress / lastStress)); + + //calc newStep + cout << "\ncos = " << cosTheta << " step = " << step << " angle = " << angle << " relaxation = " << relaxation << " goodluck = " << goodLuck << endl; + newStep = step * angle * relaxation * goodLuck; + + return newStep; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateStep"); + exit(1); + } +} +//********************************************************************************************************************** +vector< vector > NMDSCommand::calculateNewConfiguration(double magnitude, vector< vector >& axes, vector< vector >& gradient) { + try { + + vector< vector > newAxes = axes; + + for (int i = 0; i < newAxes.size(); i++) { + + if (m->control_pressed) { return newAxes; } + + for (int j = 0; j < newAxes[i].size(); j++) { + newAxes[i][j] = axes[i][j] + ((step / magnitude) * gradient[i][j]); + } + } + + return newAxes; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateNewConfiguration"); + exit(1); + } +} +//********************************************************************************************************************** +int NMDSCommand::output(string outputFileName, string stressFileName, vector< vector >& config, vector& stresses, vector& names) { + try { + + ofstream out, out2; + m->openOutputFile(outputFileName, out); + m->openOutputFile(stressFileName, out2); + + //output headers + out << "group\t"; + for (int i = 0; i < dimension; i++) { out << "axis" << (i+1) << '\t'; } + out << endl; + + out2 << "Iter\tStress" << endl; + + //output nmds file + for (int i = 0; i < config[0].size(); i++) { + + if (m->control_pressed) { out.close(); out2.close(); return 0; } + + out << names[i] << '\t'; + + for (int j = 0; j < config.size(); j++) { + out << config[j][i] << '\t'; + } + + out << endl; + } + out.close(); + + //output stress file + for (int j = 0; j < stresses.size(); j++) { + if (m->control_pressed) { out2.close(); return 0; } + + out2 << (j+1) << '\t' << stresses[j] << endl; + } + out2.close(); + + + return 0; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "output"); + exit(1); + } +} +/*****************************************************************/ +vector< vector > NMDSCommand::readAxes(vector names){ + try { + vector< vector > axes; + + ifstream in; + m->openInputFile(axesfile, in); + + string headerLine = m->getline(in); m->gobble(in); + + //count the number of axis you are reading + bool done = false; + int count = 0; + while (!done) { + int pos = headerLine.find("axis"); + if (pos != string::npos) { + count++; + headerLine = headerLine.substr(pos+4); + }else { done = true; } + } + + if (dimension > count) { m->mothurOut("You requested " + toString(dimension) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); dimension = count; } + + while (!in.eof()) { + + if (m->control_pressed) { in.close(); return axes; } + + string group = ""; + in >> group; m->gobble(in); + + bool ignore = false; + if (!m->inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring."); m->mothurOutEndLine(); } + + vector thisGroupsAxes; + for (int i = 0; i < count; i++) { + float temp = 0.0; + in >> temp; + + //only save the axis we want + if (i < dimension) { thisGroupsAxes.push_back(temp); } + } + + if (!ignore) { axes.push_back(thisGroupsAxes); } + + m->gobble(in); + } + in.close(); + + //sanity check + if (names.size() != axes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + + return axes; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "readAxes"); + exit(1); + } +} +//********************************************************************************************************************** + + diff --git a/nmdscommand.h b/nmdscommand.h new file mode 100644 index 0000000..5e4e655 --- /dev/null +++ b/nmdscommand.h @@ -0,0 +1,60 @@ +#ifndef NMDSCOMMAND_H +#define NMDSCOMMAND_H + +/* + * nmdscommand.h + * mothur + * + * Created by westcott on 1/11/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "linearalgebra.h" + + +/* references used to make this command: "Nonmetric Multidimensional Scalling: A Numerical Method" + by J. B. Kruskal Psychometrika - Vol 29, No. 2 June 1964 */ + +/*****************************************************************/ +class NMDSCommand : public Command { + +public: + NMDSCommand(string); + NMDSCommand(); + ~NMDSCommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + + bool abort; + string phylipfile, outputDir, axesfile; + int dimension, maxIters; + double step, cutoff; + vector outputNames; + map > outputTypes; + LinearAlgebra linearCalc; + + vector< vector > generateStartingConfiguration(int); //pass in numNames, return axes + int normalizeConfiguration(vector< vector >&, int); + vector satisfyMonotonicity(vector); + double calculateStress(vector&, vector&, double&); + vector< vector > calculateStressGradientVector(vector&, vector&, double, double, vector< vector >&); + double calculateMagnitude(vector< vector >&); + double calculateStep(vector< vector >&, vector< vector >&, vector&); + vector< vector > calculateNewConfiguration(double, vector< vector >&, vector< vector >&); + vector< vector > readAxes(vector); + int output(string, string, vector< vector >&, vector&, vector&); +}; + +/*****************************************************************/ + +#endif + + diff --git a/pcoacommand.cpp b/pcoacommand.cpp index d1d919c..6777b5a 100644 --- a/pcoacommand.cpp +++ b/pcoacommand.cpp @@ -9,6 +9,7 @@ */ #include "pcoacommand.h" +#include "readphylipvector.h" //********************************************************************************************************************** vector PCOACommand::getValidParameters(){ @@ -160,7 +161,8 @@ int PCOACommand::execute(){ fbase = outputDir + m->getRootName(m->getSimpleName(filename)); - read(filename, names, D); + ReadPhylipVector readFile(filename); + names = readFile.read(D); if (m->control_pressed) { return 0; } @@ -227,103 +229,6 @@ void PCOACommand::get_comment(istream& f, char begin, char end){ exit(1); } } - -/*********************************************************************************************************************************/ - -int PCOACommand::read_phylip(istream& f, int square_m, vector& name_list, vector >& d){ - try { - // int count1=0; - // int count2=0; - - int rank; - f >> rank; - - name_list.resize(rank); - d.resize(rank); - if(square_m == 1){ - for(int i=0;i> name_list[i]; - // cout << i << "\t" << name_list[i] << endl; - for(int j=0;jcontrol_pressed) { return 0; } - - f >> d[i][j]; - if (d[i][j] == -0.0000) - d[i][j] = 0.0000; - } - } - } - else if(square_m == 2){ - for(int i=0;i> name_list[0]; - for(int i=1;i> name_list[i]; - d[i][i]=0.0000; - for(int j=0;jcontrol_pressed) { return 0; } - f >> d[i][j]; - if (d[i][j] == -0.0000) - d[i][j] = 0.0000; - d[j][i]=d[i][j]; - } - } - } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "PCOACommand", "read_phylip"); - exit(1); - } - -} - -/*********************************************************************************************************************************/ - -void PCOACommand::read(string fname, vector& names, vector >& D){ - try { - ifstream f; - m->openInputFile(fname, f); - - //check whether matrix is square - char d; - int q = 1; - int numSeqs; - string name; - - f >> numSeqs >> name; - - while((d=f.get()) != EOF){ - - //is d a number meaning its square - if(isalnum(d)){ - q = 1; - break; - } - - //is d a line return meaning its lower triangle - if(d == '\n'){ - q = 2; - break; - } - } - f.close(); - - //reopen to get back to beginning - m->openInputFile(fname, f); - read_phylip(f, q, names, D); - } - catch(exception& e) { - m->errorOut(e, "PCOACommand", "read"); - exit(1); - } -} - /*********************************************************************************************************************************/ void PCOACommand::recenter(double offset, vector > D, vector >& G){ diff --git a/pcoacommand.h b/pcoacommand.h index c62b3d6..02e562e 100644 --- a/pcoacommand.h +++ b/pcoacommand.h @@ -31,15 +31,12 @@ public: private: bool abort, metric; - string phylipfile, columnfile, namefile, format, filename, fbase, outputDir; - float cutoff, precision; + string phylipfile, filename, fbase, outputDir; vector outputNames; map > outputTypes; LinearAlgebra linearCalc; void get_comment(istream&, char, char); - int read_phylip(istream&, int, vector&, vector >&); - void read(string, vector&, vector >&); void recenter(double, vector >, vector >&); void output(string, vector, vector >&, vector); diff --git a/qualityscores.cpp b/qualityscores.cpp index aafa579..4dd5b38 100644 --- a/qualityscores.cpp +++ b/qualityscores.cpp @@ -16,6 +16,7 @@ QualityScores::QualityScores(){ m = MothurOut::getInstance(); seqName = ""; seqLength = -1; + } catch(exception& e) { m->errorOut(e, "QualityScores", "QualityScores"); @@ -197,9 +198,12 @@ bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){ } } + //every score passed + if (end == (seqLength-1)) { end = seqLength; } + sequence.setUnaligned(rawSequence.substr(0,end)); trimQScores(-1, end); - + return 1; } catch(exception& e) { @@ -237,9 +241,11 @@ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshol if(end == -1){ end = seqLength; } + sequence.setUnaligned(rawSequence.substr(0,end)); trimQScores(-1, end); + return 1; } catch(exception& e) { @@ -286,6 +292,7 @@ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int if(end == -1){ end = seqLength; } + sequence.setUnaligned(rawSequence.substr(0,end)); trimQScores(-1, end); diff --git a/readphylipvector.cpp b/readphylipvector.cpp new file mode 100644 index 0000000..f8907cc --- /dev/null +++ b/readphylipvector.cpp @@ -0,0 +1,184 @@ +/* + * readphylipvector.cpp + * mothur + * + * Created by westcott on 1/11/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "readphylipvector.h" + +/***********************************************************************/ +ReadPhylipVector::ReadPhylipVector(string d) { + try { + m = MothurOut::getInstance(); + distFile = d; + } + catch(exception& e) { + m->errorOut(e, "ReadPhylipVector", "ReadPhylipVector"); + exit(1); + } +} +/***********************************************************************/ +vector ReadPhylipVector::read(vector< vector >& matrix) { + try { + vector names; + + ifstream in; + m->openInputFile(distFile, in); + + //check whether matrix is square + char d; + int square = 1; + int numSeqs; + string name; + + in >> numSeqs >> name; + + while((d=in.get()) != EOF){ + + //is d a number meaning its square + if(isalnum(d)){ + square = 1; + break; + } + + //is d a line return meaning its lower triangle + if(d == '\n'){ + square = 2; + break; + } + } + in.close(); + + + //reopen and read now that you know whether you are square + ifstream f; + m->openInputFile(distFile, f); + + int rank; + f >> rank; + + names.resize(rank); + matrix.resize(rank); + if(square == 1){ + for(int i=0;i> names[i]; + for(int j=0;jcontrol_pressed) { return names; } + + f >> matrix[i][j]; + if (matrix[i][j] == -0.0000) + matrix[i][j] = 0.0000; + } + } + } + else if(square == 2){ + for(int i=0;i> names[0]; + for(int i=1;i> names[i]; + matrix[i][i]=0.0000; + for(int j=0;jcontrol_pressed) { return names; } + f >> matrix[i][j]; + if (matrix[i][j] == -0.0000) + matrix[i][j] = 0.0000; + matrix[j][i]=matrix[i][j]; + } + } + } + + return names; + } + catch(exception& e) { + m->errorOut(e, "ReadPhylipVector", "read"); + exit(1); + } +} +/***********************************************************************/ +vector ReadPhylipVector::read(vector& matrix) { + try { + vector names; + + ifstream in; + m->openInputFile(distFile, in); + + //check whether matrix is square + char d; + int square = 1; + int numSeqs; + string name; + + in >> numSeqs >> name; + + while((d=in.get()) != EOF){ + + //is d a number meaning its square + if(isalnum(d)){ + square = 1; + break; + } + + //is d a line return meaning its lower triangle + if(d == '\n'){ + square = 2; + break; + } + } + in.close(); + + + //reopen and read now that you know whether you are square + ifstream f; + m->openInputFile(distFile, f); + + int rank; + float temp; + f >> rank; + + names.resize(rank); + if(square == 1){ + for(int i=0;i> names[i]; + for(int j=0;jcontrol_pressed) { return names; } + + f >> temp; + + if (j < i) { //only save lt + seqDist dist(i, j, temp); + matrix.push_back(dist); + } + } + } + } + else if(square == 2){ + f >> names[0]; + for(int i=1;i> names[i]; + for(int j=0;jcontrol_pressed) { return names; } + f >> temp; + seqDist dist(i, j, temp); + matrix.push_back(dist); + } + } + } + + return names; + } + catch(exception& e) { + m->errorOut(e, "ReadPhylipVector", "read"); + exit(1); + } +} +/***********************************************************************/ + + diff --git a/readphylipvector.h b/readphylipvector.h new file mode 100644 index 0000000..aa056c7 --- /dev/null +++ b/readphylipvector.h @@ -0,0 +1,34 @@ +#ifndef READPHYLIPVECTOR_H +#define READPHYLIPVECTOR_H + +/* + * readphylipvector.h + * mothur + * + * Created by westcott on 1/11/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + + +#include "mothur.h" +#include "mothurout.h" + +/******************************************************/ + +class ReadPhylipVector { + +public: + ReadPhylipVector(string); //phylipfile - lt or square + ~ReadPhylipVector() {} + vector read(vector< vector >&); //pass in matrix to fill with values, returns vector of strings containing names in phylipfile + vector read(vector&); //pass in matrix to fill with values, returns vector of strings containing names in phylipfile + +private: + string distFile; + MothurOut* m; +}; + +/******************************************************/ + +#endif diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 64bcd36..6266595 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -669,6 +669,21 @@ int TreeGroupCommand::process(vector thisLookup) { } } + //createdistance file from simMatrix + /*string o = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; + ofstream outDist; + m->openOutputFile(o, outDist); + outDist << simMatrix.size() << endl; + for (int k = 0; k < simMatrix.size(); k++) { + outDist << thisLookup[k]->getGroup() << '\t'; + for (int l = 0; l < k; l++) { + outDist << (1.0-simMatrix[k][l]) << '\t'; + } + outDist << endl; + } + outDist.close();*/ + + if (m->control_pressed) { return 1; } //creates tree from similarity matrix and write out file createTree(); diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index d5df473..2dc0779 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -556,16 +556,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(qFileName != ""){ + int origLength = currSeq.getNumBases(); if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); } else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); } else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); } else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); } else { success = 1; } - -// if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { -// success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap -// } + + //you don't want to trim, if it fails above then scrap it + if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; } if(!success) { trashCode += 'q'; } }