From c921bbf0623d5200f69b5d83b3c70ea533c69412 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 19 Jan 2011 18:31:21 +0000 Subject: [PATCH] working on nmds command --- nmdscommand.cpp | 743 +++++++++++++++++++++++++++++------------------- nmdscommand.h | 41 ++- 2 files changed, 475 insertions(+), 309 deletions(-) diff --git a/nmdscommand.cpp b/nmdscommand.cpp index a9d361d..9984247 100644 --- a/nmdscommand.cpp +++ b/nmdscommand.cpp @@ -13,7 +13,7 @@ //********************************************************************************************************************** vector NMDSCommand::getValidParameters(){ try { - string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir","inputdir"}; + string Array[] = {"phylip","axes","mindim","maxdim","iters","maxiters","trace","epsilon","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -70,7 +70,7 @@ NMDSCommand::NMDSCommand(string option) { else { //valid paramters for this command - string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir", "inputdir"}; + string Array[] = {"phylip","axes","mindim","maxdim","iters","maxiters","trace","epsilon","outputdir", "inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -125,18 +125,26 @@ NMDSCommand::NMDSCommand(string option) { outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it } - string temp = validParameter.validFile(parameters, "dimension", false); if (temp == "not found") { temp = "2"; } - convert(temp, dimension); + string temp = validParameter.validFile(parameters, "mindim", false); if (temp == "not found") { temp = "1"; } + convert(temp, mindim); - temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "1000"; } + temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "500"; } convert(temp, maxIters); - temp = validParameter.validFile(parameters, "step", false); if (temp == "not found") { temp = "0.2"; } - convert(temp, step); + temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "10"; } + convert(temp, iters); - temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "2"; } - convert(temp, cutoff); - cutoff /= 100.0; + temp = validParameter.validFile(parameters, "maxdim", false); if (temp == "not found") { temp = "2"; } + convert(temp, maxdim); + + temp = validParameter.validFile(parameters, "epsilon", false); if (temp == "not found") { temp = "0.000000000001"; } + convert(temp, epsilon); + + temp = validParameter.validFile(parameters, "trace", false); if (temp == "not found") { temp = "F"; } + trace = m->isTrue(temp); + + if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; } + if (maxdim < mindim) { m->mothurOut("maxdim must be greater than mindim."); m->mothurOutEndLine(); abort = true; } } } @@ -148,14 +156,16 @@ NMDSCommand::NMDSCommand(string option) { //********************************************************************************************************************** void NMDSCommand::help(){ try { - - m->mothurOut("The nmds command parameters are phylip, axes, dimension, maxiters, cutoff and step."); m->mothurOutEndLine(); + m->mothurOut("The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling."); m->mothurOutEndLine(); + m->mothurOut("The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters, epsilon and trace."); m->mothurOutEndLine(); m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine(); m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine(); - m->mothurOut("The dimension parameter allows you to select how many dimensions to use. Default=2"); m->mothurOutEndLine(); - m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try. Default=1000"); m->mothurOutEndLine(); - m->mothurOut("The cutoff parameter allows you to select set an acceptable percentage of magnitude. Default=2, meaning when magnitude of g reaches 2% of it's starting value the process will stop."); m->mothurOutEndLine(); - m->mothurOut("The step parameter allows you to set a starting step. Default=0.2"); m->mothurOutEndLine(); + m->mothurOut("The maxdim parameter allows you to select how maximum dimensions to use. Default=2"); m->mothurOutEndLine(); + m->mothurOut("The mindim parameter allows you to select how minimum dimensions to use. Default=1"); m->mothurOutEndLine(); + m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500"); m->mothurOutEndLine(); + m->mothurOut("The iters parameter allows you to select the number of random configuration to try. Default=10"); m->mothurOutEndLine(); + m->mothurOut("The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12."); m->mothurOutEndLine(); + m->mothurOut("The trace parameter allows you to see the output after each iter. Default=F"); m->mothurOutEndLine(); m->mothurOut("Example nmds(phylip=yourDistanceFile).\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n"); } @@ -174,117 +184,76 @@ int NMDSCommand::execute(){ cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); - cerr.setf(ios::fixed, ios::floatfield); - cerr.setf(ios::showpoint); vector names; - vector matrix; //seqDist = int, int, float - index of seq1 in names, index of seq2 in names, their distance + vector< vector< double> > matrix; //read in phylip file ReadPhylipVector readFile(phylipfile); names = readFile.read(matrix); if (m->control_pressed) { return 0; } - - //randomly generate the starting configuration - step 2 - vector< vector > axes; - if (axesfile == "") { axes = generateStartingConfiguration(names.size()); } - else { axes = readAxes(names); } - if (m->control_pressed) { return 0; } - //sort matrix from smallest distance to largest - step 5 - sort(matrix.begin(), matrix.end(), compareSequenceDistance); + //read axes + vector< vector > axes; + if (axesfile != "") { axes = readAxes(names); } - bool stable = false; - int count = 0; - vector previousStresses; - vector< vector > previousGradient = axes; - double initialMagnitude; - m->mothurOutEndLine(); m->mothurOut("Iter\tStress\tMagnitude"); m->mothurOutEndLine(); - while ((count != maxIters) && (!stable)) { - count++; - - //normalize axes - step 3 - normalizeConfiguration(axes, names.size()); - if (m->control_pressed) { return 0; } + for (int i = mindim; i <= maxdim; i++) { + m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine(); - //calculate Euclidean distances - step 4 - vector< vector > euclid = linearCalc.calculateEuclidianDistance(axes); - if (m->control_pressed) { return 0; } + string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".nmds"; + string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".stress.nmds"; + outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName); + outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); - //order euclid elements in same order as matrix - step 6 - //if there are ties in the matrix we want to arrange the euclid distances in the best way so we do not to add unnecessary stress - vector eDists; - vector ties; - for (int i = 0; i < matrix.size(); i++) { - - seqDist temp(matrix[i].seq1, matrix[i].seq2, euclid[matrix[i].seq1][matrix[i].seq2]); - ties.push_back(temp); - - if (i != matrix.size()-1) { // you are not the last so you can look ahead - if (matrix[i].dist != matrix[i+1].dist) { // you are done with ties, sort and save them, then continue - sort(ties.begin(), ties.end(), compareSequenceDistance); - for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); } - ties.clear(); - } - }else { // you are the last one - sort(ties.begin(), ties.end(), compareSequenceDistance); - for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); } - } - } + ofstream out, out2; + m->openOutputFile(outputFileName, out); + m->openOutputFile(stressFileName, out2); - for (int i = 0; i < euclid.size(); i++) { euclid[i].clear(); } euclid.clear(); - if (m->control_pressed) { return 0; } + out2.setf(ios::fixed, ios::floatfield); + out2.setf(ios::showpoint); + out.setf(ios::fixed, ios::floatfield); + out.setf(ios::showpoint); - //find D - from step 7 - vector D = satisfyMonotonicity(eDists); - if (m->control_pressed) { return 0; } + out2 << "Iter\tStress\tCorr" << endl; - //calculate the raw stress and normalize it - steps 8 and 9 - double rawStress; - double stress = calculateStress(eDists, D, rawStress); - previousStresses.push_back(stress); - if (stress == 0) { m->mothurOut("Stress reached zero after " + toString(count) + " iters, stopping."); m->mothurOutEndLine(); break; } - if (m->control_pressed) { return 0; } - - //calculate stress gradient - step 10 - vector< vector > stressGradient = calculateStressGradientVector(eDists, D, rawStress, stress, axes); - if (m->control_pressed) { return 0; } - - //calculate magnitude - double magnitude = calculateMagnitude(stressGradient); - if (count == 1) { initialMagnitude = magnitude; } - if (m->control_pressed) { return 0; } - - //save gradient before adjusting config. - previousGradient = stressGradient; - - if ((count % 100) == 0) { m->mothurOut(toString(count) + "\t" + toString(previousStresses[previousStresses.size()-1]) + "\t" + toString(magnitude)); m->mothurOutEndLine(); } + for (int j = 0; j < iters; j++) { + if (trace) { m->mothurOut(toString(j+1)); m->mothurOutEndLine(); } + + //get configuration - either randomly generate or resize to this dimension + vector< vector > thisConfig; + if (axesfile == "") { thisConfig = generateStartingConfiguration(names.size(), i); } + else { thisConfig = getConfiguration(axes, i); } + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } + + //calc nmds for this dimension + double stress; + vector< vector > endConfig = nmdsCalc(matrix, thisConfig, stress); + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } + + //calc euclid distances for new config + vector< vector > newEuclid = linearCalc.calculateEuclidianDistance(endConfig); + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } + + //calc correlation between original distances and euclidean distances from this config + double corr = linearCalc.calcPearson(matrix, newEuclid); + corr *= corr; + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } + + //output results + out << "Config" << (j+1) << '\t'; + for (int k = 0; k < i; k++) { out << "X" << (k+1) << '\t'; } + out << endl; + out2 << (j+1) << '\t' << stress << '\t' << corr << endl; + + output(endConfig, names, out); + + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { remove(outputNames[k].c_str()); } return 0; } - //are we done - we are done if percentage of magnitude compared to initial magnitude is less than cutoff - double percentage = magnitude / initialMagnitude; - if (percentage < cutoff) { stable = true; } - else { - - //calculate new step size - step = calculateStep(previousGradient, stressGradient, previousStresses); - cout << "count = " << count << '\t' << step << endl; - if (m->control_pressed) { return 0; } - - //find new config. - axes = calculateNewConfiguration(magnitude, axes, stressGradient); - if (m->control_pressed) { return 0; } } + + out.close(); out2.close(); } - if (m->control_pressed) { return 0; } - - string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds"; - string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds"; - outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName); - outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); - - output(outputFileName, stressFileName, previousGradient, previousStresses, names); - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } m->mothurOutEndLine(); @@ -299,9 +268,77 @@ int NMDSCommand::execute(){ exit(1); } } +//********************************************************************************************************************** +vector< vector > NMDSCommand::nmdsCalc(vector< vector >& matrix, vector< vector >& config, double& stress1) { + try { + + vector< vector > newConfig = config; + + //calc euclid distances + vector< vector > euclid = linearCalc.calculateEuclidianDistance(newConfig); + if (m->control_pressed) { return newConfig; } + + double stress2 = calculateStress(matrix, euclid); + stress1 = stress2 + 1.0 + epsilon; + + if (trace) { m->mothurOutEndLine(); m->mothurOut("Iter\tStress"); m->mothurOutEndLine(); } + + int count = 0; + while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) { + count++; + + stress1 = stress2; + + if (m->control_pressed) { return newConfig; } + + vector< vector > b; b.resize(euclid.size()); + for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); } + + vector columnSums; columnSums.resize(euclid.size(), 0.0); + for (int i = 0; i < euclid.size(); i++) { + for (int j = 0; j < euclid[i].size(); j++) { + //eliminate divide by zero error + if (euclid[i][j] != 0) { + b[i][j] = matrix[i][j] / euclid[i][j]; + columnSums[j] += b[i][j]; + b[i][j] *= -1.0; + } + } + } + + //put in diagonal sums + for (int i = 0; i < euclid.size(); i++) { b[i][i] = columnSums[i]; } + + int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0; + double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0; + + //matrix mult + newConfig = linearCalc.matrix_mult(newConfig, b); + for (int i = 0; i < newConfig.size(); i++) { + for (int j = 0; j < newConfig[i].size(); j++) { + newConfig[i][j] *= (1.0 / n); + } + } + + euclid = linearCalc.calculateEuclidianDistance(newConfig); + + stress2 = calculateStress(matrix, euclid); + + if (trace) { m->mothurOut(count + "\t" + toString(stress1)); m->mothurOutEndLine(); } + + } + + return newConfig; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "generateStartingConfiguration"); + exit(1); + } +} + //********************************************************************************************************************** //generate random config -vector< vector > NMDSCommand::generateStartingConfiguration(int numNames) { +vector< vector > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) { try { vector< vector > axes; axes.resize(dimension); for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); } @@ -335,7 +372,7 @@ vector< vector > NMDSCommand::generateStartingConfiguration(int numNames } //********************************************************************************************************************** //normalize configuration -int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numNames) { +int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numNames, int dimension) { try { vector averageAxes; averageAxes.resize(dimension, 0.0); @@ -370,49 +407,45 @@ int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numN } } //********************************************************************************************************************** -//adjust eDists so that it creates monotonically increasing series of succesive values that increase or stay the same, but never decrease -vector NMDSCommand::satisfyMonotonicity(vector eDists) { +//get configuration +vector< vector > NMDSCommand::getConfiguration(vector< vector >& axes, int dimension) { try { + vector< vector > newAxes; newAxes.resize(dimension); - vector D = eDists; - - for (int i = 0; i < (D.size()-1); i++) { - - if (m->control_pressed) { return D; } - - //is the distance in i+1 smaller than i, if yes then adjust - if (D[i+1].dist < D[i].dist) { D[i+1].dist = D[i].dist; } + for (int i = 0; i < dimension; i++) { + newAxes[i] = axes[i]; } - - return D; + + return newAxes; } catch(exception& e) { - m->errorOut(e, "NMDSCommand", "satisfyMonotonicity"); + m->errorOut(e, "NMDSCommand", "getConfiguration"); exit(1); } } //********************************************************************************************************************** //find raw stress, and normalize using -double NMDSCommand::calculateStress(vector& eDists, vector& D, double& rawStress) { +double NMDSCommand::calculateStress(vector< vector >& matrix, vector< vector >& config) { try { double normStress = 0.0; double denom = 0.0; - rawStress = 0.0; + double rawStress = 0.0; //find raw stress - for (int i = 0; i < D.size(); i++) { - - if (m->control_pressed) { return normStress; } - - rawStress += ((eDists[i].dist - D[i].dist) * (eDists[i].dist - D[i].dist)); - denom += (eDists[i].dist * eDists[i].dist); + for (int i = 0; i < matrix.size(); i++) { + for (int j = 0; j < matrix[i].size(); j++) { + if (m->control_pressed) { return normStress; } + + rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j])); + denom += (config[i][j] * config[i][j]); + } } //normalize stress - if (rawStress != 0.0) { - normStress = 100 * sqrt((rawStress / denom)); + if ((rawStress != 0.0) && (denom != 0.0)) { + normStress = sqrt((rawStress / denom)); } - + return normStress; } catch(exception& e) { @@ -420,153 +453,12 @@ double NMDSCommand::calculateStress(vector& eDists, vector& D, exit(1); } } + //********************************************************************************************************************** -vector< vector > NMDSCommand::calculateStressGradientVector(vector& eDists, vector& D, double rawStress, double stress, vector< vector >& axes) { - try { - vector< vector > gradient; gradient.resize(dimension); - for (int i = 0; i < gradient.size(); i++) { gradient[i].resize(axes[0].size(), 0.0); } - - double sumDij = 0.0; - for (int i = 0; i < eDists.size(); i++) { sumDij += (eDists[i].dist * eDists[i].dist); } - - for (int i = 0; i < eDists.size(); i++) { - - for (int j = 0; j < dimension; j++) { - - if (m->control_pressed) { return gradient; } - - double firstTerm1 = (stress / rawStress) * (eDists[i].dist - D[i].dist); - double firstTerm2 = eDists[i].dist * (stress / sumDij); - double firstTerm = firstTerm1 - firstTerm2; - - double secondTerm = (axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) / eDists[i].dist; - - double results = (firstTerm * secondTerm); - - gradient[j][eDists[i].seq1] += results; - gradient[j][eDists[i].seq2] -= results; - } - } - - return gradient; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateStressGradientVector"); - exit(1); - } -} -//********************************************************************************************************************** -double NMDSCommand::calculateMagnitude(vector< vector >& gradient) { - try { - double magnitude = 0.0; - - double sum = 0.0; - for (int i = 0; i < gradient.size(); i++) { - for (int j = 0; j < gradient[i].size(); j++) { - sum += (gradient[i][j] * gradient[i][j]); - } - } - - magnitude = sqrt(((1.0/(float)gradient[0].size()) * sum)); - - return magnitude; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateMagnitude"); - exit(1); - } -} -//********************************************************************************************************************** -//described in Kruskal paper page 121 + 122 -double NMDSCommand::calculateStep(vector< vector >& prevGrad, vector< vector >& grad, vector& prevStress) { - try { - double newStep = step; - - //calc the cos theta - double sumNum = 0.0; - double sumDenom1 = 0.0; - double sumDenom2 = 0.0; - for (int i = 0; i < prevGrad.size(); i++) { - for (int j = 0; j < prevGrad[i].size(); j++) { - sumDenom1 += (grad[i][j] * grad[i][j]); - sumDenom2 += (prevGrad[i][j] * prevGrad[i][j]); - sumNum += (grad[i][j] * prevGrad[i][j]); - } - } - - double cosTheta = sumNum / (sqrt(sumDenom1) * sqrt(sumDenom2)); - cosTheta *= cosTheta; - - //calc angle factor - double angle = pow(4.0, cosTheta); - - //calc 5 step ratio - double currentStress = prevStress[prevStress.size()-1]; - double lastStress = prevStress[0]; - if (prevStress.size() > 1) { lastStress = prevStress[prevStress.size()-2]; } - double fivePrevStress = prevStress[0]; - if (prevStress.size() > 5) { fivePrevStress = prevStress[prevStress.size()-6]; } - - double fiveStepRatio = min(1.0, (currentStress / fivePrevStress)); - - //calc relaxation factor - double relaxation = 1.3 / (1.0 + pow(fiveStepRatio, 5.0)); - - //calc good luck factor - double goodLuck = min(1.0, (currentStress / lastStress)); - - //calc newStep - cout << "\ncos = " << cosTheta << " step = " << step << " angle = " << angle << " relaxation = " << relaxation << " goodluck = " << goodLuck << endl; - newStep = step * angle * relaxation * goodLuck; - - return newStep; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateStep"); - exit(1); - } -} -//********************************************************************************************************************** -vector< vector > NMDSCommand::calculateNewConfiguration(double magnitude, vector< vector >& axes, vector< vector >& gradient) { - try { - - vector< vector > newAxes = axes; - - for (int i = 0; i < newAxes.size(); i++) { - - if (m->control_pressed) { return newAxes; } - - for (int j = 0; j < newAxes[i].size(); j++) { - newAxes[i][j] = axes[i][j] + ((step / magnitude) * gradient[i][j]); - } - } - - return newAxes; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateNewConfiguration"); - exit(1); - } -} -//********************************************************************************************************************** -int NMDSCommand::output(string outputFileName, string stressFileName, vector< vector >& config, vector& stresses, vector& names) { +int NMDSCommand::output(vector< vector >& config, vector& names, ofstream& out) { try { - ofstream out, out2; - m->openOutputFile(outputFileName, out); - m->openOutputFile(stressFileName, out2); - - //output headers - out << "group\t"; - for (int i = 0; i < dimension; i++) { out << "axis" << (i+1) << '\t'; } - out << endl; - - out2 << "Iter\tStress" << endl; - - //output nmds file - for (int i = 0; i < config[0].size(); i++) { - - if (m->control_pressed) { out.close(); out2.close(); return 0; } + for (int i = 0; i < names.size(); i++) { out << names[i] << '\t'; @@ -576,17 +468,9 @@ int NMDSCommand::output(string outputFileName, string stressFileName, vector< ve out << endl; } - out.close(); - //output stress file - for (int j = 0; j < stresses.size(); j++) { - if (m->control_pressed) { out2.close(); return 0; } + out << endl << endl; - out2 << (j+1) << '\t' << stresses[j] << endl; - } - out2.close(); - - return 0; } catch(exception& e) { @@ -597,8 +481,6 @@ int NMDSCommand::output(string outputFileName, string stressFileName, vector< ve /*****************************************************************/ vector< vector > NMDSCommand::readAxes(vector names){ try { - vector< vector > axes; - ifstream in; m->openInputFile(axesfile, in); @@ -615,7 +497,17 @@ vector< vector > NMDSCommand::readAxes(vector names){ }else { done = true; } } - if (dimension > count) { m->mothurOut("You requested " + toString(dimension) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); dimension = count; } + if (maxdim > count) { + m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); + maxdim = count; + if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); } + } + + vector< vector > axes; axes.resize(maxdim); + for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); } + + map > orderedAxes; + map >::iterator it; while (!in.eof()) { @@ -633,17 +525,31 @@ vector< vector > NMDSCommand::readAxes(vector names){ in >> temp; //only save the axis we want - if (i < dimension) { thisGroupsAxes.push_back(temp); } + if (i < maxdim) { thisGroupsAxes.push_back(temp); } } - if (!ignore) { axes.push_back(thisGroupsAxes); } + if (!ignore) { orderedAxes[group] = thisGroupsAxes; } m->gobble(in); } in.close(); - + //sanity check - if (names.size() != axes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; } + + //put axes info in same order as distance file, just in case + for (int i = 0; i < names.size(); i++) { + it = orderedAxes.find(names[i]); + + if (it != orderedAxes.end()) { + vector thisGroupsAxes = it->second; + + for (int j = 0; j < thisGroupsAxes.size(); j++) { + axes[j][i] = thisGroupsAxes[j]; + } + + }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; } + } return axes; } @@ -652,6 +558,253 @@ vector< vector > NMDSCommand::readAxes(vector names){ exit(1); } } +/********************************************************************************************************************** + vector< vector > NMDSCommand::calculateStressGradientVector(vector& eDists, vector& D, double rawStress, double stress, vector< vector >& axes) { + try { + vector< vector > gradient; gradient.resize(dimension); + for (int i = 0; i < gradient.size(); i++) { gradient[i].resize(axes[0].size(), 0.0); } + + double sumDij = 0.0; + for (int i = 0; i < eDists.size(); i++) { sumDij += (eDists[i].dist * eDists[i].dist); } + + for (int i = 0; i < eDists.size(); i++) { + + for (int j = 0; j < dimension; j++) { + + if (m->control_pressed) { return gradient; } + + double firstTerm1 = (stress / rawStress) * (eDists[i].dist - D[i].dist); + double firstTerm2 = (stress / sumDij) * eDists[i].dist; + double firstTerm = firstTerm1 - firstTerm2; + + float r = (dimension-1.0); + double temp = 1.0 / (pow(eDists[i].dist, r)); + float absTemp = abs(axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]); + double secondTerm = pow(absTemp, r) * temp; + + double sigNum = 1.0; + if ((axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) == 0) { sigNum = 0.0; } + else if ((axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) < 0) { sigNum = -1.0; } + + double results = (firstTerm * secondTerm * sigNum); + cout << i << '\t' << j << '\t' << "results = " << results << endl; + gradient[j][eDists[i].seq1] += results; + gradient[j][eDists[i].seq2] -= results; + } + } + + return gradient; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateStressGradientVector"); + exit(1); + } + } + //********************************************************************************************************************** + double NMDSCommand::calculateMagnitude(vector< vector >& gradient) { + try { + double magnitude = 0.0; + + double sum = 0.0; + for (int i = 0; i < gradient.size(); i++) { + for (int j = 0; j < gradient[i].size(); j++) { + sum += (gradient[i][j] * gradient[i][j]); + } + } + + magnitude = sqrt(((1.0/(float)gradient[0].size()) * sum)); + + return magnitude; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateMagnitude"); + exit(1); + } + } + //********************************************************************************************************************** + //described in Kruskal paper page 121 + 122 + double NMDSCommand::calculateStep(vector< vector >& prevGrad, vector< vector >& grad, vector& prevStress) { + try { + double newStep = step; + + //calc the cos theta + double sumNum = 0.0; + double sumDenom1 = 0.0; + double sumDenom2 = 0.0; + for (int i = 0; i < prevGrad.size(); i++) { + for (int j = 0; j < prevGrad[i].size(); j++) { + sumDenom1 += (grad[i][j] * grad[i][j]); + sumDenom2 += (prevGrad[i][j] * prevGrad[i][j]); + sumNum += (grad[i][j] * prevGrad[i][j]); + } + } + + double cosTheta = sumNum / (sqrt(sumDenom1) * sqrt(sumDenom2)); + cosTheta *= cosTheta; + + //calc angle factor + double angle = pow(4.0, cosTheta); + + //calc 5 step ratio + double currentStress = prevStress[prevStress.size()-1]; + double lastStress = prevStress[0]; + if (prevStress.size() > 1) { lastStress = prevStress[prevStress.size()-2]; } + double fivePrevStress = prevStress[0]; + if (prevStress.size() > 5) { fivePrevStress = prevStress[prevStress.size()-6]; } + + double fiveStepRatio = min(1.0, (currentStress / fivePrevStress)); + + //calc relaxation factor + double relaxation = 1.3 / (1.0 + pow(fiveStepRatio, 5.0)); + + //calc good luck factor + double goodLuck = min(1.0, (currentStress / lastStress)); + + //calc newStep + //cout << "\ncos = " << cosTheta << " step = " << step << " angle = " << angle << " relaxation = " << relaxation << " goodluck = " << goodLuck << endl; + newStep = step * angle * relaxation * goodLuck; + + return newStep; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateStep"); + exit(1); + } + } + //********************************************************************************************************************** + vector< vector > NMDSCommand::calculateNewConfiguration(double magnitude, vector< vector >& axes, vector< vector >& gradient) { + try { + + vector< vector > newAxes = axes; + + for (int i = 0; i < newAxes.size(); i++) { + + if (m->control_pressed) { return newAxes; } + + for (int j = 0; j < newAxes[i].size(); j++) { + newAxes[i][j] = axes[i][j] + ((step / magnitude) * gradient[i][j]); + } + } + + return newAxes; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "calculateNewConfiguration"); + exit(1); + } + }*/ +/********************************************************************************************************************** + //adjust eDists so that it creates monotonically increasing series of succesive values that increase or stay the same, but never decrease + vector NMDSCommand::satisfyMonotonicity(vector eDists, vector partitions) { + try { + + //find averages of each partitions + vector sums; sums.resize(partitions.size(), 0.0); + vector sizes; sizes.resize(partitions.size(), 0); + + for (int i = 0; i < partitions.size(); i++) { + //i is not the last one + int start = partitions[i]; + int end; + if (i != (partitions.size()-1)) { end = partitions[i+1]; } + else{ end = eDists.size(); } + + for (int j = start; j < end; j++) { sums[i] += eDists[j].dist; } + + sizes[i] = (end - start); + } + + + vector D = eDists; + + //i represents the "active block" + int i = 0; + while (i < partitions.size()) { + + if (m->control_pressed) { return D; } + + bool upActive = true; + bool upSatisfied = false; + bool downSatisfied = false; + + //while we are not done with this block + while ((!upSatisfied) || (!downSatisfied)) { + + if (upActive) { + + //are we are upSatisfied? - is the average of the next block greater than mine? + if (i != (partitions.size()-1)) { //if we are the last guy then we are upsatisfied + if ((sums[i+1]/(float)sizes[i+1]) >= (sums[i]/(float)sizes[i])) { + upSatisfied = true; + upActive = false; + }else { + //find new weighted average + double newSum = sums[i] + sums[i+1]; + + //merge blocks - putting everything in i + sums[i] = newSum; + sizes[i] += sizes[i+1]; + partitions[i] = partitions[i+1]; + + sums.erase(sums.begin()+(i+1)); + sizes.erase(sizes.begin()+(i+1)); + partitions.erase(partitions.begin()+(i+1)); + + upActive = false; + } + }else { upSatisfied = true; upActive = false; } + + }else { //downActive + + //are we are DownSatisfied? - is the average of the previous block less than mine? + if (i != 0) { //if we are the first guy then we are downSatisfied + if ((sums[i-1]/(float)sizes[i-1]) <= (sums[i]/(float)sizes[i])) { + downSatisfied = true; + upActive = true; + }else { + //find new weighted average + double newSum = sums[i] + sums[i-1];; + + //merge blocks - putting everything in i-1 + sums[i-1] = newSum; + sizes[i-1] += sizes[i]; + + sums.erase(sums.begin()+i); + sizes.erase(sizes.begin()+i); + partitions.erase(partitions.begin()+i); + i--; + + upActive = true; + } + }else { downSatisfied = true; upActive = true; } + } + } + + i++; // go to next block + } + + //sanity check - for rounding errors + vector averages; averages.resize(sums.size(), 0.0); + for (int i = 0; i < sums.size(); i++) { averages[i] = sums[i] / (float) sizes[i]; } + for (int i = 0; i < averages.size(); i++) { if (averages[i+1] < averages[i]) { averages[i+1] = averages[i]; } } + + //fill D + int placeHolder = 0; + for (int i = 0; i < averages.size(); i++) { + for (int j = 0; j < sizes[i]; j++) { + D[placeHolder].dist = averages[i]; + placeHolder++; + } + } + + return D; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "satisfyMonotonicity"); + exit(1); + } + }*/ + //********************************************************************************************************************** diff --git a/nmdscommand.h b/nmdscommand.h index 5e4e655..d7bedcf 100644 --- a/nmdscommand.h +++ b/nmdscommand.h @@ -14,8 +14,17 @@ #include "linearalgebra.h" -/* references used to make this command: "Nonmetric Multidimensional Scalling: A Numerical Method" - by J. B. Kruskal Psychometrika - Vol 29, No. 2 June 1964 */ +/* + Translated from the nmds.R code written by Sarah Goslee using, + + # Non-metric multidimensional scaling function + # using the majorization algorithm from + # Borg & Groenen 1997, Modern Multidimensional Scaling. + # + + # also referenced (Kruskal 1964) + + */ /*****************************************************************/ class NMDSCommand : public Command { @@ -33,24 +42,28 @@ public: private: - bool abort; + bool abort, trace; string phylipfile, outputDir, axesfile; - int dimension, maxIters; - double step, cutoff; + int maxdim, mindim, maxIters, iters; + double epsilon; vector outputNames; map > outputTypes; LinearAlgebra linearCalc; - vector< vector > generateStartingConfiguration(int); //pass in numNames, return axes - int normalizeConfiguration(vector< vector >&, int); - vector satisfyMonotonicity(vector); - double calculateStress(vector&, vector&, double&); - vector< vector > calculateStressGradientVector(vector&, vector&, double, double, vector< vector >&); - double calculateMagnitude(vector< vector >&); - double calculateStep(vector< vector >&, vector< vector >&, vector&); - vector< vector > calculateNewConfiguration(double, vector< vector >&, vector< vector >&); + vector< vector > nmdsCalc(vector< vector >&, vector< vector >&, double&); + vector< vector > getConfiguration(vector< vector >&, int); + vector< vector > generateStartingConfiguration(int, int); //pass in numNames, return axes + int normalizeConfiguration(vector< vector >&, int, int); + double calculateStress(vector< vector >&, vector< vector >&); vector< vector > readAxes(vector); - int output(string, string, vector< vector >&, vector&, vector&); + int output(vector< vector >&, vector&, ofstream&); + + //vector satisfyMonotonicity(vector, vector); + //vector< vector > calculateStressGradientVector(vector&, vector&, double, double, vector< vector >&); + //double calculateMagnitude(vector< vector >&); + //double calculateStep(vector< vector >&, vector< vector >&, vector&); + //vector< vector > calculateNewConfiguration(double, vector< vector >&, vector< vector >&); + }; /*****************************************************************/ -- 2.39.2