X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=nmdscommand.cpp;h=a90ed2965888928f4a90049fe04fe888d773c450;hp=a9d361d3c6d9737ec1ef7bb09a2ac1764439d3ae;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=d04f948b1a2a1a2984fc4a45d04403b8c121c5bc diff --git a/nmdscommand.cpp b/nmdscommand.cpp index a9d361d..a90ed29 100644 --- a/nmdscommand.cpp +++ b/nmdscommand.cpp @@ -11,51 +11,79 @@ #include "readphylipvector.h" //********************************************************************************************************************** -vector NMDSCommand::getValidParameters(){ +vector NMDSCommand::setParameters(){ try { - string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir","inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(paxes); + CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","nmds-stress",false,true,true); parameters.push_back(pphylip); + CommandParameter pmaxdim("maxdim", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxdim); + CommandParameter pmindim("mindim", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmindim); + CommandParameter piters("iters", "Number", "", "10", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pmaxiters("maxiters", "Number", "", "500", "", "", "","",false,false); parameters.push_back(pmaxiters); + CommandParameter pepsilon("epsilon", "Number", "", "0.000000000001", "", "", "","",false,false); parameters.push_back(pepsilon); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { - m->errorOut(e, "NMDSCommand", "getValidParameters"); + m->errorOut(e, "NMDSCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** -NMDSCommand::NMDSCommand(){ +string NMDSCommand::getHelpString(){ try { - abort = true; - //initialize outputTypes - vector tempOutNames; - outputTypes["nmds"] = tempOutNames; - outputTypes["stress"] = tempOutNames; + string helpString = ""; + helpString += "The nmds command is modelled after the nmds code written in R by Sarah Goslee, using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling.\n"; + helpString += "The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon.\n"; + helpString += "The phylip parameter allows you to enter your distance file.\n"; + helpString += "The axes parameter allows you to enter a file containing a starting configuration.\n"; + helpString += "The maxdim parameter allows you to select the maximum dimensions to use. Default=2\n"; + helpString += "The mindim parameter allows you to select the minimum dimensions to use. Default=2\n"; + helpString += "The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500\n"; + helpString += "The iters parameter allows you to select the number of random configuration to try. Default=10\n"; + helpString += "The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12.\n"; + helpString += "Example nmds(phylip=yourDistanceFile).\n"; + helpString += "Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n"; + return helpString; } catch(exception& e) { - m->errorOut(e, "NMDSCommand", "NMDSCommand"); + m->errorOut(e, "NMDSCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** -vector NMDSCommand::getRequiredParameters(){ - try { - string Array[] = {"phylip"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); - return myArray; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "getRequiredParameters"); - exit(1); - } +string NMDSCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "nmds") { pattern = "[filename],nmds.axes"; } + else if (type == "stress") { pattern = "[filename],nmds.stress"; } + else if (type == "iters") { pattern = "[filename],nmds.iters"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "getOutputPattern"); + exit(1); + } } + //********************************************************************************************************************** -vector NMDSCommand::getRequiredFiles(){ +NMDSCommand::NMDSCommand(){ try { - vector myArray; - return myArray; + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["nmds"] = tempOutNames; + outputTypes["stress"] = tempOutNames; + outputTypes["iters"] = tempOutNames; } catch(exception& e) { - m->errorOut(e, "NMDSCommand", "getRequiredFiles"); + m->errorOut(e, "NMDSCommand", "NMDSCommand"); exit(1); } } @@ -63,15 +91,14 @@ vector NMDSCommand::getRequiredFiles(){ NMDSCommand::NMDSCommand(string option) { try { - abort = false; + abort = false; calledHelp = false; //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { - //valid paramters for this command - string Array[] = {"phylip","axes","dimension","maxiters","step","outputdir", "inputdir"}; - vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + vector myArray = setParameters(); OptionParser parser(option); map parameters = parser. getParameters(); @@ -108,12 +135,18 @@ NMDSCommand::NMDSCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["nmds"] = tempOutNames; + outputTypes["iters"] = tempOutNames; outputTypes["stress"] = tempOutNames; //required parameters phylipfile = validParameter.validFile(parameters, "phylip", true); if (phylipfile == "not open") { phylipfile = ""; abort = true; } - else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("You must provide a distance file before running the nmds command."); m->mothurOutEndLine(); abort = true; } + else if (phylipfile == "not found") { + //if there is a current phylip file, use it + phylipfile = m->getPhylipFile(); + if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setPhylipFile(phylipfile); } axesfile = validParameter.validFile(parameters, "axes", true); if (axesfile == "not open") { axesfile = ""; abort = true; } @@ -125,18 +158,23 @@ NMDSCommand::NMDSCommand(string option) { outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it } - string temp = validParameter.validFile(parameters, "dimension", false); if (temp == "not found") { temp = "2"; } - convert(temp, dimension); + string temp = validParameter.validFile(parameters, "mindim", false); if (temp == "not found") { temp = "2"; } + m->mothurConvert(temp, mindim); + + temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "500"; } + m->mothurConvert(temp, maxIters); - temp = validParameter.validFile(parameters, "maxiters", false); if (temp == "not found") { temp = "1000"; } - convert(temp, maxIters); + temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "10"; } + m->mothurConvert(temp, iters); - temp = validParameter.validFile(parameters, "step", false); if (temp == "not found") { temp = "0.2"; } - convert(temp, step); + temp = validParameter.validFile(parameters, "maxdim", false); if (temp == "not found") { temp = "2"; } + m->mothurConvert(temp, maxdim); - temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "2"; } - convert(temp, cutoff); - cutoff /= 100.0; + temp = validParameter.validFile(parameters, "epsilon", false); if (temp == "not found") { temp = "0.000000000001"; } + m->mothurConvert(temp, epsilon); + + if (mindim < 1) { m->mothurOut("mindim must be at least 1."); m->mothurOutEndLine(); abort = true; } + if (maxdim < mindim) { maxdim = mindim; } } } @@ -146,146 +184,119 @@ NMDSCommand::NMDSCommand(string option) { } } //********************************************************************************************************************** -void NMDSCommand::help(){ - try { - - m->mothurOut("The nmds command parameters are phylip, axes, dimension, maxiters, cutoff and step."); m->mothurOutEndLine(); - m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine(); - m->mothurOut("The axes parameter allows you to enter a file containing a starting configuration."); m->mothurOutEndLine(); - m->mothurOut("The dimension parameter allows you to select how many dimensions to use. Default=2"); m->mothurOutEndLine(); - m->mothurOut("The maxiters parameter allows you to select the maximum number of iters to try. Default=1000"); m->mothurOutEndLine(); - m->mothurOut("The cutoff parameter allows you to select set an acceptable percentage of magnitude. Default=2, meaning when magnitude of g reaches 2% of it's starting value the process will stop."); m->mothurOutEndLine(); - m->mothurOut("The step parameter allows you to set a starting step. Default=0.2"); m->mothurOutEndLine(); - m->mothurOut("Example nmds(phylip=yourDistanceFile).\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n"); - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "help"); - exit(1); - } -} -//********************************************************************************************************************** -NMDSCommand::~NMDSCommand(){} -//********************************************************************************************************************** int NMDSCommand::execute(){ try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); - cerr.setf(ios::fixed, ios::floatfield); - cerr.setf(ios::showpoint); vector names; - vector matrix; //seqDist = int, int, float - index of seq1 in names, index of seq2 in names, their distance + vector< vector< double> > matrix; //read in phylip file ReadPhylipVector readFile(phylipfile); names = readFile.read(matrix); if (m->control_pressed) { return 0; } - - //randomly generate the starting configuration - step 2 + + //read axes vector< vector > axes; - if (axesfile == "") { axes = generateStartingConfiguration(names.size()); } - else { axes = readAxes(names); } - if (m->control_pressed) { return 0; } + if (axesfile != "") { axes = readAxes(names); } - //sort matrix from smallest distance to largest - step 5 - sort(matrix.begin(), matrix.end(), compareSequenceDistance); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipfile)); + string outputFileName = getOutputFileName("iters",variables); + string stressFileName = getOutputFileName("stress",variables); + outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName); + outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); - bool stable = false; - int count = 0; - vector previousStresses; - vector< vector > previousGradient = axes; - double initialMagnitude; - m->mothurOutEndLine(); m->mothurOut("Iter\tStress\tMagnitude"); m->mothurOutEndLine(); - while ((count != maxIters) && (!stable)) { - count++; - - //normalize axes - step 3 - normalizeConfiguration(axes, names.size()); - if (m->control_pressed) { return 0; } - - //calculate Euclidean distances - step 4 - vector< vector > euclid = linearCalc.calculateEuclidianDistance(axes); - if (m->control_pressed) { return 0; } + ofstream out, out2; + m->openOutputFile(outputFileName, out); + m->openOutputFile(stressFileName, out2); + + out2.setf(ios::fixed, ios::floatfield); + out2.setf(ios::showpoint); + out.setf(ios::fixed, ios::floatfield); + out.setf(ios::showpoint); + + out2 << "Dimension\tIter\tStress\tRsq" << endl; + + double bestStress = 10000000; + double bestR2 = 10000000; + vector< vector > bestConfig; + int bestDim = 0; + + for (int i = mindim; i <= maxdim; i++) { + m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine(); - //order euclid elements in same order as matrix - step 6 - //if there are ties in the matrix we want to arrange the euclid distances in the best way so we do not to add unnecessary stress - vector eDists; - vector ties; - for (int i = 0; i < matrix.size(); i++) { + for (int j = 0; j < iters; j++) { + m->mothurOut(toString(j+1)); m->mothurOutEndLine(); - seqDist temp(matrix[i].seq1, matrix[i].seq2, euclid[matrix[i].seq1][matrix[i].seq2]); - ties.push_back(temp); + //get configuration - either randomly generate or resize to this dimension + vector< vector > thisConfig; + if (axesfile == "") { thisConfig = generateStartingConfiguration(names.size(), i); } + else { thisConfig = getConfiguration(axes, i); } + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; } - if (i != matrix.size()-1) { // you are not the last so you can look ahead - if (matrix[i].dist != matrix[i+1].dist) { // you are done with ties, sort and save them, then continue - sort(ties.begin(), ties.end(), compareSequenceDistance); - for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); } - ties.clear(); - } - }else { // you are the last one - sort(ties.begin(), ties.end(), compareSequenceDistance); - for (int k = 0; k < ties.size(); k++) { eDists.push_back(ties[k]); } + //calc nmds for this dimension + double stress; + vector< vector > endConfig = nmdsCalc(matrix, thisConfig, stress); + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; } + + //calc euclid distances for new config + vector< vector > newEuclid = linearCalc.calculateEuclidianDistance(endConfig); + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; } + + //calc correlation between original distances and euclidean distances from this config + double rsquared = linearCalc.calcPearson(newEuclid, matrix); + rsquared *= rsquared; + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; } + + //output results + out << "Config" << (j+1) << '\t'; + for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; } + out << endl; + out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << rsquared << endl; + + output(endConfig, names, out); + + //save best + if (stress < bestStress) { + bestDim = i; + bestStress = stress; + bestR2 = rsquared; + bestConfig = endConfig; } - } - - for (int i = 0; i < euclid.size(); i++) { euclid[i].clear(); } euclid.clear(); - if (m->control_pressed) { return 0; } - - //find D - from step 7 - vector D = satisfyMonotonicity(eDists); - if (m->control_pressed) { return 0; } - - //calculate the raw stress and normalize it - steps 8 and 9 - double rawStress; - double stress = calculateStress(eDists, D, rawStress); - previousStresses.push_back(stress); - if (stress == 0) { m->mothurOut("Stress reached zero after " + toString(count) + " iters, stopping."); m->mothurOutEndLine(); break; } - if (m->control_pressed) { return 0; } - - //calculate stress gradient - step 10 - vector< vector > stressGradient = calculateStressGradientVector(eDists, D, rawStress, stress, axes); - if (m->control_pressed) { return 0; } - - //calculate magnitude - double magnitude = calculateMagnitude(stressGradient); - if (count == 1) { initialMagnitude = magnitude; } - if (m->control_pressed) { return 0; } - - //save gradient before adjusting config. - previousGradient = stressGradient; - - if ((count % 100) == 0) { m->mothurOut(toString(count) + "\t" + toString(previousStresses[previousStresses.size()-1]) + "\t" + toString(magnitude)); m->mothurOutEndLine(); } - - //are we done - we are done if percentage of magnitude compared to initial magnitude is less than cutoff - double percentage = magnitude / initialMagnitude; - if (percentage < cutoff) { stable = true; } - else { - - //calculate new step size - step = calculateStep(previousGradient, stressGradient, previousStresses); - cout << "count = " << count << '\t' << step << endl; - if (m->control_pressed) { return 0; } - - //find new config. - axes = calculateNewConfiguration(magnitude, axes, stressGradient); - if (m->control_pressed) { return 0; } + + if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { m->mothurRemove(outputNames[k]); } return 0; } } } - if (m->control_pressed) { return 0; } + out.close(); out2.close(); - string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds"; - string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds"; - outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName); - outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); + //output best config + string BestFileName = getOutputFileName("nmds",variables); + outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName); + + m->mothurOut("\nNumber of dimensions:\t" + toString(bestDim) + "\n"); + m->mothurOut("Lowest stress :\t" + toString(bestStress) + "\n"); + m->mothurOut("R-squared for configuration:\t" + toString(bestR2) + "\n"); + + ofstream outBest; + m->openOutputFile(BestFileName, outBest); + outBest.setf(ios::fixed, ios::floatfield); + outBest.setf(ios::showpoint); - output(outputFileName, stressFileName, previousGradient, previousStresses, names); + outBest << '\t'; + for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; } + outBest << endl; - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + output(bestConfig, names, outBest); + + outBest.close(); + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -299,9 +310,72 @@ int NMDSCommand::execute(){ exit(1); } } +//********************************************************************************************************************** +vector< vector > NMDSCommand::nmdsCalc(vector< vector >& matrix, vector< vector >& config, double& stress1) { + try { + + vector< vector > newConfig = config; + + //calc euclid distances + vector< vector > euclid = linearCalc.calculateEuclidianDistance(newConfig); + if (m->control_pressed) { return newConfig; } + + double stress2 = calculateStress(matrix, euclid); + stress1 = stress2 + 1.0 + epsilon; + + int count = 0; + while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) { + count++; + + stress1 = stress2; + + if (m->control_pressed) { return newConfig; } + + vector< vector > b; b.resize(euclid.size()); + for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0.0); } + + vector columnSums; columnSums.resize(euclid.size(), 0.0); + for (int i = 0; i < euclid.size(); i++) { + for (int j = 0; j < euclid[i].size(); j++) { + //eliminate divide by zero error + if (euclid[i][j] != 0) { + b[i][j] = matrix[i][j] / euclid[i][j]; + columnSums[j] += b[i][j]; + b[i][j] *= -1.0; + } + } + } + + //put in diagonal sums + for (int i = 0; i < euclid.size(); i++) { b[i][i] = columnSums[i]; } + + int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0; + double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0; + + //matrix mult + newConfig = linearCalc.matrix_mult(newConfig, b); + for (int i = 0; i < newConfig.size(); i++) { + for (int j = 0; j < newConfig[i].size(); j++) { + newConfig[i][j] *= (1.0 / n); + } + } + + euclid = linearCalc.calculateEuclidianDistance(newConfig); + + stress2 = calculateStress(matrix, euclid); + } + + return newConfig; + } + catch(exception& e) { + m->errorOut(e, "NMDSCommand", "generateStartingConfiguration"); + exit(1); + } +} + //********************************************************************************************************************** //generate random config -vector< vector > NMDSCommand::generateStartingConfiguration(int numNames) { +vector< vector > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) { try { vector< vector > axes; axes.resize(dimension); for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); } @@ -335,7 +409,7 @@ vector< vector > NMDSCommand::generateStartingConfiguration(int numNames } //********************************************************************************************************************** //normalize configuration -int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numNames) { +int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numNames, int dimension) { try { vector averageAxes; averageAxes.resize(dimension, 0.0); @@ -370,49 +444,45 @@ int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numN } } //********************************************************************************************************************** -//adjust eDists so that it creates monotonically increasing series of succesive values that increase or stay the same, but never decrease -vector NMDSCommand::satisfyMonotonicity(vector eDists) { +//get configuration +vector< vector > NMDSCommand::getConfiguration(vector< vector >& axes, int dimension) { try { + vector< vector > newAxes; newAxes.resize(dimension); - vector D = eDists; - - for (int i = 0; i < (D.size()-1); i++) { - - if (m->control_pressed) { return D; } - - //is the distance in i+1 smaller than i, if yes then adjust - if (D[i+1].dist < D[i].dist) { D[i+1].dist = D[i].dist; } + for (int i = 0; i < dimension; i++) { + newAxes[i] = axes[i]; } - - return D; + + return newAxes; } catch(exception& e) { - m->errorOut(e, "NMDSCommand", "satisfyMonotonicity"); + m->errorOut(e, "NMDSCommand", "getConfiguration"); exit(1); } } //********************************************************************************************************************** //find raw stress, and normalize using -double NMDSCommand::calculateStress(vector& eDists, vector& D, double& rawStress) { +double NMDSCommand::calculateStress(vector< vector >& matrix, vector< vector >& config) { try { double normStress = 0.0; double denom = 0.0; - rawStress = 0.0; + double rawStress = 0.0; //find raw stress - for (int i = 0; i < D.size(); i++) { - - if (m->control_pressed) { return normStress; } - - rawStress += ((eDists[i].dist - D[i].dist) * (eDists[i].dist - D[i].dist)); - denom += (eDists[i].dist * eDists[i].dist); + for (int i = 0; i < matrix.size(); i++) { + for (int j = 0; j < matrix[i].size(); j++) { + if (m->control_pressed) { return normStress; } + + rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j])); + denom += (config[i][j] * config[i][j]); + } } //normalize stress - if (rawStress != 0.0) { - normStress = 100 * sqrt((rawStress / denom)); + if ((rawStress != 0.0) && (denom != 0.0)) { + normStress = sqrt((rawStress / denom)); } - + return normStress; } catch(exception& e) { @@ -420,153 +490,12 @@ double NMDSCommand::calculateStress(vector& eDists, vector& D, exit(1); } } + //********************************************************************************************************************** -vector< vector > NMDSCommand::calculateStressGradientVector(vector& eDists, vector& D, double rawStress, double stress, vector< vector >& axes) { - try { - vector< vector > gradient; gradient.resize(dimension); - for (int i = 0; i < gradient.size(); i++) { gradient[i].resize(axes[0].size(), 0.0); } - - double sumDij = 0.0; - for (int i = 0; i < eDists.size(); i++) { sumDij += (eDists[i].dist * eDists[i].dist); } - - for (int i = 0; i < eDists.size(); i++) { - - for (int j = 0; j < dimension; j++) { - - if (m->control_pressed) { return gradient; } - - double firstTerm1 = (stress / rawStress) * (eDists[i].dist - D[i].dist); - double firstTerm2 = eDists[i].dist * (stress / sumDij); - double firstTerm = firstTerm1 - firstTerm2; - - double secondTerm = (axes[j][eDists[i].seq1] - axes[j][eDists[i].seq2]) / eDists[i].dist; - - double results = (firstTerm * secondTerm); - - gradient[j][eDists[i].seq1] += results; - gradient[j][eDists[i].seq2] -= results; - } - } - - return gradient; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateStressGradientVector"); - exit(1); - } -} -//********************************************************************************************************************** -double NMDSCommand::calculateMagnitude(vector< vector >& gradient) { - try { - double magnitude = 0.0; - - double sum = 0.0; - for (int i = 0; i < gradient.size(); i++) { - for (int j = 0; j < gradient[i].size(); j++) { - sum += (gradient[i][j] * gradient[i][j]); - } - } - - magnitude = sqrt(((1.0/(float)gradient[0].size()) * sum)); - - return magnitude; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateMagnitude"); - exit(1); - } -} -//********************************************************************************************************************** -//described in Kruskal paper page 121 + 122 -double NMDSCommand::calculateStep(vector< vector >& prevGrad, vector< vector >& grad, vector& prevStress) { - try { - double newStep = step; - - //calc the cos theta - double sumNum = 0.0; - double sumDenom1 = 0.0; - double sumDenom2 = 0.0; - for (int i = 0; i < prevGrad.size(); i++) { - for (int j = 0; j < prevGrad[i].size(); j++) { - sumDenom1 += (grad[i][j] * grad[i][j]); - sumDenom2 += (prevGrad[i][j] * prevGrad[i][j]); - sumNum += (grad[i][j] * prevGrad[i][j]); - } - } - - double cosTheta = sumNum / (sqrt(sumDenom1) * sqrt(sumDenom2)); - cosTheta *= cosTheta; - - //calc angle factor - double angle = pow(4.0, cosTheta); - - //calc 5 step ratio - double currentStress = prevStress[prevStress.size()-1]; - double lastStress = prevStress[0]; - if (prevStress.size() > 1) { lastStress = prevStress[prevStress.size()-2]; } - double fivePrevStress = prevStress[0]; - if (prevStress.size() > 5) { fivePrevStress = prevStress[prevStress.size()-6]; } - - double fiveStepRatio = min(1.0, (currentStress / fivePrevStress)); - - //calc relaxation factor - double relaxation = 1.3 / (1.0 + pow(fiveStepRatio, 5.0)); - - //calc good luck factor - double goodLuck = min(1.0, (currentStress / lastStress)); - - //calc newStep - cout << "\ncos = " << cosTheta << " step = " << step << " angle = " << angle << " relaxation = " << relaxation << " goodluck = " << goodLuck << endl; - newStep = step * angle * relaxation * goodLuck; - - return newStep; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateStep"); - exit(1); - } -} -//********************************************************************************************************************** -vector< vector > NMDSCommand::calculateNewConfiguration(double magnitude, vector< vector >& axes, vector< vector >& gradient) { +int NMDSCommand::output(vector< vector >& config, vector& names, ofstream& out) { try { - vector< vector > newAxes = axes; - - for (int i = 0; i < newAxes.size(); i++) { - - if (m->control_pressed) { return newAxes; } - - for (int j = 0; j < newAxes[i].size(); j++) { - newAxes[i][j] = axes[i][j] + ((step / magnitude) * gradient[i][j]); - } - } - - return newAxes; - } - catch(exception& e) { - m->errorOut(e, "NMDSCommand", "calculateNewConfiguration"); - exit(1); - } -} -//********************************************************************************************************************** -int NMDSCommand::output(string outputFileName, string stressFileName, vector< vector >& config, vector& stresses, vector& names) { - try { - - ofstream out, out2; - m->openOutputFile(outputFileName, out); - m->openOutputFile(stressFileName, out2); - - //output headers - out << "group\t"; - for (int i = 0; i < dimension; i++) { out << "axis" << (i+1) << '\t'; } - out << endl; - - out2 << "Iter\tStress" << endl; - - //output nmds file - for (int i = 0; i < config[0].size(); i++) { - - if (m->control_pressed) { out.close(); out2.close(); return 0; } + for (int i = 0; i < names.size(); i++) { out << names[i] << '\t'; @@ -576,17 +505,9 @@ int NMDSCommand::output(string outputFileName, string stressFileName, vector< ve out << endl; } - out.close(); - //output stress file - for (int j = 0; j < stresses.size(); j++) { - if (m->control_pressed) { out2.close(); return 0; } + out << endl << endl; - out2 << (j+1) << '\t' << stresses[j] << endl; - } - out2.close(); - - return 0; } catch(exception& e) { @@ -597,8 +518,6 @@ int NMDSCommand::output(string outputFileName, string stressFileName, vector< ve /*****************************************************************/ vector< vector > NMDSCommand::readAxes(vector names){ try { - vector< vector > axes; - ifstream in; m->openInputFile(axesfile, in); @@ -615,7 +534,17 @@ vector< vector > NMDSCommand::readAxes(vector names){ }else { done = true; } } - if (dimension > count) { m->mothurOut("You requested " + toString(dimension) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); dimension = count; } + if (maxdim > count) { + m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); + maxdim = count; + if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + "."); m->mothurOutEndLine(); } + } + + vector< vector > axes; axes.resize(maxdim); + for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); } + + map > orderedAxes; + map >::iterator it; while (!in.eof()) { @@ -633,17 +562,31 @@ vector< vector > NMDSCommand::readAxes(vector names){ in >> temp; //only save the axis we want - if (i < dimension) { thisGroupsAxes.push_back(temp); } + if (i < maxdim) { thisGroupsAxes.push_back(temp); } } - if (!ignore) { axes.push_back(thisGroupsAxes); } + if (!ignore) { orderedAxes[group] = thisGroupsAxes; } m->gobble(in); } in.close(); - + //sanity check - if (names.size() != axes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; } + + //put axes info in same order as distance file, just in case + for (int i = 0; i < names.size(); i++) { + it = orderedAxes.find(names[i]); + + if (it != orderedAxes.end()) { + vector thisGroupsAxes = it->second; + + for (int j = 0; j < thisGroupsAxes.size(); j++) { + axes[j][i] = thisGroupsAxes[j]; + } + + }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return axes; } + } return axes; } @@ -652,6 +595,7 @@ vector< vector > NMDSCommand::readAxes(vector names){ exit(1); } } -//********************************************************************************************************************** +/**********************************************************************************************************************/ +