X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=treegroupscommand.cpp;h=6633e5160559297c23a542c373ed1b1cc9a3ccdc;hb=529ec122f7cac4af987e121d150b878d7c7a0d5d;hp=0150a7a4cd49eb490ba7a208ce4f74fb691b3e92;hpb=82723a54e6109e2d46d84c10e87727cebd5a18ea;p=mothur.git diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 0150a7a..6633e51 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -27,7 +27,7 @@ vector TreeGroupCommand::setParameters(){ CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput); +//CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -70,6 +70,26 @@ string TreeGroupCommand::getHelpString(){ } } //********************************************************************************************************************** +string TreeGroupCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "tree") { outputFileName = "tre"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "TreeGroupCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** TreeGroupCommand::TreeGroupCommand(){ try { abort = true; calledHelp = true; @@ -266,7 +286,7 @@ TreeGroupCommand::TreeGroupCommand(string option) { TreeGroupCommand::~TreeGroupCommand(){ if (abort == false) { if (format == "sharedfile") { delete input; } - else { delete readMatrix; delete matrix; delete list; } + else { delete list; } delete tmap; } @@ -398,7 +418,8 @@ int TreeGroupCommand::execute(){ }else{ //read in dist file filename = inputfile; - + + ReadMatrix* readMatrix; if (format == "column") { readMatrix = new ReadColumnMatrix(filename); } else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); } @@ -414,7 +435,7 @@ int TreeGroupCommand::execute(){ readMatrix->read(nameMap); list = readMatrix->getListVector(); - matrix = readMatrix->getMatrix(); + SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix(); //make treemap tmap = new TreeMap(); @@ -437,12 +458,14 @@ int TreeGroupCommand::execute(){ if (m->control_pressed) { return 0; } - vector< vector > matrix = makeSimsDist(); + vector< vector > matrix = makeSimsDist(dMatrix); + delete readMatrix; + delete dMatrix; if (m->control_pressed) { return 0; } //create a new filename - string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + getOutputFileNameTag("tree"); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); Tree* newTree = createTree(matrix); @@ -482,76 +505,15 @@ int TreeGroupCommand::execute(){ Tree* TreeGroupCommand::createTree(vector< vector >& simMatrix){ try { //create tree - t = new Tree(tmap); + t = new Tree(tmap, simMatrix); - //initialize index - map index; //maps row in simMatrix to vector index in the tree - for (int g = 0; g < numGroups; g++) { index[g] = g; } + if (m->control_pressed) { delete t; t = NULL; return t; } - //do merges and create tree structure by setting parents and children - //there are numGroups - 1 merges to do - for (int i = 0; i < (numGroups - 1); i++) { - float largest = -1000.0; - - if (m->control_pressed) { delete t; t = NULL; return t; } - - int row, column; - //find largest value in sims matrix by searching lower triangle - for (int j = 1; j < simMatrix.size(); j++) { - for (int k = 0; k < j; k++) { - if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; } - } - } + //assemble tree + map empty; + t->assembleTree(empty); - //set non-leaf node info and update leaves to know their parents - //non-leaf - t->tree[numGroups + i].setChildren(index[row], index[column]); - - //parents - t->tree[index[row]].setParent(numGroups + i); - t->tree[index[column]].setParent(numGroups + i); - - //blength = distance / 2; - float blength = ((1.0 - largest) / 2); - - //branchlengths - t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves()); - t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves()); - - //set your length to leaves to your childs length plus branchlength - t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength()); - - - //update index - index[row] = numGroups+i; - index[column] = numGroups+i; - - //remove highest value that caused the merge. - simMatrix[row][column] = -1000.0; - simMatrix[column][row] = -1000.0; - - //merge values in simsMatrix - for (int n = 0; n < simMatrix.size(); n++) { - //row becomes merge of 2 groups - simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2; - simMatrix[n][row] = simMatrix[row][n]; - //delete column - simMatrix[column][n] = -1000.0; - simMatrix[n][column] = -1000.0; - } - } - - //adjust tree to make sure root to tip length is .5 - int root = t->findRoot(); - t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves())); - - //assemble tree - t->assembleTree(); - - if (m->control_pressed) { delete t; t = NULL; return t; } - return t; - } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "createTree"); @@ -595,7 +557,7 @@ void TreeGroupCommand::printSims(ostream& out, vector< vector >& simMatr } } /***********************************************************/ -vector< vector > TreeGroupCommand::makeSimsDist() { +vector< vector > TreeGroupCommand::makeSimsDist(SparseDistanceMatrix* matrix) { try { numGroups = list->size(); @@ -610,13 +572,17 @@ vector< vector > TreeGroupCommand::makeSimsDist() { //go through sparse matrix and fill sims //go through each cell in the sparsematrix - for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){ - //similairity = -(distance-1) - simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0); - simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0); - - if (m->control_pressed) { return simMatrix; } + for (int i = 0; i < matrix->seqVec.size(); i++) { + for (int j = 0; j < matrix->seqVec[i].size(); j++) { + + //already checked everyone else in row + if (i < matrix->seqVec[i][j].index) { + simMatrix[i][matrix->seqVec[i][j].index] = -(matrix->seqVec[i][j].dist -1.0); + simMatrix[matrix->seqVec[i][j].index][i] = -(matrix->seqVec[i][j].dist -1.0); + if (m->control_pressed) { return simMatrix; } + } + } } return simMatrix; @@ -631,13 +597,6 @@ vector< vector > TreeGroupCommand::makeSimsDist() { int TreeGroupCommand::makeSimsShared() { try { - numGroups = lookup.size(); - lines.resize(processors); - for (int i = 0; i < processors; i++) { - lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); - lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups); - } - if (subsample) { if (subsampleSize == -1) { //user has not set size, set size = smallest samples size subsampleSize = lookup[0]->getNumSeqs(); @@ -662,8 +621,17 @@ int TreeGroupCommand::makeSimsShared() { lookup = temp; m->setGroups(Groups); } + + if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } } + numGroups = lookup.size(); + lines.resize(processors); + for (int i = 0; i < processors; i++) { + lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); + lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups); + } + set processedLabels; set userLabels = labels; @@ -955,7 +923,7 @@ int TreeGroupCommand::process(vector thisLookup) { } //create a new filename - string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave.tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave." + getOutputFileNameTag("tree"); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //creates tree from similarity matrix and write out file @@ -968,7 +936,7 @@ int TreeGroupCommand::process(vector thisLookup) { if (m->control_pressed) { break; } //create a new filename - string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all.tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all." + getOutputFileNameTag("tree"); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; @@ -1004,11 +972,13 @@ int TreeGroupCommand::process(vector thisLookup) { if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } } Consensus consensus; - Tree* conTree = consensus.getTree(trees, tmap); + //clear old tree names if any + m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups + Tree* conTree = consensus.getTree(trees); //create a new filename - string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre"; - outputNames.push_back(conFile); outputTypes["tree"].push_back(outputFile); + string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons." + getOutputFileNameTag("tree"); + outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; m->openOutputFile(conFile, outTree); @@ -1035,7 +1005,7 @@ int TreeGroupCommand::process(vector thisLookup) { } //create a new filename - string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag("tree"); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //creates tree from similarity matrix and write out file