From 9354b70bb84f6dd52ff4a1955754bcbf7edeedbf Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 19 Nov 2009 20:04:04 +0000 Subject: [PATCH] fixed bug in libshuff --- fullmatrix.cpp | 154 ++++++++++++++++++++++---------------------- fullmatrix.h | 5 +- groupmap.cpp | 22 ++++++- groupmap.h | 4 +- libshuff.cpp | 6 +- libshuffcommand.cpp | 68 ++++++++++++------- readdistcommand.cpp | 3 +- venn.cpp | 2 +- 8 files changed, 156 insertions(+), 108 deletions(-) diff --git a/fullmatrix.cpp b/fullmatrix.cpp index 5bb2d23..5612041 100644 --- a/fullmatrix.cpp +++ b/fullmatrix.cpp @@ -10,7 +10,6 @@ #include "fullmatrix.h" /**************************************************************************/ - //This constructor reads a distance matrix file and stores the data in the matrix. FullMatrix::FullMatrix(ifstream& filehandle) { try{ @@ -23,14 +22,13 @@ FullMatrix::FullMatrix(ifstream& filehandle) { //make the matrix filled with zeros matrix.resize(numSeqs); for(int i = 0; i < numSeqs; i++) { - matrix[i].resize(numSeqs, 0); + matrix[i].resize(numSeqs, 0.0); } - group = groupmap->getGroup(name); if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); } index.resize(numSeqs); - index[0].groupName = group; index[0].seqName = name; + index[0].groupName = group; //determine if matrix is square or lower triangle //if it is square read the distances for the first sequence @@ -60,22 +58,8 @@ FullMatrix::FullMatrix(ifstream& filehandle) { if (square == true) { readSquareMatrix(filehandle); } else { readLTMatrix(filehandle); } - //sort sequences so they are gathered in groups for processing - sortGroups(0, numSeqs-1); - - groups.push_back(index[0].groupName); - sizes.push_back(1); - int groupCount = 0; - - for(int i=1;i> name; group = groupmap->getGroup(name); - index[i].groupName = group; index[i].seqName = name; + index[i].groupName = group; if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); } @@ -127,25 +111,24 @@ void FullMatrix::readLTMatrix(ifstream& filehandle) { float distance; string group, name; - + for(int i=1;i> name; - + group = groupmap->getGroup(name); - index[i].groupName = group; index[i].seqName = name; + index[i].groupName = group; if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); } for(int j=0;j> distance; - matrix[i][j] = distance; matrix[j][i] = distance; count++; reading->update(count); } - } + reading->finish(); delete reading; } @@ -159,61 +142,37 @@ void FullMatrix::readLTMatrix(ifstream& filehandle) { void FullMatrix::sortGroups(int low, int high){ try{ - - int i = low; - int j = high; - float y = 0; - string name; - /* compare value */ - //what group does this row belong to - string z = index[(low + high) / 2].groupName; - - /* partition */ - do { - /* find member above ... */ - while(index[i].groupName < z) i++; - - /* find element below ... */ - while(index[j].groupName > z) j--; + if (low < high) { + int i = low+1; + int j = high; + int pivot = (low+high) / 2; - if(i <= j) { - /* swap rows*/ - for (int h = 0; h < numSeqs; h++) { - y = matrix[i][h]; - matrix[i][h] = matrix[j][h]; - matrix[j][h] = y; - } + swapRows(low, pivot); //puts pivot in final spot + + /* compare value */ + //what group does this row belong to + string key = index[low].groupName; + + /* partition */ + while(i <= j) { + /* find member above ... */ + while((i <= high) && (index[i].groupName <= key)) { i++; } - /* swap columns*/ - for (int b = 0; b < numSeqs; b++) { - y = matrix[b][i]; - matrix[b][i] = matrix[b][j]; - matrix[b][j] = y; + /* find element below ... */ + while((j >= low) && (index[j].groupName > key)) { j--; } + + if(i < j) { + swapRows(i, j); } - - //swap map elements - z = index[i].groupName; - index[i].groupName = index[j].groupName; - index[j].groupName = z; - - name = index[i].seqName; - index[i].seqName = index[j].seqName; - index[j].seqName = name; - - - i++; - j--; - } - } while(i <= j); - - /* recurse */ - if(low < j) - sortGroups(low, j); - - if(i < high) - sortGroups(i, high); - + } + + swapRows(low, j); + + /* recurse */ + sortGroups(low, j-1); + sortGroups(j+1, high); + } } catch(exception& e) { @@ -222,6 +181,43 @@ void FullMatrix::sortGroups(int low, int high){ } } +/**************************************************************************/ +void FullMatrix::swapRows(int i, int j) { + try { + + float y; + string z, name; + + /* swap rows*/ + for (int h = 0; h < numSeqs; h++) { + y = matrix[i][h]; + matrix[i][h] = matrix[j][h]; + matrix[j][h] = y; + } + + /* swap columns*/ + for (int b = 0; b < numSeqs; b++) { + y = matrix[b][i]; + matrix[b][i] = matrix[b][j]; + matrix[b][j] = y; + } + + //swap map elements + z = index[i].groupName; + index[i].groupName = index[j].groupName; + index[j].groupName = z; + + name = index[i].seqName; + index[i].seqName = index[j].seqName; + index[j].seqName = name; + + + } + catch(exception& e) { + errorOut(e, "FullMatrix", "swapRows"); + exit(1); + } +} /**************************************************************************/ float FullMatrix::get(int i, int j){ return matrix[i][j]; } @@ -249,10 +245,12 @@ void FullMatrix::printMatrix(ostream& out) { for (int i = 0; i < numSeqs; i++) { out << "row " << i << " group = " << index[i].groupName << " name = " << index[i].seqName << endl; for (int j = 0; j < numSeqs; j++) { - out << matrix[i][j] << " "; + out << i << '\t' << j << '\t' << matrix[i][j] << endl; } out << endl; } + + for (int i = 0; i < numSeqs; i++) { out << i << '\t' << index[i].seqName << endl; } } catch(exception& e) { errorOut(e, "FullMatrix", "printMatrix"); diff --git a/fullmatrix.h b/fullmatrix.h index a669f70..890d07a 100644 --- a/fullmatrix.h +++ b/fullmatrix.h @@ -30,6 +30,8 @@ public: int getNumSeqs(); vector getSizes(); vector getGroups(); + void setGroups(vector names) { groups = names; } + void setSizes(vector s) { sizes = s; } int getNumGroups(); void printMatrix(ostream&); float get(int, int); @@ -41,8 +43,9 @@ private: vector index; // row in vector, sequence group. need to know this so when we sort it can be updated. vector sizes; vector groups; - void sortGroups(int, int); //this function sorts the sequences within the matrix. + void sortGroups(int, int); //this function sorts the sequences within the matrix. + void swapRows(int, int); GroupMap* groupmap; //maps sequences to groups they belong to. int numSeqs; diff --git a/groupmap.cpp b/groupmap.cpp index 6521128..b627a6b 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -29,8 +29,9 @@ void GroupMap::readMap() { fileHandle >> seqGroup; //read from second column setNamesOfGroups(seqGroup); - + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group gobble(fileHandle); } @@ -67,6 +68,7 @@ void GroupMap::setNamesOfGroups(string seqGroup) { } if (count == namesOfGroups.size()) { namesOfGroups.push_back(seqGroup); //new group + seqsPerGroup[seqGroup] = 0; groupIndex[seqGroup] = index; index++; } @@ -85,6 +87,24 @@ bool GroupMap::isValidGroup(string groupname) { exit(1); } } +/************************************************************/ +int GroupMap::getNumSeqs(string group) { + try { + + map::iterator itNum; + + itNum = seqsPerGroup.find(group); + + if (itNum == seqsPerGroup.end()) { return 0; } + + return seqsPerGroup[group]; + + } + catch(exception& e) { + errorOut(e, "GroupMap", "getNumSeqs"); + exit(1); + } +} /************************************************************/ vector GroupMap::getNamesSeqs(){ diff --git a/groupmap.h b/groupmap.h index e52ada5..fd6c146 100644 --- a/groupmap.h +++ b/groupmap.h @@ -25,9 +25,10 @@ public: string getGroup(string); void setGroup(string, string); vector namesOfGroups; - map groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays. + map groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays and libshuff commands. int getNumSeqs() { return groupmap.size(); } vector getNamesSeqs(); + int getNumSeqs(string); //return the number of seqs in a given group private: ifstream fileHandle; @@ -36,6 +37,7 @@ private: map::iterator it; void setNamesOfGroups(string); map groupmap; //sequence name and groupname + map seqsPerGroup; //maps groupname to number of seqs in that group }; #endif diff --git a/libshuff.cpp b/libshuff.cpp index 002cd52..e6c83f7 100644 --- a/libshuff.cpp +++ b/libshuff.cpp @@ -20,7 +20,6 @@ Libshuff::Libshuff(FullMatrix* D, int it, float step, float co) : matrix(D), ite groupNames = matrix->getGroups(); groupSizes = matrix->getSizes(); numGroups = matrix->getNumGroups(); - initializeGroups(matrix); } catch(exception& e) { @@ -43,6 +42,7 @@ void Libshuff::initializeGroups(FullMatrix* matrix){ } int index=0; for(int i=0;i > > Libshuff::getSavedMins(){ vector Libshuff::getMinX(int x){ try{ + vector minX(groupSizes[x], 0); for(int i=0;i 1 ? (i==0 ? matrix->get(groups[x][0], groups[x][1]) : matrix->get(groups[x][i], groups[x][0])) : 0.0); + minX[i] = (groupSizes[x] > 1 ? (i==0 ? matrix->get(groups[x][0], groups[x][1]) : matrix->get(groups[x][i], groups[x][0])) : 0.0); //get the first value in row i of this block + //minX[i] = matrix->get(groups[x][i], groups[x][0]); for(int j=0;jget(groups[x][i], groups[x][j]); diff --git a/libshuffcommand.cpp b/libshuffcommand.cpp index e78c00d..2d84767 100644 --- a/libshuffcommand.cpp +++ b/libshuffcommand.cpp @@ -76,7 +76,17 @@ LibShuffCommand::LibShuffCommand(string option){ if (abort == false) { matrix = globaldata->gMatrix; //get the distance matrix - setGroups(); //set the groups to be analyzed + setGroups(); //set the groups to be analyzed and sorts them + + /********************************************************************************************/ + //this is needed because when we read the matrix we sort it into groups in alphabetical order + //the rest of the command and the classes used in this command assume specific order + /********************************************************************************************/ + matrix->setGroups(globaldata->gGroupmap->namesOfGroups); + vector sizes; + for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) { sizes.push_back(globaldata->gGroupmap->getNumSeqs(globaldata->gGroupmap->namesOfGroups[i])); } + matrix->setSizes(sizes); + if(userform == "discrete"){ form = new DLibshuff(matrix, iters, step, cutOff); @@ -136,14 +146,17 @@ int LibShuffCommand::execute(){ for(int i=0;inewLine(groupNames[i]+'-'+groupNames[j], iters); + int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix + int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]]; + for(int p=0;prandomizeGroups(i,j); - if(form->evaluatePair(i,j) >= savedDXYValues[i][j]) { pValueCounts[i][j]++; } - if(form->evaluatePair(j,i) >= savedDXYValues[j][i]) { pValueCounts[j][i]++; } + form->randomizeGroups(spoti,spotj); + if(form->evaluatePair(spoti,spotj) >= savedDXYValues[spoti][spotj]) { pValueCounts[i][j]++; } + if(form->evaluatePair(spotj,spoti) >= savedDXYValues[spotj][spoti]) { pValueCounts[j][i]++; } reading->update(p); } - form->resetGroup(i); - form->resetGroup(j); + form->resetGroup(spoti); + form->resetGroup(spotj); } } reading->finish(); @@ -190,13 +203,17 @@ void LibShuffCommand::printCoverageFile() { indices[i].assign(numGroups,0); for(int j=0;jgGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix + int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]]; + + for(int k=0;kgGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix + int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]]; + if(pValueCounts[i][j]){ - cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl; - mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[i][j]) + "\t" + toString((pValueCounts[i][j]/(float)iters))); mothurOutEndLine(); - outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl; + cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl; + mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((pValueCounts[i][j]/(float)iters))); mothurOutEndLine(); + outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl; } else{ - cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << '<' <gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end()); + + for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) { globaldata->gGroupmap->groupIndex[globaldata->gGroupmap->namesOfGroups[i]] = i; } groupNames = globaldata->Groups; diff --git a/readdistcommand.cpp b/readdistcommand.cpp index 719740c..f6b4195 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -72,7 +72,7 @@ ReadDistCommand::ReadDistCommand(string option){ if (columnfile != "") { if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } } - + //check for optional parameter and set defaults // ...at some point should added some additional type checking... //get user cutoff and precision or use defaults @@ -93,6 +93,7 @@ ReadDistCommand::ReadDistCommand(string option){ else if (format == "matrix") { groupMap = new GroupMap(groupfile); groupMap->readMap(); + if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } globaldata->gGroupmap = groupMap; } diff --git a/venn.cpp b/venn.cpp index 82020d4..3961cd7 100644 --- a/venn.cpp +++ b/venn.cpp @@ -168,7 +168,7 @@ void Venn::getPic(vector lookup, vector vCalcs }else { outsvg << "\n"; } outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared[0]) + "\n"; - outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))) + "\n"; + outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))*100) + "\n"; outsvg << "The total richness for all groups is " + toString((float)(numA[0] + numB[0] - shared[0])) + "\n"; //close file -- 2.39.2