#include "fullmatrix.h"
/**************************************************************************/
-
//This constructor reads a distance matrix file and stores the data in the matrix.
FullMatrix::FullMatrix(ifstream& filehandle) {
try{
//make the matrix filled with zeros
matrix.resize(numSeqs);
for(int i = 0; i < numSeqs; i++) {
- matrix[i].resize(numSeqs, 0);
+ matrix[i].resize(numSeqs, 0.0);
}
-
group = groupmap->getGroup(name);
if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); }
index.resize(numSeqs);
- index[0].groupName = group;
index[0].seqName = name;
+ index[0].groupName = group;
//determine if matrix is square or lower triangle
//if it is square read the distances for the first sequence
if (square == true) { readSquareMatrix(filehandle); }
else { readLTMatrix(filehandle); }
- //sort sequences so they are gathered in groups for processing
- sortGroups(0, numSeqs-1);
-
- groups.push_back(index[0].groupName);
- sizes.push_back(1);
- int groupCount = 0;
-
- for(int i=1;i<numSeqs;i++){
- if(index[i].groupName == index[i-1].groupName){ sizes[groupCount]++; }
- else{
- sizes.push_back(1);
- groups.push_back(index[i].groupName);
- groupCount++;
- }
- }
-
+ sortGroups(0, numSeqs-1);
+
}
catch(exception& e) {
errorOut(e, "FullMatrix", "FullMatrix");
filehandle >> name;
group = groupmap->getGroup(name);
- index[i].groupName = group;
index[i].seqName = name;
+ index[i].groupName = group;
if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); }
float distance;
string group, name;
-
+
for(int i=1;i<numSeqs;i++){
filehandle >> name;
-
+
group = groupmap->getGroup(name);
- index[i].groupName = group;
index[i].seqName = name;
+ index[i].groupName = group;
if(group == "not found") { mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); mothurOutEndLine(); exit(1); }
for(int j=0;j<i;j++){
filehandle >> distance;
-
matrix[i][j] = distance; matrix[j][i] = distance;
count++;
reading->update(count);
}
-
}
+
reading->finish();
delete reading;
}
void FullMatrix::sortGroups(int low, int high){
try{
-
- int i = low;
- int j = high;
- float y = 0;
- string name;
- /* compare value */
- //what group does this row belong to
- string z = index[(low + high) / 2].groupName;
-
- /* partition */
- do {
- /* find member above ... */
- while(index[i].groupName < z) i++;
-
- /* find element below ... */
- while(index[j].groupName > z) j--;
+ if (low < high) { //a range of fewer than two rows needs no sorting
+ int i = low+1; //left scan starts just after the slot that holds the pivot during partitioning
+ int j = high; //right scan starts at the last row of the range
+ int pivot = (low+high) / 2; //the middle row of the range supplies the pivot group
- if(i <= j) {
- /* swap rows*/
- for (int h = 0; h < numSeqs; h++) {
- y = matrix[i][h];
- matrix[i][h] = matrix[j][h];
- matrix[j][h] = y;
- }
+ swapRows(low, pivot); //parks the pivot row at position low while the rest of the range is partitioned
+
+ /* compare value */
+ //group name of the pivot row; rows are ordered by comparing groupName strings
+ string key = index[low].groupName;
+
+ /* partition rows low+1..high around key */
+ while(i <= j) {
+ /* advance i past rows whose group is <= key; stops at the first greater group or once past high */
+ while((i <= high) && (index[i].groupName <= key)) { i++; }
- /* swap columns*/
- for (int b = 0; b < numSeqs; b++) {
- y = matrix[b][i];
- matrix[b][i] = matrix[b][j];
- matrix[b][j] = y;
+ /* move j down past rows whose group is > key; the pivot row at low stops it at the latest */
+ while((j >= low) && (index[j].groupName > key)) { j--; }
+ //scans stopped without crossing: row i is too large and row j is small enough, so exchange them
+ if(i < j) {
+ swapRows(i, j);
}
-
- //swap map elements
- z = index[i].groupName;
- index[i].groupName = index[j].groupName;
- index[j].groupName = z;
-
- name = index[i].seqName;
- index[i].seqName = index[j].seqName;
- index[j].seqName = name;
-
-
- i++;
- j--;
- }
- } while(i <= j);
-
- /* recurse */
- if(low < j)
- sortGroups(low, j);
-
- if(i < high)
- sortGroups(i, high);
-
+ }
+ //j is now the last row whose group is <= key, so this swap puts the pivot row in its final spot
+ swapRows(low, j);
+ //NOTE(review): rows equal to key all stay left of j, so a range holding a single group recurses once per row - confirm this is acceptable for large groups
+ /* recurse on the rows before and after the pivot's final position */
+ sortGroups(low, j-1);
+ sortGroups(j+1, high);
+ }
}
catch(exception& e) {
}
}
+/**************************************************************************/
+void FullMatrix::swapRows(int i, int j) { //exchanges sequences i and j everywhere they appear: matrix rows, matrix columns and index entries
+ try {
+ //y, z and name are the temporaries used by the three element-wise swaps below
+ float y;
+ string z, name;
+
+ /* swap rows: exchange row i with row j across the first numSeqs columns */
+ for (int h = 0; h < numSeqs; h++) {
+ y = matrix[i][h];
+ matrix[i][h] = matrix[j][h];
+ matrix[j][h] = y;
+ }
+
+ /* swap columns: exchange column i with column j in each of the first numSeqs rows */
+ for (int b = 0; b < numSeqs; b++) {
+ y = matrix[b][i];
+ matrix[b][i] = matrix[b][j];
+ matrix[b][j] = y;
+ }
+
+ //swap map elements: move the group name along with the matrix data
+ z = index[i].groupName;
+ index[i].groupName = index[j].groupName;
+ index[j].groupName = z;
+ //and the sequence name, so index[i] and index[j] describe the rows now stored at i and j
+ name = index[i].seqName;
+ index[i].seqName = index[j].seqName;
+ index[j].seqName = name;
+
+
+ }
+ catch(exception& e) {
+ errorOut(e, "FullMatrix", "swapRows"); //hand the exception to errorOut with the class and method name
+ exit(1); //then terminate with exit status 1
+ }
+}
/**************************************************************************/
float FullMatrix::get(int i, int j){ return matrix[i][j]; }
for (int i = 0; i < numSeqs; i++) {
out << "row " << i << " group = " << index[i].groupName << " name = " << index[i].seqName << endl;
for (int j = 0; j < numSeqs; j++) {
- out << matrix[i][j] << " ";
+ out << i << '\t' << j << '\t' << matrix[i][j] << endl;
}
out << endl;
}
+
+ for (int i = 0; i < numSeqs; i++) { out << i << '\t' << index[i].seqName << endl; }
}
catch(exception& e) {
errorOut(e, "FullMatrix", "printMatrix");
int getNumSeqs();
vector<int> getSizes();
vector<string> getGroups();
+ void setGroups(vector<string> names) { groups = names; }
+ void setSizes(vector<int> s) { sizes = s; }
int getNumGroups();
void printMatrix(ostream&);
float get(int, int);
vector<Names> index; // row in vector, sequence group. need to know this so when we sort it can be updated.
vector<int> sizes;
vector<string> groups;
- void sortGroups(int, int); //this function sorts the sequences within the matrix.
+ void sortGroups(int, int); //this function sorts the sequences within the matrix.
+ void swapRows(int, int);
GroupMap* groupmap; //maps sequences to groups they belong to.
int numSeqs;
fileHandle >> seqGroup; //read from second column
setNamesOfGroups(seqGroup);
-
+
groupmap[seqName] = seqGroup; //store data in map
+ seqsPerGroup[seqGroup]++; //increment number of seqs in that group
gobble(fileHandle);
}
}
if (count == namesOfGroups.size()) {
namesOfGroups.push_back(seqGroup); //new group
+ seqsPerGroup[seqGroup] = 0;
groupIndex[seqGroup] = index;
index++;
}
exit(1);
}
}
+/************************************************************/
+int GroupMap::getNumSeqs(string group) { //returns the count stored in seqsPerGroup for group, or 0 if the group has no entry
+ try {
+ //look the group up with find() first: operator[] on a std::map inserts a default entry when the key is absent
+ map<string, int>::iterator itNum;
+
+ itNum = seqsPerGroup.find(group);
+ //unknown group: report zero sequences without touching the map
+ if (itNum == seqsPerGroup.end()) { return 0; }
+ //NOTE(review): itNum->second holds the same value and would avoid this second lookup
+ return seqsPerGroup[group];
+
+ }
+ catch(exception& e) {
+ errorOut(e, "GroupMap", "getNumSeqs"); //hand the exception to errorOut with the class and method name
+ exit(1); //then terminate with exit status 1
+ }
+}
/************************************************************/
vector<string> GroupMap::getNamesSeqs(){
string getGroup(string);
void setGroup(string, string);
vector<string> namesOfGroups;
- map<string, int> groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays.
+ map<string, int> groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays and libshuff commands.
int getNumSeqs() { return groupmap.size(); }
vector<string> getNamesSeqs();
+ int getNumSeqs(string); //return the number of seqs in a given group
private:
ifstream fileHandle;
map<string, string>::iterator it;
void setNamesOfGroups(string);
map<string, string> groupmap; //sequence name and groupname
+ map<string, int> seqsPerGroup; //maps groupname to number of seqs in that group
};
#endif
groupNames = matrix->getGroups();
groupSizes = matrix->getSizes();
numGroups = matrix->getNumGroups();
-
initializeGroups(matrix);
}
catch(exception& e) {
}
int index=0;
for(int i=0;i<numGroups;i++){
+
for(int j=0;j<groupSizes[i];j++){
savedGroups[i][j] = groups[i][j] = index++;
}
vector<double> Libshuff::getMinX(int x){
try{
+
vector<double> minX(groupSizes[x], 0);
for(int i=0;i<groupSizes[x];i++){
- minX[i] = (groupSizes[x] > 1 ? (i==0 ? matrix->get(groups[x][0], groups[x][1]) : matrix->get(groups[x][i], groups[x][0])) : 0.0);
+ minX[i] = (groupSizes[x] > 1 ? (i==0 ? matrix->get(groups[x][0], groups[x][1]) : matrix->get(groups[x][i], groups[x][0])) : 0.0); //get the first value in row i of this block
+ //minX[i] = matrix->get(groups[x][i], groups[x][0]);
for(int j=0;j<groupSizes[x];j++){
if(i != j) {
double dx = matrix->get(groups[x][i], groups[x][j]);
if (abort == false) {
matrix = globaldata->gMatrix; //get the distance matrix
- setGroups(); //set the groups to be analyzed
+ setGroups(); //set the groups to be analyzed and sorts them
+
+ /********************************************************************************************/
+ //This is needed because reading the matrix sorts its sequences into groups in alphabetical order,
+ //while the rest of this command and the classes it uses assume a specific group order.
+ /********************************************************************************************/
+ matrix->setGroups(globaldata->gGroupmap->namesOfGroups);
+ vector<int> sizes;
+ for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) { sizes.push_back(globaldata->gGroupmap->getNumSeqs(globaldata->gGroupmap->namesOfGroups[i])); }
+ matrix->setSizes(sizes);
+
if(userform == "discrete"){
form = new DLibshuff(matrix, iters, step, cutOff);
for(int i=0;i<numGroups-1;i++) {
for(int j=i+1;j<numGroups;j++) {
reading->newLine(groupNames[i]+'-'+groupNames[j], iters);
+ int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix
+ int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]];
+
for(int p=0;p<iters;p++) {
- form->randomizeGroups(i,j);
- if(form->evaluatePair(i,j) >= savedDXYValues[i][j]) { pValueCounts[i][j]++; }
- if(form->evaluatePair(j,i) >= savedDXYValues[j][i]) { pValueCounts[j][i]++; }
+ form->randomizeGroups(spoti,spotj);
+ if(form->evaluatePair(spoti,spotj) >= savedDXYValues[spoti][spotj]) { pValueCounts[i][j]++; }
+ if(form->evaluatePair(spotj,spoti) >= savedDXYValues[spotj][spoti]) { pValueCounts[j][i]++; }
reading->update(p);
}
- form->resetGroup(i);
- form->resetGroup(j);
+ form->resetGroup(spoti);
+ form->resetGroup(spotj);
}
}
reading->finish();
indices[i].assign(numGroups,0);
for(int j=0;j<numGroups;j++){
indices[i][j] = index++;
- for(int k=0;k<savedMinValues[i][j].size();k++){
- if(allDistances[savedMinValues[i][j][k]].size() != 0){
- allDistances[savedMinValues[i][j][k]][indices[i][j]]++;
+
+ int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix
+ int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]];
+
+ for(int k=0;k<savedMinValues[spoti][spotj].size();k++){
+ if(allDistances[savedMinValues[spoti][spotj][k]].size() != 0){
+ allDistances[savedMinValues[spoti][spotj][k]][indices[i][j]]++;
}
else{
- allDistances[savedMinValues[i][j][k]].assign(numIndices, 0);
- allDistances[savedMinValues[i][j][k]][indices[i][j]] = 1;
+ allDistances[savedMinValues[spoti][spotj][k]].assign(numIndices, 0);
+ allDistances[savedMinValues[spoti][spotj][k]][indices[i][j]] = 1;
}
}
}
int precision = (int)log10(iters);
for(int i=0;i<numGroups;i++){
for(int j=i+1;j<numGroups;j++){
+ int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix
+ int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]];
+
if(pValueCounts[i][j]){
- cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
- mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[i][j]) + "\t" + toString((pValueCounts[i][j]/(float)iters))); mothurOutEndLine();
- outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
+ cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
+ mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((pValueCounts[i][j]/(float)iters))); mothurOutEndLine();
+ outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
}
else{
- cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
- mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[i][j]) + "\t" + toString((1/(float)iters))); mothurOutEndLine();
- outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
+ cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
+ mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((1/(float)iters))); mothurOutEndLine();
+ outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
}
if(pValueCounts[j][i]){
- cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
- mothurOutJustToLog(groupNames[j]+"-"+groupNames[i] + "\t" + toString(savedDXYValues[j][i]) + "\t" + toString((pValueCounts[j][i]/(float)iters))); mothurOutEndLine();
- outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
+ cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
+ mothurOutJustToLog(groupNames[j]+"-"+groupNames[i] + "\t" + toString(savedDXYValues[spotj][spoti]) + "\t" + toString((pValueCounts[j][i]/(float)iters))); mothurOutEndLine();
+ outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
}
else{
- cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
- mothurOutJustToLog(groupNames[j]+"-"+groupNames[i] + "\t" + toString(savedDXYValues[j][i]) + "\t" + toString((1/(float)iters))); mothurOutEndLine();
- outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
+ cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
+ mothurOutJustToLog(groupNames[j]+"-"+groupNames[i] + "\t" + toString(savedDXYValues[spotj][spoti]) + "\t" + toString((1/(float)iters))); mothurOutEndLine();
+ outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
}
}
}
//sort
sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
+
+ for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) { globaldata->gGroupmap->groupIndex[globaldata->gGroupmap->namesOfGroups[i]] = i; }
groupNames = globaldata->Groups;
if (columnfile != "") {
if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
}
-
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
//get user cutoff and precision or use defaults
else if (format == "matrix") {
groupMap = new GroupMap(groupfile);
groupMap->readMap();
+
if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
globaldata->gGroupmap = groupMap;
}
}else { outsvg << "</text>\n"; }
outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"500\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"520\">Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"520\">Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))*100) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"540\">The total richness for all groups is " + toString((float)(numA[0] + numB[0] - shared[0])) + "</text>\n";
//close file