X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=linearalgebra.cpp;h=33464ca5b3519f615b1029b22793694d595eee2d;hb=4169642e8a8d45f71a4a7241ee02f1b1aae29520;hp=5de5a71dc35c1ab8888ff28ab8d7e92107f8cc72;hpb=37eac2026d91179acda0494c4dcca22f176551b9;p=mothur.git diff --git a/linearalgebra.cpp b/linearalgebra.cpp index 5de5a71..33464ca 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -27,7 +27,7 @@ vector > LinearAlgebra::matrix_mult(vector > first product.resize(first_rows); for(int i=0;i& d, vector& e, vector& d, vector& e, vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes, int dimensions){ try { //make square matrix @@ -252,38 +253,70 @@ vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vecto } } - }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2) + }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)... for (int i = 0; i < dists.size(); i++) { if (m->control_pressed) { return dists; } for (int j = 0; j < i; j++) { - double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); - double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); + double sum = 0.0; + for (int k = 0; k < dimensions; k++) { + sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k])); + } - dists[i][j] = sqrt((firstDim + secondDim)); + dists[i][j] = sqrt(sum); dists[j][i] = dists[i][j]; } } - }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2) + } + + return dists; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//returns groups by dimensions from dimensions by groups +vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes){ + try { + //make square matrix + vector< vector > dists; dists.resize(axes[0].size()); + for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes[0].size(), 0.0); } + + if (axes.size() == 1) { //one dimension calc = abs(x-y) for (int i = 0; i < dists.size(); i++) { if (m->control_pressed) { return dists; } for (int j = 0; j < i; j++) { - double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0])); - double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1])); - double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2])); + dists[i][j] = abs(axes[0][i] - axes[0][j]); + dists[j][i] = dists[i][j]; + } + } + + }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)... + + for (int i = 0; i < dists[0].size(); i++) { + + if (m->control_pressed) { return dists; } + + for (int j = 0; j < i; j++) { + double sum = 0.0; + for (int k = 0; k < axes.size(); k++) { + sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j])); + } - dists[i][j] = sqrt((firstDim + secondDim + thirdDim)); + dists[i][j] = sqrt(sum); dists[j][i] = dists[i][j]; } } - }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + } return dists; } @@ -293,54 +326,341 @@ vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vecto } } /*********************************************************************************************************************************/ +//assumes both matrices are square and the same size double LinearAlgebra::calcPearson(vector< vector >& euclidDists, vector< vector >& userDists){ try { //find average for - X - vector averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0); + int count = 0; + float averageEuclid = 0.0; for (int i = 0; i < euclidDists.size(); i++) { - for (int j = 0; j < euclidDists[i].size(); j++) { - averageEuclid[i] += euclidDists[i][j]; + for (int j = 0; j < i; j++) { + averageEuclid += euclidDists[i][j]; + count++; } } - for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); } - + averageEuclid = averageEuclid / (float) count; + //find average for - Y - vector averageUser; averageUser.resize(userDists.size(), 0.0); + count = 0; + float averageUser = 0.0; for (int i = 0; i < userDists.size(); i++) { - for (int j = 0; j < userDists[i].size(); j++) { - averageUser[i] += userDists[i][j]; + for (int j = 0; j < i; j++) { + averageUser += userDists[i][j]; + count++; } } - for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); } - + averageUser = averageUser / (float) count; + double numerator = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; for (int i = 0; i < euclidDists.size(); i++) { - for (int k = 0; k < i; k++) { + for (int k = 0; k < i; k++) { //just lt dists float Yi = userDists[i][k]; float Xi = euclidDists[i][k]; - numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k])); - denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k])); - denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k])); + numerator += ((Xi - averageEuclid) * (Yi - averageUser)); + denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid)); + denomTerm2 += ((Yi - averageUser) * (Yi - averageUser)); } } double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); double r = numerator / denom; + //divide by zero error + if (isnan(r) || isinf(r)) { r = 0.0; } + return r; + } catch(exception& e) { - m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); + m->errorOut(e, "LinearAlgebra", "calcPearson"); + exit(1); + } +} +/*********************************************************************************************************************************/ +//assumes both matrices are square and the same size +double LinearAlgebra::calcSpearman(vector< vector >& euclidDists, vector< vector >& userDists){ + try { + double r; + + //format data + map tableX; + map::iterator itTable; + vector scores; + + for (int i = 0; i < euclidDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scores.size()), euclidDists[i][j]); + scores.push_back(member); + + //count number of repeats + itTable = tableX.find(euclidDists[i][j]); + if (itTable == tableX.end()) { + tableX[euclidDists[i][j]] = 1; + }else { + tableX[euclidDists[i][j]]++; + } + } + } + + //sort scores + sort(scores.begin(), scores.end(), compareSpearman); + + //calc LX + double Lx = 0.0; + for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) { + double tx = (double) itTable->second; + Lx += ((pow(tx, 3.0) - tx) / 12.0); + } + + //find ranks of xi + map rankEuclid; + vector ties; + int rankTotal = 0; + for (int j = 0; j < scores.size(); j++) { + rankTotal += (j+1); + ties.push_back(scores[j]); + + if (j != (scores.size()-1)) { // you are not the last so you can look ahead + if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + } + } + + + //format data + map tableY; + scores.clear(); + + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scores.size()), userDists[i][j]); + scores.push_back(member); + + //count number of repeats + itTable = tableY.find(userDists[i][j]); + if (itTable == tableY.end()) { + tableY[userDists[i][j]] = 1; + }else { + tableY[userDists[i][j]]++; + } + } + } + + //sort scores + sort(scores.begin(), scores.end(), compareSpearman); + + //calc LX + double Ly = 0.0; + for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) { + double ty = (double) itTable->second; + Ly += ((pow(ty, 3.0) - ty) / 12.0); + } + + //find ranks of yi + map rankUser; + ties.clear(); + rankTotal = 0; + for (int j = 0; j < scores.size(); j++) { + rankTotal += (j+1); + ties.push_back(scores[j]); + + if (j != (scores.size()-1)) { // you are not the last so you can look ahead + if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + } + } + + + double di = 0.0; + int count = 0; + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < i; j++) { + + float xi = rankEuclid[toString(count)]; + float yi = rankUser[toString(count)]; + + di += ((xi - yi) * (xi - yi)); + + count++; + } + } + + double n = (double) count; + + double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx; + double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly; + + r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2))); + + //divide by zero error + if (isnan(r) || isinf(r)) { r = 0.0; } + + return r; + + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcSpearman"); + exit(1); + } +} + +/*********************************************************************************************************************************/ +//assumes both matrices are square and the same size +double LinearAlgebra::calcKendall(vector< vector >& euclidDists, vector< vector >& userDists){ + try { + double r; + + //format data + vector scores; + for (int i = 0; i < euclidDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scores.size()), euclidDists[i][j]); + scores.push_back(member); + } + } + + //sort scores + sort(scores.begin(), scores.end(), compareSpearman); + + //find ranks of xi + map rankEuclid; + vector ties; + int rankTotal = 0; + for (int j = 0; j < scores.size(); j++) { + rankTotal += (j+1); + ties.push_back(scores[j]); + + if (j != (scores.size()-1)) { // you are not the last so you can look ahead + if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankEuclid[ties[k].name] = thisrank; + } + } + } + + vector scoresUser; + for (int i = 0; i < userDists.size(); i++) { + for (int j = 0; j < i; j++) { + spearmanRank member(toString(scoresUser.size()), userDists[i][j]); + scoresUser.push_back(member); + } + } + + //sort scores + sort(scoresUser.begin(), scoresUser.end(), compareSpearman); + + //find ranks of yi + map rankUser; + ties.clear(); + rankTotal = 0; + for (int j = 0; j < scoresUser.size(); j++) { + rankTotal += (j+1); + ties.push_back(scoresUser[j]); + + if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead + if (scoresUser[j].score != scoresUser[j+1].score) { // you are done with ties, rank them and continue + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + rankUser[ties[k].name] = thisrank; + } + } + } + + int numCoor = 0; + int numDisCoor = 0; + + //order user ranks + vector user; + for (int l = 0; l < scores.size(); l++) { + spearmanRank member(scores[l].name, rankUser[scores[l].name]); + user.push_back(member); + } + + int count = 0; + for (int l = 0; l < scores.size(); l++) { + + int numWithHigherRank = 0; + int numWithLowerRank = 0; + float thisrank = user[l].score; + + for (int u = l+1; u < scores.size(); u++) { + if (user[u].score > thisrank) { numWithHigherRank++; } + else if (user[u].score < thisrank) { numWithLowerRank++; } + count++; + } + + numCoor += numWithHigherRank; + numDisCoor += numWithLowerRank; + } + + r = (numCoor - numDisCoor) / (float) count; + + //divide by zero error + if (isnan(r) || isinf(r)) { r = 0.0; } + + return r; + + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcKendall"); exit(1); } } + /*********************************************************************************************************************************/