X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=linearalgebra.cpp;h=e4eaf481b843bd0ef27c41e77bd726b8adac2397;hp=e1b9470fdb218d94fd9f5fe9f36ccb23ac31ea8a;hb=b25ede2ad307ae76f8a610443e0ec3ec69621ce7;hpb=b8ff3bca0560a53832723f4621fcddef7ec4e499 diff --git a/linearalgebra.cpp b/linearalgebra.cpp index e1b9470..e4eaf48 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -805,7 +805,80 @@ double LinearAlgebra::calcSpearman(vector< vector >& euclidDists, vector exit(1); } } - +/*********************************************************************************************************************************/ +double LinearAlgebra::calcKruskalWallis(vector& values, double& pValue){ + try { + double H; + set treatments; + + //rank values + sort(values.begin(), values.end(), compareSpearman); + vector ties; + int rankTotal = 0; + vector TIES; + for (int j = 0; j < values.size(); j++) { + treatments.insert(values[j].name); + rankTotal += (j+1); + ties.push_back(&(values[j])); + + if (j != values.size()-1) { // you are not the last so you can look ahead + if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + + + // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1) + map sums; + map counts; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; } + + for (int j = 0; j < values.size(); j++) { + sums[values[j].name] += values[j].score; + counts[values[j].name]+= 1.0; + } + + double middleTerm = 0.0; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { + middleTerm += ((sums[*it]*sums[*it])/counts[*it]); + } + + double firstTerm = 12 / (double) (values.size()*(values.size()+1)); + double lastTerm = 3 * (values.size()+1); + + H = firstTerm * middleTerm - lastTerm; + + //adjust for ties + if (TIES.size() != 0) { + double sum = 0.0; + for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } + double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size())); + H /= result; + } + + //Numerical Recipes pg221 + pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0)); + + return H; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcKruskalWallis"); + exit(1); + } +} /*********************************************************************************************************************************/ //assumes both matrices are square and the same size double LinearAlgebra::calcKendall(vector< vector >& euclidDists, vector< vector >& userDists){ @@ -1228,7 +1301,73 @@ double LinearAlgebra::calcKendallSig(double n, double r){ exit(1); } } +/*********************************************************************************************************************************/ +double LinearAlgebra::calcWilcoxon(vector& x, vector& y, double& sig){ + try { + if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; } + + double W = 0.0; + sig = 0.0; + + vector signPairs; + vector absV; + for (int i = 0; i < x.size(); i++) { + if (m->control_pressed) { return W; } + double temp = y[i]-x[i]; + double signV = sign(temp); + if (signV != 0) { // exclude zeros + spearmanRank member(toString(i), abs(temp)); + absV.push_back(member); + signPairs.push_back(signV); + } + } + + //rank absV + //sort xscores + sort(absV.begin(), absV.end(), compareSpearman); + + //convert scores to ranks of x + vector ties; + int rankTotal = 0; + for (int j = 0; j < absV.size(); j++) { + if (m->control_pressed) { return W; } + rankTotal += (j+1); + ties.push_back(&(absV[j])); + + if (j != absV.size()-1) { // you are not the last so you can look ahead + if (absV[j].score != absV[j+1].score) { // you are done with ties, rank them and continue + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + //sum ranks times sign + for (int i = 0; i < absV.size(); i++) { + if (m->control_pressed) { return W; } + W += (absV[i].score*signPairs[i]); + } + W = abs(W); + + //find zScore + cout << "still need to find sig!!" << endl; + + return W; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcWilcoxon"); + exit(1); + } +} /*********************************************************************************************************************************/ double LinearAlgebra::calcSpearman(vector& x, vector& y, double& sig){ try {