X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=randomforest.cpp;h=acf87dfebcd022d37cf6974f8331c7ad940a017a;hp=852da372291d00b3e37caabefbea4e4fe3981f3f;hb=499f4ac6e321f9f03d4c3aa25c3b6880892c8b83;hpb=b682d361a9d59e832a0bd9dcc76aee39769b89e7 diff --git a/randomforest.cpp b/randomforest.cpp index 852da37..acf87df 100644 --- a/randomforest.cpp +++ b/randomforest.cpp @@ -70,22 +70,31 @@ int RandomForest::printConfusionMatrix(map intToTreatmentMap) { vector::iterator maxPredictedOutComeIterator = max_element(predictedOutComes.begin(), predictedOutComes.end()); int majorityVotedOutcome = (int)(maxPredictedOutComeIterator - predictedOutComes.begin()); int realOutcome = dataSet[indexOfSample][numFeatures]; - cm[realOutcome][majorityVotedOutcome] = cm[realOutcome][majorityVotedOutcome] + 1; + cm[realOutcome][majorityVotedOutcome] = cm[realOutcome][majorityVotedOutcome] + 1; + } + + vector fw; + for (int w = 0; w mothurOut("confusion matrix:\n\t\t"); - for (int i = 0; i < numGroups; i++) { - m->mothurOut(intToTreatmentMap[i] + "\t"); + for (int k = 0; k < numGroups; k++) { + //m->mothurOut(intToTreatmentMap[k] + "\t"); + cout << setw(fw[k]) << intToTreatmentMap[k] << "\t"; } for (int i = 0; i < numGroups; i++) { + cout << "\n" << setw(fw[i]) << intToTreatmentMap[i] << "\t"; //m->mothurOut("\n" + intToTreatmentMap[i] + "\t"); if (m->control_pressed) { return 0; } for (int j = 0; j < numGroups; j++) { - m->mothurOut(cm[i][j] + "\t"); + //m->mothurOut(toString(cm[i][j]) + "\t"); + cout << setw(fw[i]) << cm[i][j] << "\t"; } } - m->mothurOut("\n"); - + //m->mothurOut("\n"); + cout << "\n"; + return 0; } @@ -95,6 +104,38 @@ int RandomForest::printConfusionMatrix(map intToTreatmentMap) { } } +/***********************************************************************/ + +int RandomForest::getMissclassifications(string filename, map intToTreatmentMap, vector names) { + try { + ofstream out; + m->openOutputFile(filename, out); + out <<"Sample\tRF classification\tActual classification\n"; + for (map >::iterator it = globalOutOfBagEstimates.begin(); it != globalOutOfBagEstimates.end(); it++) { + + if (m->control_pressed) { return 0; } + + int indexOfSample = it->first; + vector predictedOutComes = it->second; + vector::iterator maxPredictedOutComeIterator = max_element(predictedOutComes.begin(), predictedOutComes.end()); + int majorityVotedOutcome = (int)(maxPredictedOutComeIterator - predictedOutComes.begin()); + int realOutcome = dataSet[indexOfSample][numFeatures]; + + if (majorityVotedOutcome != realOutcome) { + out << names[indexOfSample] << "\t" << intToTreatmentMap[majorityVotedOutcome] << "\t" << intToTreatmentMap[realOutcome] << endl; + + } + } + + out.close(); + return 0; + } + catch(exception& e) { + m->errorOut(e, "RandomForest", "getMissclassifications"); + exit(1); + } +} + /***********************************************************************/ int RandomForest::calcForrestVariableImportance(string filename) { try { @@ -139,7 +180,7 @@ int RandomForest::calcForrestVariableImportance(string filename) { m->openOutputFile(filename, out); out <<"OTU\tMean decrease accuracy\n"; for (int i = 0; i < globalVariableRanks.size(); i++) { - out << m->currentBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl; + out << m->currentSharedBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl; } out.close(); return 0;