X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=getoturepcommand.cpp;h=c64549334ded9f4d32140ad64cbe898c5c41a023;hp=d476cc3499844c88014bcef9e1a005459fab3dda;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=875ab4b2eec77b920e9fa0042f9a2aae2faff2b0 diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index d476cc3..c645493 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -23,7 +23,7 @@ inline bool compareName(repStruct left, repStruct right){ //******************************************************************************************************************** //sorts lowest to highest inline bool compareBin(repStruct left, repStruct right){ - return (left.bin < right.bin); + return (left.simpleBin < right.simpleBin); } //******************************************************************************************************************** //sorts lowest to highest @@ -261,7 +261,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { else if (countfile == "not open") { abort = true; countfile = ""; } else { m->setCountTableFile(countfile); - ct.readTable(countfile, true); + ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { hasGroups = true; } } @@ -760,6 +760,8 @@ string GetOTURepCommand::findRepAbund(vector names, string group) { try{ vector reps; string rep = "notFound"; + + if (m->debug) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); } if ((names.size() == 1)) { return names[0]; @@ -773,7 +775,7 @@ string GetOTURepCommand::findRepAbund(vector names, string group) { if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone. int numRep = 0; if (group != "") { numRep = ct.getGroupCount(names[i], group); } - else { numRep = ct.getGroupCount(names[i]); } + else { numRep = ct.getNumSeqs(names[i]); } if (numRep > maxAbund) { reps.clear(); reps.push_back(names[i]); @@ -834,7 +836,7 @@ string GetOTURepCommand::findRep(vector names, string group) { if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone. int numRep = 0; if (group != "") { numRep = ct.getGroupCount(names[i], group); } - else { numRep = ct.getGroupCount(names[i]); } + else { numRep = ct.getNumSeqs(names[i]); } for (int j = 1; j < numRep; j++) { //don't add yourself again seqIndex.push_back(nameToIndex[names[i]]); } @@ -979,6 +981,7 @@ int GetOTURepCommand::process(ListVector* processList) { } //for each bin in the list vector + vector binLabels = processList->getLabels(); for (int i = 0; i < processList->size(); i++) { if (m->control_pressed) { out.close(); @@ -999,7 +1002,7 @@ int GetOTURepCommand::process(ListVector* processList) { if (Groups.size() == 0) { nameRep = findRep(namesInBin, ""); - newNamesOutput << i << '\t' << nameRep << '\t'; + newNamesOutput << binLabels[i] << '\t' << nameRep << '\t'; //put rep at first position in names line string outputString = nameRep + ","; @@ -1042,7 +1045,7 @@ int GetOTURepCommand::process(ListVector* processList) { nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]); //output group rep and other members of this group - (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t'; + (*(filehandles[Groups[j]])) << binLabels[i] << '\t' << nameRep << '\t'; //put rep at first position in names line string outputString = nameRep + ","; @@ -1100,20 +1103,19 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* ifstream in; m->openInputFile(filename, in); - int i = 0; string tempGroup = ""; in >> tempGroup; m->gobble(in); CountTable thisCt; if (countfile != "") { - thisCt.readTable(countfile, true); + thisCt.readTable(countfile, true, false); if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; } } int thistotal = 0; while (!in.eof()) { - string rep, binnames; - in >> i >> rep >> binnames; m->gobble(in); + string rep, binnames, binLabel; + in >> binLabel >> rep >> binnames; m->gobble(in); vector names; m->splitAtComma(binnames, names); @@ -1178,7 +1180,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* if (sequence != "not found") { if (sorted == "") { //print them out - rep = rep + "\t" + toString(i+1); + rep = rep + "\t" + binLabel; rep = rep + "|" + toString(binsize); if (group != "") { rep = rep + "|" + group; @@ -1186,7 +1188,9 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* out << ">" << rep << endl; out << sequence << endl; }else { //save them - repStruct newRep(rep, i+1, binsize, group); + int simpleLabel; + m->mothurConvert(m->getSimpleLabel(binLabel), simpleLabel); + repStruct newRep(rep, binLabel, simpleLabel, binsize, group); reps.push_back(newRep); } }else { @@ -1204,7 +1208,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* //print them for (int i = 0; i < reps.size(); i++) { string sequence = fasta->getSequence(reps[i].name); - string outputName = reps[i].name + "\t" + toString(reps[i].bin); + string outputName = reps[i].name + "\t" + reps[i].bin; outputName = outputName + "|" + toString(reps[i].size); if (reps[i].group != "") { outputName = outputName + "|" + reps[i].group; @@ -1245,7 +1249,6 @@ int GetOTURepCommand::processNames(string filename, string label) { ifstream in; m->openInputFile(filename, in); - int i = 0; string rep, binnames; string tempGroup = ""; @@ -1253,13 +1256,14 @@ int GetOTURepCommand::processNames(string filename, string label) { CountTable thisCt; if (countfile != "") { - thisCt.readTable(countfile, true); + thisCt.readTable(countfile, true, false); if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; } } while (!in.eof()) { if (m->control_pressed) { break; } - in >> i >> rep >> binnames; m->gobble(in); + string binLabel; + in >> binLabel >> rep >> binnames; m->gobble(in); if (countfile == "") { out2 << rep << '\t' << binnames << endl; } else {