X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=getoturepcommand.cpp;h=c64549334ded9f4d32140ad64cbe898c5c41a023;hp=dbd19a8bfc7d5a266b131e74b18f3f2980c40aba;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=af0a94ea8f02421b2b73e77e68753a2b4c37768e diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index dbd19a8..c645493 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -23,7 +23,7 @@ inline bool compareName(repStruct left, repStruct right){ //******************************************************************************************************************** //sorts lowest to highest inline bool compareBin(repStruct left, repStruct right){ - return (left.bin < right.bin); + return (left.simpleBin < right.simpleBin); } //******************************************************************************************************************** //sorts lowest to highest @@ -261,11 +261,15 @@ GetOTURepCommand::GetOTURepCommand(string option) { else if (countfile == "not open") { abort = true; countfile = ""; } else { m->setCountTableFile(countfile); - ct.readTable(countfile, true); + ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { hasGroups = true; } } - + groupfile = validParameter.validFile(parameters, "group", true); + if (groupfile == "not open") { groupfile = ""; abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { m->setGroupFile(groupfile); } + method = validParameter.validFile(parameters, "method", false); if (method == "not found"){ method = "distance"; } if ((method != "distance") && (method != "abundance")) { m->mothurOut(method + " is not a valid option for the method parameter. The only options are: distance and abundance, aborting."); m->mothurOutEndLine(); abort = true; @@ -337,11 +341,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { else { allLines = 1; } } - groupfile = validParameter.validFile(parameters, "group", true); - if (groupfile == "not open") { groupfile = ""; abort = true; } - else if (groupfile == "not found") { groupfile = ""; } - else { m->setGroupFile(groupfile); } - + sorted = validParameter.validFile(parameters, "sorted", false); if (sorted == "not found"){ sorted = ""; } if (sorted == "none") { sorted=""; } if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) { @@ -760,6 +760,8 @@ string GetOTURepCommand::findRepAbund(vector names, string group) { try{ vector reps; string rep = "notFound"; + + if (m->debug) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); } if ((names.size() == 1)) { return names[0]; @@ -773,7 +775,7 @@ string GetOTURepCommand::findRepAbund(vector names, string group) { if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone. int numRep = 0; if (group != "") { numRep = ct.getGroupCount(names[i], group); } - else { numRep = ct.getGroupCount(names[i]); } + else { numRep = ct.getNumSeqs(names[i]); } if (numRep > maxAbund) { reps.clear(); reps.push_back(names[i]); @@ -834,7 +836,7 @@ string GetOTURepCommand::findRep(vector names, string group) { if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone. int numRep = 0; if (group != "") { numRep = ct.getGroupCount(names[i], group); } - else { numRep = ct.getGroupCount(names[i]); } + else { numRep = ct.getNumSeqs(names[i]); } for (int j = 1; j < numRep; j++) { //don't add yourself again seqIndex.push_back(nameToIndex[names[i]]); } @@ -979,6 +981,7 @@ int GetOTURepCommand::process(ListVector* processList) { } //for each bin in the list vector + vector binLabels = processList->getLabels(); for (int i = 0; i < processList->size(); i++) { if (m->control_pressed) { out.close(); @@ -999,7 +1002,7 @@ int GetOTURepCommand::process(ListVector* processList) { if (Groups.size() == 0) { nameRep = findRep(namesInBin, ""); - newNamesOutput << i << '\t' << nameRep << '\t'; + newNamesOutput << binLabels[i] << '\t' << nameRep << '\t'; //put rep at first position in names line string outputString = nameRep + ","; @@ -1042,7 +1045,7 @@ int GetOTURepCommand::process(ListVector* processList) { nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]); //output group rep and other members of this group - (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t'; + (*(filehandles[Groups[j]])) << binLabels[i] << '\t' << nameRep << '\t'; //put rep at first position in names line string outputString = nameRep + ","; @@ -1100,20 +1103,19 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* ifstream in; m->openInputFile(filename, in); - int i = 0; string tempGroup = ""; in >> tempGroup; m->gobble(in); CountTable thisCt; if (countfile != "") { - thisCt.readTable(countfile, true); + thisCt.readTable(countfile, true, false); if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; } } int thistotal = 0; while (!in.eof()) { - string rep, binnames; - in >> i >> rep >> binnames; m->gobble(in); + string rep, binnames, binLabel; + in >> binLabel >> rep >> binnames; m->gobble(in); vector names; m->splitAtComma(binnames, names); @@ -1178,7 +1180,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* if (sequence != "not found") { if (sorted == "") { //print them out - rep = rep + "\t" + toString(i+1); + rep = rep + "\t" + binLabel; rep = rep + "|" + toString(binsize); if (group != "") { rep = rep + "|" + group; @@ -1186,7 +1188,9 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* out << ">" << rep << endl; out << sequence << endl; }else { //save them - repStruct newRep(rep, i+1, binsize, group); + int simpleLabel; + m->mothurConvert(m->getSimpleLabel(binLabel), simpleLabel); + repStruct newRep(rep, binLabel, simpleLabel, binsize, group); reps.push_back(newRep); } }else { @@ -1204,7 +1208,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap* //print them for (int i = 0; i < reps.size(); i++) { string sequence = fasta->getSequence(reps[i].name); - string outputName = reps[i].name + "\t" + toString(reps[i].bin); + string outputName = reps[i].name + "\t" + reps[i].bin; outputName = outputName + "|" + toString(reps[i].size); if (reps[i].group != "") { outputName = outputName + "|" + reps[i].group; @@ -1245,7 +1249,6 @@ int GetOTURepCommand::processNames(string filename, string label) { ifstream in; m->openInputFile(filename, in); - int i = 0; string rep, binnames; string tempGroup = ""; @@ -1253,13 +1256,14 @@ int GetOTURepCommand::processNames(string filename, string label) { CountTable thisCt; if (countfile != "") { - thisCt.readTable(countfile, true); + thisCt.readTable(countfile, true, false); if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; } } while (!in.eof()) { if (m->control_pressed) { break; } - in >> i >> rep >> binnames; m->gobble(in); + string binLabel; + in >> binLabel >> rep >> binnames; m->gobble(in); if (countfile == "") { out2 << rep << '\t' << binnames << endl; } else {