- m->mothurOut("Sampling " + toString(size) + " from " + toString(list->getNumSeqs()) + "."); m->mothurOutEndLine();
- }
- }
-
-
- //fill names
- for (int i = 0; i < list->getNumBins(); i++) {
- string binnames = list->get(i);
-
- //parse names
- string individual = "";
- int length = binnames.length();
- for(int j=0;j<length;j++){
- if(binnames[j] == ','){
-
- if (groupfile != "") { //if there is a groupfile given fill in group info
- string group = groupMap->getGroup(individual);
- if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
-
- if (pickedGroups) { //if hte user picked groups, we only want to keep the names of sequences from those groups
- if (m->inUsersGroups(group, Groups)) {
- names.push_back(individual);
- }
- }else{
- names.push_back(individual);
- }
- }else{ //save everyone, group
- names.push_back(individual);
- }
- individual = "";
- }
- else{
- individual += binnames[j];
- }
- }
- //save last name
- if (groupfile != "") { //if there is a groupfile given fill in group info
- string group = groupMap->getGroup(individual);
- if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
-
- if (pickedGroups) { //if hte user picked groups, we only want to keep the names of sequences from those groups
- if (m->inUsersGroups(group, Groups)) {
- names.push_back(individual);
- }
- }else{
- names.push_back(individual);
- }
- }else{ //save everyone, group
- names.push_back(individual);
- }
- }
-
- random_shuffle(names.begin(), names.end());
-
- //randomly select a subset of those names to include in the subsample
- set<string> subset; //dont want repeat sequence names added
- if (persample) {
- for (int i = 0; i < Groups.size(); i++) {
-
- for (int j = 0; j < size; j++) {
-
- if (m->control_pressed) { break; }
-
- //get random sequence to add, making sure we have not already added it
- bool done = false;
- int myrand;
- while (!done) {
- myrand = int((float)(names.size()) * (float)(rand()) / ((float)RAND_MAX+1.0));
-
- if (subset.count(names[myrand]) == 0) { //you are not already added
- if (groupMap->getGroup(names[myrand]) == Groups[i]) { subset.insert(names[myrand]); break; }
- }
- }
- }
- }
- }else{
- for (int j = 0; j < size; j++) {
-
- if (m->control_pressed) { break; }
-
- //get random sequence to add, making sure we have not already added it
- bool done = false;
- int myrand;
- while (!done) {
- myrand = int((float)(names.size()) * (float)(rand()) / ((float)RAND_MAX+1.0));
-
- if (subset.count(names[myrand]) == 0) { subset.insert(names[myrand]); break; }
- }
- }
- }
-
- if (groupfile != "") {
- //write out new groupfile
- for (set<string>::iterator it = subset.begin(); it != subset.end(); it++) {
- string group = groupMap->getGroup(*it);
- if (group == "not found") { group = "NOTFOUND"; }
-
- outGroup << *it << '\t' << group << endl;
- }
- outGroup.close(); delete groupMap;
- }
-
+ m->mothurOut("Sampling " + toString(size) + " from " + toString(thisSize) + "."); m->mothurOutEndLine();
+ }
+ }
+
+ set<string> subset; //dont want repeat sequence names added
+ if (countfile == "") {
+ //fill names
+ for (int i = 0; i < list->getNumBins(); i++) {
+ string binnames = list->get(i);
+ vector<string> thisBin;
+ m->splitAtComma(binnames, thisBin);
+
+ for(int j=0;j<thisBin.size();j++){
+ if (groupfile != "") { //if there is a groupfile given fill in group info
+ string group = groupMap.getGroup(thisBin[j]);
+ if (group == "not found") { m->mothurOut("[ERROR]: " + thisBin[j] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+
+ //if the user picked groups, we only want to keep the names of sequences from those groups
+ if (pickedGroups) { if (m->inUsersGroups(group, Groups)) { names.push_back(thisBin[j]); } }
+ else{ names.push_back(thisBin[j]); }
+ }//save everyone, group
+ else{ names.push_back(thisBin[j]); }
+ }
+ }
+
+ random_shuffle(names.begin(), names.end());
+
+ //randomly select a subset of those names to include in the subsample
+ if (persample) {
+ //initialize counts
+ map<string, int> groupCounts;
+ map<string, int>::iterator itGroupCounts;
+ for (int i = 0; i < Groups.size(); i++) { groupCounts[Groups[i]] = 0; }
+
+ for (int j = 0; j < names.size(); j++) {
+
+ if (m->control_pressed) { delete list; delete input; return 0; }
+
+ string group = groupMap.getGroup(names[j]);
+ if (group == "not found") { m->mothurOut("[ERROR]: " + names[j] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+ else{
+ itGroupCounts = groupCounts.find(group);
+ if (itGroupCounts != groupCounts.end()) {
+ if (groupCounts[group] < size) { subset.insert(names[j]); groupCounts[group]++; }
+ }
+ }
+ }
+ }else{
+ for (int j = 0; j < size; j++) {
+ if (m->control_pressed) { break; }
+ subset.insert(names[j]);
+ }
+ }
+
+ if (groupfile != "") {
+ //write out new groupfile
+ for (set<string>::iterator it = subset.begin(); it != subset.end(); it++) {
+ string group = groupMap.getGroup(*it);
+ if (group == "not found") { group = "NOTFOUND"; }
+ outGroup << *it << '\t' << group << endl;
+ }
+ outGroup.close();
+ }
+ }else {
+ SubSample sample; CountTable sampledCt;
+
+ if (persample) { sampledCt = sample.getSample(ct, size, Groups); }
+ else { sampledCt = sample.getSample(ct, size, Groups, pickedGroups); }
+
+ vector<string> sampledSeqs = sampledCt.getNamesOfSeqs();
+ for (int i = 0; i < sampledSeqs.size(); i++) { subset.insert(sampledSeqs[i]); }
+
+ string countOutputDir = outputDir;
+ if (outputDir == "") { countOutputDir += m->hasPath(countfile); }
+ string countOutputFileName = countOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+ outputTypes["count"].push_back(countOutputFileName); outputNames.push_back(countOutputFileName);
+ sampledCt.printTable(countOutputFileName);
+ }