5 // Created by Sarah Westcott on 6/26/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "counttable.h"
11 /************************************************************/
// Builds the table from a set of sequence names (n), a name->group map (g) and a set of group names (gs).
// NOTE(review): several statements of this function are not visible in this excerpt (for example the
// declarations of numGroups and seqName), so only the visible steps are described.
12 int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
17 indexGroupMap.clear();
// copy the group names; hasGroups becomes true as soon as gs holds at least one name
20 for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it); hasGroups = true; }
21 numGroups = groups.size();
22 totalGroups.resize(numGroups, 0);
24 //sort groups to keep consistent with how we store the groups in groupmap
25 sort(groups.begin(), groups.end());
// record, for every group name, its position in the sorted groups vector
26 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
27 m->setAllGroups(groups);
// one pass per sequence name; seqName is presumably the current element of n (its assignment is not visible here)
31 for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
// leave the loop as soon as control_pressed is set
33 if (m->control_pressed) { break; }
// one slot per group, all zero until the sequence's group is known
37 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
38 map<string, string>::iterator itGroup = g.find(seqName);
40 if (itGroup != g.end()) {
// operator[] is used on indexGroupMap, so a group name that was not in gs would be inserted with index 0
// NOTE(review): confirm g only refers to groups present in gs
41 groupCounts[indexGroupMap[itGroup->second]] = 1;
42 totalGroups[indexGroupMap[itGroup->second]]++;
// a name without a group is reported; this visible line does not set control_pressed
43 }else { m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); }
// only names that are not yet in indexNameMap are added
45 map<string, int>::iterator it2 = indexNameMap.find(seqName);
46 if (it2 == indexNameMap.end()) {
47 if (hasGroups) { counts.push_back(groupCounts); }
// the current value of uniques becomes this sequence's row index
48 indexNameMap[seqName] = uniques;
58 m->errorOut(e, "CountTable", "createTable");
62 /************************************************************/
// Reads the header line of a count file and sets hasGroups to true when that line has more than
// two whitespace-separated columns.
// Also resets m, hasGroups and total before reading.
// NOTE(review): the stream declaration and the return statement are not visible in this excerpt.
63 bool CountTable::testGroups(string file) {
65 m = MothurOut::getInstance(); hasGroups = false; total = 0;
67 m->openInputFile(file, in);
69 string headers = m->getline(in); m->gobble(in);
70 vector<string> columnHeaders = m->splitWhiteSpace(headers);
// more than two header columns means per-group columns are present
71 if (columnHeaders.size() > 2) { hasGroups = true; }
// report failures under this function's own name so the error points at the right place
75 m->errorOut(e, "CountTable", "testGroups");
79 /************************************************************/
// Builds the table from a name file, optionally combined with a group file.
//   namefile    - must not be blank; each record is read as two whitespace-separated columns, the second
//                 being a comma-separated list of names
//   groupfile   - when not blank, per-group counts are derived from a GroupMap built from this file
//   createGroup - when groupfile is blank and this is true, every name is counted under the group "Group1"
// NOTE(review): many statements of this function are not visible in this excerpt (loop header, declarations
// of numGroups, thisTotal, error, in, closing braces), so only the visible steps are described.
80 int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
83 if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
89 indexGroupMap.clear();
// declared here but not used on any line visible in this excerpt
92 map<int, string> originalGroupIndexes;
94 if (groupfile != "") {
// groupMap is allocated with new and released by the delete near the end of this function
// NOTE(review): the delete sits before the catch, so it looks like it is skipped if an exception is thrown - confirm
96 groupMap = new GroupMap(groupfile); groupMap->readMap();
97 numGroups = groupMap->getNumGroups();
98 groups = groupMap->getNamesOfGroups();
99 totalGroups.resize(numGroups, 0);
100 }else if(createGroup) {
103 groups.push_back("Group1");
104 totalGroups.resize(numGroups, 0);
106 //sort groups to keep consistent with how we store the groups in groupmap
107 sort(groups.begin(), groups.end());
// record, for every group name, its position in the sorted groups vector
108 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
109 m->setAllGroups(groups);
119 m->openInputFile(namefile, in);
// leave the read loop as soon as control_pressed is set
123 if (m->control_pressed) { break; }
125 string firstCol, secondCol;
126 in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
// the second column is split on ',' into the individual names
128 vector<string> names;
129 m->splitAtChar(secondCol, names, ',');
// per-record tally, keyed by group name
131 map<string, int> groupCounts;
133 if (groupfile != "") {
// start every known group at zero so the lookup below only succeeds for known groups
135 for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
137 //get counts for each of the users groups
138 for (int i = 0; i < names.size(); i++) {
139 string group = groupMap->getGroup(names[i]);
// "not found" is compared as a literal; the error flag is turned into control_pressed after the loop
141 if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
143 map<string, int>::iterator it = groupCounts.find(group);
145 //if not found, then this sequence is not from a group we care about
146 if (it != groupCounts.end()) {
152 }else if (createGroup) {
// no group file: every name in the record counts towards "Group1"
153 groupCounts["Group1"]=0;
154 for (int i = 0; i < names.size(); i++) {
// this local is not used on any line visible in this excerpt
155 string group = "Group1";
156 groupCounts["Group1"]++; thisTotal++;
// no group information at all: the record's total is simply the number of names
158 }else { thisTotal = names.size(); }
160 //if group info, then read it
// lay the tallies out in the order of the groups vector and add them to the per-group totals
161 vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
162 for (int i = 0; i < numGroups; i++) {
163 thisGroupsCount[i] = groupCounts[groups[i]];
164 totalGroups[i] += thisGroupsCount[i];
// only a first-column name that is not yet in indexNameMap is added; a repeat is reported below
167 map<string, int>::iterator it = indexNameMap.find(firstCol);
168 if (it == indexNameMap.end()) {
169 if (hasGroups) { counts.push_back(thisGroupsCount); }
170 indexNameMap[firstCol] = uniques;
171 totals.push_back(thisTotal);
176 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
181 if (error) { m->control_pressed = true; }
182 if (groupfile != "") { delete groupMap; }
186 catch(exception& e) {
187 m->errorOut(e, "CountTable", "createTable");
191 /************************************************************/
// Reads a count table: a header line followed by records of name, total and (optionally) one count per group.
// NOTE(review): many statements of this function are not visible in this excerpt (loop header, declarations of
// in, name, thisTotal, error, numGroups, closing braces), so only the visible steps are described.
192 int CountTable::readTable(string file) {
// NOTE(review): this opens filename, not the parameter file - presumably filename is assigned from file on a
// line that is not visible here; confirm
196 m->openInputFile(filename, in);
198 string headers = m->getline(in); m->gobble(in);
199 vector<string> columnHeaders = m->splitWhiteSpace(headers);
204 indexGroupMap.clear();
205 indexNameMap.clear();
// maps a column position in the file (0-based, counted after the first two columns) to its group name
207 map<int, string> originalGroupIndexes;
// header columns from the third onwards are treated as group names
208 if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2; }
209 for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; totalGroups.push_back(0); }
210 //sort groups to keep consistent with how we store the groups in groupmap
211 sort(groups.begin(), groups.end());
// record, for every group name, its position in the sorted groups vector
212 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
213 m->setAllGroups(groups);
// leave the read loop as soon as control_pressed is set
222 if (m->control_pressed) { break; }
224 in >> name; m->gobble(in); in >> thisTotal; m->gobble(in);
225 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
227 //if group info, then read it
// counts arrive in file column order; each one is stored at the sorted position of its group
228 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
229 for (int i = 0; i < numGroups; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; }
// only a name that is not yet in indexNameMap is added; a repeat is reported below
231 map<string, int>::iterator it = indexNameMap.find(name);
232 if (it == indexNameMap.end()) {
233 if (hasGroups) { counts.push_back(groupCounts); }
234 indexNameMap[name] = uniques;
235 totals.push_back(thisTotal);
240 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
245 if (error) { m->control_pressed = true; }
249 catch(exception& e) {
250 m->errorOut(e, "CountTable", "readTable");
254 /************************************************************/
// Writes the table to file: a tab-separated header line, then one tab-separated row per sequence.
// NOTE(review): the declaration of out, any guard around the per-group loop, line endings and the return are
// not visible in this excerpt.
255 int CountTable::printTable(string file) {
258 m->openOutputFile(file, out);
// header: two fixed column titles followed by the group names in groups order
259 out << "Representative_Sequence\ttotal\t";
260 for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
// rows follow the iteration order of indexNameMap (a std::map keyed by sequence name)
263 for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
// the mapped value is the row index into totals and counts
264 out << itNames->first << '\t' << totals[itNames->second] << '\t';
267 for (int i = 0; i < groups.size(); i++) {
268 out << counts[itNames->second][i] << '\t';
276 catch(exception& e) {
277 m->errorOut(e, "CountTable", "printTable");
281 /************************************************************/
// Writes the tab-separated header to out: two fixed column titles followed by every group name.
// NOTE(review): the line ending and the return statement are not visible in this excerpt.
282 int CountTable::printHeaders(ofstream& out) {
284 out << "Representative_Sequence\ttotal\t";
285 for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
289 catch(exception& e) {
290 m->errorOut(e, "CountTable", "printHeaders");
294 /************************************************************/
// Writes one sequence's row to out: name, total, then one count per group, tab-separated.
// An unknown seqName is reported and sets control_pressed.
// NOTE(review): the else branch structure, any guard around the per-group loop and the return are not visible.
295 int CountTable::printSeq(ofstream& out, string seqName) {
297 map<string, int>::iterator it = indexNameMap.find(seqName);
298 if (it == indexNameMap.end()) {
299 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// the mapped value is the row index into totals and counts
301 out << it->first << '\t' << totals[it->second] << '\t';
303 for (int i = 0; i < groups.size(); i++) {
304 out << counts[it->second][i] << '\t';
311 catch(exception& e) {
312 m->errorOut(e, "CountTable", "printSeq");
316 /************************************************************/
317 //group counts for a seq
// Looks up seqName and copies its row of per-group counts into temp.
// An unknown seqName, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the declaration of temp, the condition paired with the final else (presumably a hasGroups
// check) and the return statement are not visible in this excerpt.
318 vector<int> CountTable::getGroupCounts(string seqName) {
322 map<string, int>::iterator it = indexNameMap.find(seqName);
323 if (it == indexNameMap.end()) {
324 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// the mapped value is the row index into counts
326 temp = counts[it->second];
328 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
332 catch(exception& e) {
333 m->errorOut(e, "CountTable", "getGroupCounts");
337 /************************************************************/
338 //total number of sequences for the group
// Returns the running total stored in totalGroups for groupName.
// An unknown groupName, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the condition paired with the final else (presumably a hasGroups check) and the value
// returned on the error paths are not visible in this excerpt.
339 int CountTable::getGroupCount(string groupName) {
342 map<string, int>::iterator it = indexGroupMap.find(groupName);
343 if (it == indexGroupMap.end()) {
344 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// the mapped value is the group's index into totalGroups
346 return totalGroups[it->second];
348 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
352 catch(exception& e) {
353 m->errorOut(e, "CountTable", "getGroupCount");
357 /************************************************************/
358 //total number of sequences for the seq for the group
// Returns the count stored for seqName in groupName.
// An unknown group, an unknown sequence, or a table without group info is reported and sets control_pressed.
// NOTE(review): the condition paired with the final else (presumably a hasGroups check) and the value
// returned on the error paths are not visible in this excerpt.
359 int CountTable::getGroupCount(string seqName, string groupName) {
// the group is resolved first, then the sequence
362 map<string, int>::iterator it = indexGroupMap.find(groupName);
363 if (it == indexGroupMap.end()) {
364 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
366 map<string, int>::iterator it2 = indexNameMap.find(seqName);
367 if (it2 == indexNameMap.end()) {
368 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// counts is indexed by sequence row first, group column second
370 return counts[it2->second][it->second];
373 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
377 catch(exception& e) {
378 m->errorOut(e, "CountTable", "getGroupCount");
382 /************************************************************/
383 //set the number of sequences for the seq for the group
// Sets the count of seqName in groupName to num and adjusts the group total, the overall total and the
// sequence total by the difference from the previous count.
// An unknown group, an unknown sequence, or a table without group info is reported and sets control_pressed.
// NOTE(review): the condition paired with the final else (presumably a hasGroups check) and the return
// statement are not visible in this excerpt.
384 int CountTable::setAbund(string seqName, string groupName, int num) {
// the group is resolved first, then the sequence
387 map<string, int>::iterator it = indexGroupMap.find(groupName);
388 if (it == indexGroupMap.end()) {
389 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
391 map<string, int>::iterator it2 = indexNameMap.find(seqName);
392 if (it2 == indexNameMap.end()) {
393 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// remember the old value so that all three running totals move by the same delta
395 int oldCount = counts[it2->second][it->second];
396 counts[it2->second][it->second] = num;
397 totalGroups[it->second] += (num - oldCount);
398 total += (num - oldCount);
399 totals[it2->second] += (num - oldCount);
402 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
406 catch(exception& e) {
// report failures under this function's own name so the error points at the right place
407 m->errorOut(e, "CountTable", "setAbund");
411 /************************************************************/
// Adds a new, empty group column named groupName and re-sorts the groups, rearranging totalGroups and every
// row of counts so that they follow the new sorted order.
// A groupName that is already present is reported, sets control_pressed and returns 0 without changes.
// NOTE(review): closing braces and the statements after the last loop are not visible in this excerpt.
413 int CountTable::addGroup(string groupName) {
415 bool sanity = m->inUsersGroups(groupName, groups);
416 if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true; return 0; }
418 groups.push_back(groupName);
// a table that had no groups so far gets one (initially empty) row per unique sequence
419 if (!hasGroups) { counts.resize(uniques); }
// append a zero for the new group to every row and to the group totals
421 for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); }
422 totalGroups.push_back(0);
423 indexGroupMap[groupName] = groups.size()-1;
// snapshot of the pre-sort positions, used below to move the data to the post-sort positions
424 map<string, int> originalGroupMap = indexGroupMap;
426 //important to play well with others, :)
427 sort(groups.begin(), groups.end());
429 //fix indexGroupMap && totalGroups
430 vector<int> newTotals; newTotals.resize(groups.size(), 0);
431 for (int i = 0; i < groups.size(); i++) {
432 indexGroupMap[groups[i]] = i;
433 //find original spot of group[i]
434 int index = originalGroupMap[groups[i]];
435 newTotals[i] = totalGroups[index];
437 totalGroups = newTotals;
// apply the same old-position -> new-position move to every sequence's row
440 for (int i = 0; i < counts.size(); i++) {
441 vector<int> newCounts; newCounts.resize(groups.size(), 0);
442 for (int j = 0; j < groups.size(); j++) {
443 //find original spot of group[i]
444 int index = originalGroupMap[groups[j]];
445 newCounts[j] = counts[i][index];
447 counts[i] = newCounts;
453 catch(exception& e) {
454 m->errorOut(e, "CountTable", "addGroup");
458 /************************************************************/
459 //vector of groups for the seq
// Collects the names of the groups in which seqName has a nonzero count.
// A table without group info is reported and sets control_pressed.
// NOTE(review): the condition paired with the else (presumably a hasGroups check) and the return statement
// are not visible in this excerpt.
460 vector<string> CountTable::getGroups(string seqName) {
462 vector<string> thisGroups;
// position i of the returned counts is matched with groups[i]
464 vector<int> thisCounts = getGroupCounts(seqName);
465 for (int i = 0; i < thisCounts.size(); i++) {
466 if (thisCounts[i] != 0) { thisGroups.push_back(groups[i]); }
468 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
472 catch(exception& e) {
473 m->errorOut(e, "CountTable", "getGroups");
477 /************************************************************/
478 //renames oldSeqName to newSeqName, keeping its index in the table
// Re-keys the entry of oldSeqName under newSeqName; the stored index is carried over unchanged.
// An unknown oldSeqName is reported and sets control_pressed.
// NOTE(review): the else structure and the return statement are not visible in this excerpt.
479 int CountTable::renameSeq(string oldSeqName, string newSeqName) {
482 map<string, int>::iterator it = indexNameMap.find(oldSeqName);
483 if (it == indexNameMap.end()) {
484 m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// save the index before erasing, then store it under the new name
// operator[] is used, so an existing entry for newSeqName would have its index overwritten
486 int index = it->second;
487 indexNameMap.erase(it);
488 indexNameMap[newSeqName] = index;
493 catch(exception& e) {
494 m->errorOut(e, "CountTable", "renameSeq");
499 /************************************************************/
500 //total number of seqs represented by seq
// Returns the total stored in totals for seqName.
// An unknown seqName is reported and sets control_pressed.
// NOTE(review): the value returned on the error path is not visible in this excerpt.
501 int CountTable::getNumSeqs(string seqName) {
504 map<string, int>::iterator it = indexNameMap.find(seqName);
505 if (it == indexNameMap.end()) {
506 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// the mapped value is the row index into totals
508 return totals[it->second];
513 catch(exception& e) {
514 m->errorOut(e, "CountTable", "getNumSeqs");
518 /************************************************************/
519 //returns unique index for sequence like get in NameAssignment
// Returns the index stored in indexNameMap for seqName.
// An unknown seqName is reported and sets control_pressed.
// NOTE(review): the value returned on the error path is not visible in this excerpt.
520 int CountTable::get(string seqName) {
523 map<string, int>::iterator it = indexNameMap.find(seqName);
524 if (it == indexNameMap.end()) {
525 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
526 }else { return it->second; }
530 catch(exception& e) {
531 m->errorOut(e, "CountTable", "get");
535 /************************************************************/
536 //add sequence without group info
// Adds seqName to the table without per-group counts.
// A duplicate name is reported and sets control_pressed.
// NOTE(review): the statements between the insertion and the duplicate-name message, and the return, are not
// visible in this excerpt.
537 int CountTable::push_back(string seqName) {
539 map<string, int>::iterator it = indexNameMap.find(seqName);
540 if (it == indexNameMap.end()) {
// on a table that has groups this is reported as an error; on the visible lines the insertion below still follows
541 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
// the current value of uniques becomes this sequence's index
542 indexNameMap[seqName] = uniques;
547 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
552 catch(exception& e) {
553 m->errorOut(e, "CountTable", "push_back");
557 /************************************************************/
// Removes seqName from the table: its per-group counts are subtracted from the group totals and zeroed,
// its entry in totals is zeroed, and its name is erased from indexNameMap.
// A seqName that is not in the table is reported and sets control_pressed.
// NOTE(review): some statements (closing braces, the else keyword, the use of thisTotal, the return) are not
// visible in this excerpt.
559 int CountTable::remove(string seqName) {
561 map<string, int>::iterator it = indexNameMap.find(seqName);
// only a sequence that was found may be removed: everything below dereferences and erases it, which is
// undefined for the end iterator; the not-found case is the error message further down
562 if (it != indexNameMap.end()) {
564 if (hasGroups){ //remove this sequences counts from group totals
565 for (int i = 0; i < totalGroups.size(); i++) { totalGroups[i] -= counts[it->second][i]; counts[it->second][i] = 0; }
// the row is zeroed rather than erased, so the indexes of the other sequences stay valid
567 int thisTotal = totals[it->second]; totals[it->second] = 0;
569 indexNameMap.erase(it);
571 m->mothurOut("[ERROR]: Your count table does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true;
576 catch(exception& e) {
// report failures under this function's own name so the error points at the right place
577 m->errorOut(e, "CountTable", "remove");
581 /************************************************************/
582 //add sequence without group info, with thisTotal as its total count
// Adds seqName to the table with thisTotal as its total and no per-group counts.
// A duplicate name is reported and sets control_pressed.
// NOTE(review): the statements between the insertion and the duplicate-name message, and the return, are not
// visible in this excerpt.
583 int CountTable::push_back(string seqName, int thisTotal) {
585 map<string, int>::iterator it = indexNameMap.find(seqName);
586 if (it == indexNameMap.end()) {
// on a table that has groups this is reported as an error; on the visible lines the insertion below still follows
587 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
// the current value of uniques becomes this sequence's index
588 indexNameMap[seqName] = uniques;
589 totals.push_back(thisTotal);
593 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
598 catch(exception& e) {
599 m->errorOut(e, "CountTable", "push_back");
603 /************************************************************/
604 //add sequence with group info
// Adds seqName to the table with one count per group; the counts are added to the group totals and their
// sum becomes the sequence total.
// A groupCounts vector whose size differs from the number of groups is reported, sets control_pressed and
// leaves the table unchanged. A duplicate name is reported and sets control_pressed.
// NOTE(review): the declaration of thisTotal, the statements between the insertion and the duplicate-name
// message, and the final return are not visible in this excerpt.
605 int CountTable::push_back(string seqName, vector<int> groupCounts) {
607 map<string, int>::iterator it = indexNameMap.find(seqName);
608 if (it == indexNameMap.end()) {
// stop here on a size mismatch: the loop below indexes groupCounts up to getNumGroups() and would read past
// the end of a shorter vector
609 if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
611 for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; }
612 if (hasGroups) { counts.push_back(groupCounts); }
// the current value of uniques becomes this sequence's index
613 indexNameMap[seqName] = uniques;
614 totals.push_back(thisTotal);
618 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
623 catch(exception& e) {
624 m->errorOut(e, "CountTable", "push_back");
629 /************************************************************/
630 //create ListVector from uniques
// Builds a ListVector with one slot per name in indexNameMap and stores each name at its stored index.
// The loop stops early when control_pressed is set.
// NOTE(review): list.set is called with the stored index while the list is sized by indexNameMap.size(); this
// assumes every stored index is below that size - confirm it still holds after remove()/mergeCounts() erase names.
// The return statement is not visible in this excerpt.
631 ListVector CountTable::getListVector() {
633 ListVector list(indexNameMap.size());
634 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
635 if (m->control_pressed) { break; }
636 list.set(it->second, it->first);
640 catch(exception& e) {
641 m->errorOut(e, "CountTable", "getListVector");
646 /************************************************************/
647 //returns the names of all unique sequences in file
// Collects every sequence name held in indexNameMap, in the map's iteration order (sorted by name).
// NOTE(review): the return statement is not visible in this excerpt.
648 vector<string> CountTable::getNamesOfSeqs() {
650 vector<string> names;
651 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
652 names.push_back(it->first);
657 catch(exception& e) {
658 m->errorOut(e, "CountTable", "getNamesOfSeqs");
662 /************************************************************/
663 //returns the names of all unique sequences with a nonzero count in the given group
// Collects the names of the sequences whose count in the given group is not zero.
// An unknown group, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the condition paired with the final else (presumably a hasGroups check) and the return
// statement are not visible in this excerpt.
664 vector<string> CountTable::getNamesOfSeqs(string group) {
666 vector<string> names;
668 map<string, int>::iterator it = indexGroupMap.find(group);
669 if (it == indexGroupMap.end()) {
670 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the group's column, it2->second the sequence's row
672 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
673 if (counts[it2->second][it->second] != 0) { names.push_back(it2->first); }
676 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
680 catch(exception& e) {
681 m->errorOut(e, "CountTable", "getNamesOfSeqs");
685 /************************************************************/
686 //merges counts of seq1 and seq2, saving in seq1
// Adds the per-group counts and the total of seq2 onto seq1, then erases seq2's name from indexNameMap.
// An unknown seq1 or seq2 is reported and sets control_pressed.
// NOTE(review): the else structure, any guard around the per-group loop and the return statement are not
// visible in this excerpt.
687 int CountTable::mergeCounts(string seq1, string seq2) {
689 map<string, int>::iterator it = indexNameMap.find(seq1);
690 if (it == indexNameMap.end()) {
691 m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
693 map<string, int>::iterator it2 = indexNameMap.find(seq2);
694 if (it2 == indexNameMap.end()) {
695 m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is seq1's row, it2->second is seq2's row
698 for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; }
699 totals[it->second] += totals[it2->second];
// only the name is erased here; seq2's row in counts and totals is left in place on the visible lines
701 indexNameMap.erase(it2);
706 catch(exception& e) {
// report failures under this function's own name so the error points at the right place
707 m->errorOut(e, "CountTable", "mergeCounts");
711 /************************************************************/
// Copies the contents of ct into this table: every group of ct is added with addGroup, then every sequence
// of ct is added with its per-group counts through push_back.
// NOTE(review): any statements before the first visible line (for example a reset of this table) and the
// return statement are not visible in this excerpt.
712 int CountTable::copy(CountTable* ct) {
714 vector<string> thisGroups = ct->getNamesOfGroups();
715 for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
716 vector<string> names = ct->getNamesOfSeqs();
718 for (int i = 0; i < names.size(); i++) {
719 vector<int> thisCounts = ct->getGroupCounts(names[i]);
720 push_back(names[i], thisCounts);
725 catch(exception& e) {
726 m->errorOut(e, "CountTable", "copy");
731 /************************************************************/