5 // Created by Sarah Westcott on 6/26/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "counttable.h"
11 /************************************************************/
// Builds the table from in-memory data: n = sequence names, g = sequence name -> group name,
// gs = the group names to create columns for. Every sequence found in g gets a count of 1 in
// the column of its group.
// NOTE(review): several lines of this definition are not visible in this excerpt (for example
// the declaration of seqName and the catch clause that provides `e` for the errorOut call at
// the end) - confirm against the full file.
12 int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
// start with an empty group name -> column index map
17 indexGroupMap.clear();
// append every requested group to groups; hasGroups becomes true only if gs is non-empty
20 for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it); hasGroups = true; }
21 numGroups = groups.size();
// one running total per group, all starting at zero
22 totalGroups.resize(numGroups, 0);
24 //sort groups to keep consistent with how we store the groups in groupmap
25 sort(groups.begin(), groups.end());
// after the sort, a group's column index is simply its position in groups
26 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
27 m->setAllGroups(groups);
// one iteration per sequence name in n
31 for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
// leave the loop as soon as m->control_pressed is set
33 if (m->control_pressed) { break; }
// per-group counts for this sequence, zero-initialised
37 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
38 map<string, string>::iterator itGroup = g.find(seqName);
40 if (itGroup != g.end()) {
// NOTE(review): indexGroupMap[...] uses operator[], which inserts a 0 entry for a group name
// that is not already a key - confirm g only refers to groups present in gs.
41 groupCounts[indexGroupMap[itGroup->second]] = 1;
42 totalGroups[indexGroupMap[itGroup->second]]++;
// sequence has no entry in g: report it to the user
43 }else { m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); }
// only add the sequence if its name is not already in the table
45 map<string, int>::iterator it2 = indexNameMap.find(seqName);
46 if (it2 == indexNameMap.end()) {
// the per-group row is stored only when the table has groups
47 if (hasGroups) { counts.push_back(groupCounts); }
// the name maps to the current value of uniques (its update is not visible in this excerpt)
48 indexNameMap[seqName] = uniques;
// drop every group whose total is zero
56 for (int i = 0; i < totalGroups.size(); i++) {
// removeGroup erases the entry from totalGroups, so i is decremented to re-examine the same position
57 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
// report the exception together with the class and function name
64 m->errorOut(e, "CountTable", "createTable");
68 /************************************************************/
// Peeks at the header line of a count file and sets hasGroups when it has more than two
// whitespace-separated columns. Also (re)initialises m, hasGroups and total.
// NOTE(review): the declaration of `in`, the return statement and the catch clause that
// provides `e` are not visible in this excerpt.
69 bool CountTable::testGroups(string file) {
// fetch the shared MothurOut instance and reset the flags this check relies on
71 m = MothurOut::getInstance(); hasGroups = false; total = 0;
73 m->openInputFile(file, in);
// only the first line (the column headers) is read
75 string headers = m->getline(in); m->gobble(in);
76 vector<string> columnHeaders = m->splitWhiteSpace(headers);
// more than two header columns means per-group columns follow the first two
77 if (columnHeaders.size() > 2) { hasGroups = true; }
// report under this function's own name (previously reported as "readTable")
81 m->errorOut(e, "CountTable", "testGroups");
85 /************************************************************/
// Builds the table from a name file and, optionally, a group file.
//   namefile    - two columns per line; the second column is a comma-separated list of names
//   groupfile   - if non-empty, read through GroupMap to assign each name to a group
//   createGroup - if true and no group file is given, everything is counted in a group "Group1"
// NOTE(review): several lines of this definition are not visible in this excerpt (e.g. the
// declarations of `in`, `error` and `thisTotal`, the read loop header, and the try block).
86 int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
// a blank name file is reported and flagged through control_pressed
89 if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
95 indexGroupMap.clear();
98 map<int, string> originalGroupIndexes;
100 if (groupfile != "") {
// groupMap is allocated here and deleted near the end of this function
102 groupMap = new GroupMap(groupfile); groupMap->readMap();
103 numGroups = groupMap->getNumGroups();
104 groups = groupMap->getNamesOfGroups();
105 totalGroups.resize(numGroups, 0);
106 }else if(createGroup) {
// no group file: use a single artificial group
109 groups.push_back("Group1");
110 totalGroups.resize(numGroups, 0);
112 //sort groups to keep consistent with how we store the groups in groupmap
113 sort(groups.begin(), groups.end());
// after the sort, a group's column index is its position in groups
114 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
115 m->setAllGroups(groups);
125 m->openInputFile(namefile, in);
// leave the read loop as soon as m->control_pressed is set
129 if (m->control_pressed) { break; }
131 string firstCol, secondCol;
// read the two columns of one name-file line
132 in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
134 vector<string> names;
// the second column is a comma-separated list of sequence names
135 m->splitAtChar(secondCol, names, ',');
// group name -> number of names from this line that belong to it
137 map<string, int> groupCounts;
139 if (groupfile != "") {
// seed every known group with zero so the find() below only matches known groups
141 for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
143 //get counts for each of the users groups
144 for (int i = 0; i < names.size(); i++) {
145 string group = groupMap->getGroup(names[i]);
// this code treats the string "not found" from getGroup as a missing name
147 if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
149 map<string, int>::iterator it = groupCounts.find(group);
151 //if not found, then this sequence is not from a group we care about
152 if (it != groupCounts.end()) {
158 }else if (createGroup) {
// every name on the line is counted in the artificial group
159 groupCounts["Group1"]=0;
160 for (int i = 0; i < names.size(); i++) {
161 string group = "Group1";
162 groupCounts["Group1"]++; thisTotal++;
// no group information at all: the total is just the number of names on the line
164 }else { thisTotal = names.size(); }
166 //convert the per-group map into a vector ordered like groups, and update the group totals
167 vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
168 for (int i = 0; i < numGroups; i++) {
169 thisGroupsCount[i] = groupCounts[groups[i]];
170 totalGroups[i] += thisGroupsCount[i];
// only add the representative name (first column) if it is not already in the table
173 map<string, int>::iterator it = indexNameMap.find(firstCol);
174 if (it == indexNameMap.end()) {
175 if (hasGroups) { counts.push_back(thisGroupsCount); }
176 indexNameMap[firstCol] = uniques;
177 totals.push_back(thisTotal);
// duplicate representative name
182 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
// any error seen while reading is turned into control_pressed
187 if (error) { m->control_pressed = true; }
188 else { //check for zero groups
190 for (int i = 0; i < totalGroups.size(); i++) {
// removeGroup erases the entry from totalGroups, so i is decremented to re-examine the same position
191 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
// release the GroupMap allocated above (it was only allocated when a group file was given)
195 if (groupfile != "") { delete groupMap; }
199 catch(exception& e) {
200 m->errorOut(e, "CountTable", "createTable");
204 /************************************************************/
// Reads a count file: a header line followed by one line per sequence holding the name,
// its total and (when present) one count per group column.
// NOTE(review): several lines of this definition are not visible in this excerpt (e.g. the
// declarations of `in`, `name`, `thisTotal` and `error`, the read loop header, the try block).
205 int CountTable::readTable(string file) {
// NOTE(review): this opens `filename`, not the `file` parameter; the statement that relates
// the two is not visible here - confirm.
209 m->openInputFile(filename, in);
// first line holds the column headers
211 string headers = m->getline(in); m->gobble(in);
212 vector<string> columnHeaders = m->splitWhiteSpace(headers);
217 indexGroupMap.clear();
218 indexNameMap.clear();
// position of a group column in the file -> group name (file order, before sorting)
220 map<int, string> originalGroupIndexes;
// every header after the first two is a group column
221 if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2; }
222 for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; totalGroups.push_back(0); }
223 //sort groups to keep consistent with how we store the groups in groupmap
224 sort(groups.begin(), groups.end());
// after the sort, a group's column index is its position in groups
225 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
226 m->setAllGroups(groups);
// leave the read loop as soon as m->control_pressed is set
235 if (m->control_pressed) { break; }
// sequence name followed by its total
237 in >> name; m->gobble(in); in >> thisTotal; m->gobble(in);
238 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
240 //if group info, then read it
241 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
// the i-th value in the file belongs to the group named originalGroupIndexes[i]; it is stored
// at that group's index in the sorted order, and added to that group's running total
242 for (int i = 0; i < numGroups; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; }
// only add the sequence if its name is not already in the table
244 map<string, int>::iterator it = indexNameMap.find(name);
245 if (it == indexNameMap.end()) {
246 if (hasGroups) { counts.push_back(groupCounts); }
247 indexNameMap[name] = uniques;
248 totals.push_back(thisTotal);
// duplicate sequence name
253 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
// any error seen while reading is turned into control_pressed
258 if (error) { m->control_pressed = true; }
259 else { //check for zero groups
261 for (int i = 0; i < totalGroups.size(); i++) {
// removeGroup erases the entry from totalGroups, so i is decremented to re-examine the same position
262 if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
269 catch(exception& e) {
270 m->errorOut(e, "CountTable", "readTable");
274 /************************************************************/
// Writes the table to `file`: a header row, then one row per sequence.
// NOTE(review): the declaration of `out` and some lines between the visible statements are
// not part of this excerpt.
275 int CountTable::printTable(string file) {
278 m->openOutputFile(file, out);
// header: two fixed column titles, then one tab-terminated title per group
279 out << "Representative_Sequence\ttotal\t";
280 for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
// rows follow the iteration order of indexNameMap (a std::map keyed by sequence name);
// itNames->second is the row index into totals and counts
283 for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
284 out << itNames->first << '\t' << totals[itNames->second] << '\t';
// per-group counts for this sequence, in the same order as the header
287 for (int i = 0; i < groups.size(); i++) {
288 out << counts[itNames->second][i] << '\t';
296 catch(exception& e) {
297 m->errorOut(e, "CountTable", "printTable");
301 /************************************************************/
// Writes only the header row to an already-open stream: the two fixed column titles
// followed by one tab-terminated title per group.
302 int CountTable::printHeaders(ofstream& out) {
304 out << "Representative_Sequence\ttotal\t";
305 for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
309 catch(exception& e) {
310 m->errorOut(e, "CountTable", "printHeaders");
314 /************************************************************/
// Writes the row for one sequence (name, total, then its per-group counts) to `out`.
// An unknown name is reported and flagged through m->control_pressed.
315 int CountTable::printSeq(ofstream& out, string seqName) {
317 map<string, int>::iterator it = indexNameMap.find(seqName);
318 if (it == indexNameMap.end()) {
319 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the row index of this sequence in totals and counts
321 out << it->first << '\t' << totals[it->second] << '\t';
// one tab-terminated count per group
323 for (int i = 0; i < groups.size(); i++) {
324 out << counts[it->second][i] << '\t';
331 catch(exception& e) {
332 m->errorOut(e, "CountTable", "printSeq");
336 /************************************************************/
337 //group counts for a seq: copies the per-group row stored for seqName
// An unknown name, or a table without group info, is reported and flagged through
// m->control_pressed.
// NOTE(review): the declaration and return of `temp` are not visible in this excerpt.
338 vector<int> CountTable::getGroupCounts(string seqName) {
342 map<string, int>::iterator it = indexNameMap.find(seqName);
343 if (it == indexNameMap.end()) {
344 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the row index of this sequence in counts
346 temp = counts[it->second];
348 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
352 catch(exception& e) {
353 m->errorOut(e, "CountTable", "getGroupCounts");
357 /************************************************************/
358 //total number of sequences for the group
// Returns the entry of totalGroups for groupName. An unknown group, or a table without
// group info, is reported and flagged through m->control_pressed.
// NOTE(review): the value returned on the error paths is not visible in this excerpt.
359 int CountTable::getGroupCount(string groupName) {
362 map<string, int>::iterator it = indexGroupMap.find(groupName);
363 if (it == indexGroupMap.end()) {
364 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the column index of the group
366 return totalGroups[it->second];
368 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
372 catch(exception& e) {
373 m->errorOut(e, "CountTable", "getGroupCount");
377 /************************************************************/
378 //total number of sequences for the seq for the group
// Returns the single cell counts[row of seqName][column of groupName]. An unknown group,
// an unknown sequence, or a table without group info is reported and flagged through
// m->control_pressed.
// NOTE(review): the value returned on the error paths is not visible in this excerpt.
379 int CountTable::getGroupCount(string seqName, string groupName) {
// resolve the group column first
382 map<string, int>::iterator it = indexGroupMap.find(groupName);
383 if (it == indexGroupMap.end()) {
384 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// then the sequence row
386 map<string, int>::iterator it2 = indexNameMap.find(seqName);
387 if (it2 == indexNameMap.end()) {
388 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
390 return counts[it2->second][it->second];
393 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
397 catch(exception& e) {
398 m->errorOut(e, "CountTable", "getGroupCount");
402 /************************************************************/
403 //set the number of sequences for the seq for the group
// Overwrites the cell counts[row of seqName][column of groupName] with num and applies the
// difference (num - old value) to the group total, the overall total and the sequence total.
// An unknown group, an unknown sequence, or a table without group info is reported and
// flagged through m->control_pressed.
// NOTE(review): the return statements and the try block are not visible in this excerpt.
404 int CountTable::setAbund(string seqName, string groupName, int num) {
// resolve the group column first
407 map<string, int>::iterator it = indexGroupMap.find(groupName);
408 if (it == indexGroupMap.end()) {
409 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// then the sequence row
411 map<string, int>::iterator it2 = indexNameMap.find(seqName);
412 if (it2 == indexNameMap.end()) {
413 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// remember the previous value so all three totals can be adjusted by the same delta
415 int oldCount = counts[it2->second][it->second];
416 counts[it2->second][it->second] = num;
417 totalGroups[it->second] += (num - oldCount);
418 total += (num - oldCount);
419 totals[it2->second] += (num - oldCount);
422 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
426 catch(exception& e) {
// report under this function's own name (previously reported as "set")
427 m->errorOut(e, "CountTable", "setAbund");
431 /************************************************************/
// Adds a new, empty group column named groupName, then re-sorts the groups and permutes
// totalGroups and every row of counts so that they follow the new sorted order.
// Adding a group that already exists is reported, flagged through m->control_pressed,
// and returns 0.
// NOTE(review): the final return statement and the try block are not visible in this excerpt.
433 int CountTable::addGroup(string groupName) {
435 bool sanity = m->inUsersGroups(groupName, groups);
436 if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true; return 0; }
438 groups.push_back(groupName);
// a table without groups has no rows in counts yet, so create one (empty) row per sequence
439 if (!hasGroups) { counts.resize(uniques); }
// append a zero cell for the new group to every row
441 for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); }
442 totalGroups.push_back(0);
// for now the new group occupies the last column
443 indexGroupMap[groupName] = groups.size()-1;
// snapshot of the pre-sort column positions, used below to move data to its new position
444 map<string, int> originalGroupMap = indexGroupMap;
446 //important to play well with others, :)
447 sort(groups.begin(), groups.end());
449 //fix indexGroupMap && totalGroups
450 vector<int> newTotals; newTotals.resize(groups.size(), 0);
451 for (int i = 0; i < groups.size(); i++) {
452 indexGroupMap[groups[i]] = i;
453 //find original spot of group[i]
454 int index = originalGroupMap[groups[i]];
455 newTotals[i] = totalGroups[index];
457 totalGroups = newTotals;
// apply the same permutation to every row of counts
460 for (int i = 0; i < counts.size(); i++) {
461 vector<int> newCounts; newCounts.resize(groups.size(), 0);
462 for (int j = 0; j < groups.size(); j++) {
463 //find original spot of group[j]
464 int index = originalGroupMap[groups[j]];
465 newCounts[j] = counts[i][index];
467 counts[i] = newCounts;
470 m->setAllGroups(groups);
474 catch(exception& e) {
475 m->errorOut(e, "CountTable", "addGroup");
479 /************************************************************/
// Removes the column of groupName from the table. Sequences whose total is zero at that
// point are removed as well, and the name -> row index map is rebuilt.
// An unknown group, or a table without group information, is reported and flagged through
// m->control_pressed.
// NOTE(review): several lines of this definition are not visible in this excerpt (e.g. the
// declaration/update of thisIndex, the adjustment of totals[i], the assignment of the new
// group list, and the return statements) - confirm against the full file.
481 int CountTable::removeGroup(string groupName) {
484 //save for later in case removing a group means we need to remove a seq.
// reverse maps a row index back to its sequence name
485 map<int, string> reverse;
486 for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
488 map<string, int>::iterator it = indexGroupMap.find(groupName);
489 if (it == indexGroupMap.end()) {
490 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
492 int indexOfGroupToRemove = it->second;
// copy of the mapping before it is modified; no use of it is visible in this excerpt
493 map<string, int> currentGroupIndex = indexGroupMap;
// rebuild the group list without groupName and renumber the remaining groups
494 vector<string> newGroups;
495 for (int i = 0; i < groups.size(); i++) {
496 if (groups[i] != groupName) {
497 newGroups.push_back(groups[i]);
498 indexGroupMap[groups[i]] = newGroups.size()-1;
501 indexGroupMap.erase(groupName);
// drop the group's running total
503 totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove);
// name -> row index map for the rows that survive
506 map<string, int> newIndexNameMap;
507 for (int i = 0; i < counts.size(); i++) {
// num is the count this sequence had in the removed group
508 int num = counts[i][indexOfGroupToRemove];
509 counts[i].erase(counts[i].begin()+indexOfGroupToRemove);
512 if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you.
513 counts.erase(counts.begin()+i);
514 totals.erase(totals.begin()+i);
// reverse[thisIndex] is the name that had row index thisIndex before any removal
518 newIndexNameMap[reverse[thisIndex]] = i;
521 indexNameMap = newIndexNameMap;
// with no groups left the table no longer has group information
523 if (groups.size() == 0) { hasGroups = false; }
525 }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->control_pressed = true; }
529 catch(exception& e) {
530 m->errorOut(e, "CountTable", "removeGroup");
534 /************************************************************/
535 //vector of groups for the seq
// Collects the names of all groups in which seqName has a non-zero count. A table without
// group info is reported and flagged through m->control_pressed.
// NOTE(review): the hasGroups test and the return statement are not visible in this excerpt.
536 vector<string> CountTable::getGroups(string seqName) {
538 vector<string> thisGroups;
// thisCounts[i] is the count of seqName in groups[i]
540 vector<int> thisCounts = getGroupCounts(seqName);
541 for (int i = 0; i < thisCounts.size(); i++) {
542 if (thisCounts[i] != 0) { thisGroups.push_back(groups[i]); }
544 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
548 catch(exception& e) {
549 m->errorOut(e, "CountTable", "getGroups");
553 /************************************************************/
554 //renames a sequence: newSeqName takes over the row index that oldSeqName had
// An unknown oldSeqName is reported and flagged through m->control_pressed.
// NOTE(review): indexNameMap[newSeqName] overwrites any existing entry for newSeqName -
// confirm callers never pass a name that is already in the table.
555 int CountTable::renameSeq(string oldSeqName, string newSeqName) {
558 map<string, int>::iterator it = indexNameMap.find(oldSeqName);
559 if (it == indexNameMap.end()) {
560 m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// keep the row index, drop the old key, and register the new key with the same index
562 int index = it->second;
563 indexNameMap.erase(it);
564 indexNameMap[newSeqName] = index;
569 catch(exception& e) {
570 m->errorOut(e, "CountTable", "renameSeq");
575 /************************************************************/
576 //total number of seqs represented by seq
// Returns the entry of totals for seqName. An unknown name is reported and flagged through
// m->control_pressed.
// NOTE(review): the value returned on the error path is not visible in this excerpt.
577 int CountTable::getNumSeqs(string seqName) {
580 map<string, int>::iterator it = indexNameMap.find(seqName);
581 if (it == indexNameMap.end()) {
582 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the row index of this sequence in totals
584 return totals[it->second];
589 catch(exception& e) {
590 m->errorOut(e, "CountTable", "getNumSeqs");
594 /************************************************************/
595 //returns unique index for sequence like get in NameAssignment
// The index is the value stored for seqName in indexNameMap. An unknown name is reported and
// flagged through m->control_pressed.
// NOTE(review): the value returned on the error path is not visible in this excerpt.
596 int CountTable::get(string seqName) {
599 map<string, int>::iterator it = indexNameMap.find(seqName);
600 if (it == indexNameMap.end()) {
601 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
602 }else { return it->second; }
606 catch(exception& e) {
607 m->errorOut(e, "CountTable", "get");
611 /************************************************************/
612 //add sequence without group info
// Registers seqName under the current value of uniques. A duplicate name, or a table that
// has groups (no group counts are supplied here), is reported and flagged through
// m->control_pressed.
// NOTE(review): the lines that update totals/total/uniques and the return statement are
// not visible in this excerpt.
613 int CountTable::push_back(string seqName) {
615 map<string, int>::iterator it = indexNameMap.find(seqName);
616 if (it == indexNameMap.end()) {
// the error is flagged, but the visible code still registers the name on the next line
617 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
618 indexNameMap[seqName] = uniques;
// duplicate sequence name
623 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
628 catch(exception& e) {
629 m->errorOut(e, "CountTable", "push_back");
633 /************************************************************/
// Removes seqName from the table: its per-group counts are subtracted from the group totals
// and zeroed, its total is zeroed, and its entry is erased from indexNameMap. The rows in
// counts/totals themselves are left in place by the visible code.
// An unknown name is reported and flagged through m->control_pressed.
// NOTE(review): the lines that use thisTotal and the return statement are not visible in
// this excerpt.
635 int CountTable::remove(string seqName) {
637 map<string, int>::iterator it = indexNameMap.find(seqName);
638 if (it != indexNameMap.end()) {
640 if (hasGroups){ //remove this sequences counts from group totals
641 for (int i = 0; i < totalGroups.size(); i++) { totalGroups[i] -= counts[it->second][i]; counts[it->second][i] = 0; }
// remember the old total before zeroing it
643 int thisTotal = totals[it->second]; totals[it->second] = 0;
645 indexNameMap.erase(it);
// message grammar fixed (was "contains does not include")
647 m->mothurOut("[ERROR]: Your count table does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true;
652 catch(exception& e) {
// report under this function's own name (previously reported as "push_back")
653 m->errorOut(e, "CountTable", "remove");
657 /************************************************************/
658 //add sequence without group info, but with a known total
// Registers seqName under the current value of uniques and appends thisTotal to totals.
// A duplicate name, or a table that has groups (no group counts are supplied here), is
// reported and flagged through m->control_pressed.
// NOTE(review): the lines that update total/uniques and the return statement are not
// visible in this excerpt.
659 int CountTable::push_back(string seqName, int thisTotal) {
661 map<string, int>::iterator it = indexNameMap.find(seqName);
662 if (it == indexNameMap.end()) {
// the error is flagged, but the visible code still registers the name on the next lines
663 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
664 indexNameMap[seqName] = uniques;
665 totals.push_back(thisTotal);
// duplicate sequence name
669 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
674 catch(exception& e) {
675 m->errorOut(e, "CountTable", "push_back");
679 /************************************************************/
680 //add sequence with group info
// groupCounts holds one count per group; each is added to the matching group total and
// summed into the sequence total. A duplicate name, or a groupCounts whose size differs
// from getNumGroups(), is reported and flagged through m->control_pressed.
// NOTE(review): the declaration of thisTotal, the updates of total/uniques and the return
// statement are not visible in this excerpt.
681 int CountTable::push_back(string seqName, vector<int> groupCounts) {
683 map<string, int>::iterator it = indexNameMap.find(seqName);
684 if (it == indexNameMap.end()) {
685 if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
// NOTE(review): the size check above only flags the error; as far as this excerpt shows, the
// loop below still indexes groupCounts[i] up to getNumGroups() - confirm no out-of-range
// access is possible when the sizes differ.
687 for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; }
// the per-group row is stored only when the table has groups
688 if (hasGroups) { counts.push_back(groupCounts); }
689 indexNameMap[seqName] = uniques;
690 totals.push_back(thisTotal);
// duplicate sequence name
694 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
699 catch(exception& e) {
700 m->errorOut(e, "CountTable", "push_back");
705 /************************************************************/
706 //create ListVector from uniques
// The ListVector is sized to the number of names in indexNameMap; each name is stored at the
// position given by its row index.
// NOTE(review): the return statement is not visible in this excerpt.
707 ListVector CountTable::getListVector() {
709 ListVector list(indexNameMap.size());
710 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
// leave the loop as soon as m->control_pressed is set
711 if (m->control_pressed) { break; }
// it->second = row index, it->first = sequence name
712 list.set(it->second, it->first);
716 catch(exception& e) {
717 m->errorOut(e, "CountTable", "getListVector");
722 /************************************************************/
723 //returns the names of all unique sequences in file
// The names are the keys of indexNameMap, collected in the map's iteration order.
// NOTE(review): the return statement is not visible in this excerpt.
724 vector<string> CountTable::getNamesOfSeqs() {
726 vector<string> names;
727 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
728 names.push_back(it->first);
733 catch(exception& e) {
734 m->errorOut(e, "CountTable", "getNamesOfSeqs");
738 /************************************************************/
739 //returns the names of all unique sequences in file mapped to their seqCounts
// Builds a fresh map: sequence name -> the entry of totals at that sequence's row index.
// NOTE(review): the return statement is not visible in this excerpt.
740 map<string, int> CountTable::getNameMap() {
742 map<string, int> names;
743 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
744 names[it->first] = totals[it->second];
749 catch(exception& e) {
750 m->errorOut(e, "CountTable", "getNameMap");
754 /************************************************************/
755 //returns the names of all unique sequences that have a non-zero count in the given group
// An unknown group, or a table without group info, is reported and flagged through
// m->control_pressed.
// NOTE(review): the hasGroups test and the return statement are not visible in this excerpt.
756 vector<string> CountTable::getNamesOfSeqs(string group) {
758 vector<string> names;
// it->second is the column index of the group
760 map<string, int>::iterator it = indexGroupMap.find(group);
761 if (it == indexGroupMap.end()) {
762 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// keep every sequence whose cell in that column is non-zero
764 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
765 if (counts[it2->second][it->second] != 0) { names.push_back(it2->first); }
768 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
772 catch(exception& e) {
773 m->errorOut(e, "CountTable", "getNamesOfSeqs");
777 /************************************************************/
778 //merges counts of seq1 and seq2, saving in seq1
// Adds seq2's per-group counts and total to seq1, then erases seq2 from indexNameMap.
// An unknown seq1 or seq2 is reported and flagged through m->control_pressed.
// NOTE(review): some lines of this definition (e.g. any clean-up of seq2's row in
// counts/totals and the return statement) are not visible in this excerpt.
779 int CountTable::mergeCounts(string seq1, string seq2) {
781 map<string, int>::iterator it = indexNameMap.find(seq1);
782 if (it == indexNameMap.end()) {
783 m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
785 map<string, int>::iterator it2 = indexNameMap.find(seq2);
786 if (it2 == indexNameMap.end()) {
787 m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second / it2->second are the row indexes of seq1 / seq2
790 for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; }
791 totals[it->second] += totals[it2->second];
// seq2 is no longer addressable by name
793 indexNameMap.erase(it2);
798 catch(exception& e) {
// report under this function's own name (previously reported as "getNamesOfSeqs")
799 m->errorOut(e, "CountTable", "mergeCounts");
803 /************************************************************/
// Copies the contents of another table into this one: first every group of ct is added
// through addGroup, then every sequence of ct is added through push_back together with its
// per-group counts.
// NOTE(review): the return statement is not visible in this excerpt.
804 int CountTable::copy(CountTable* ct) {
806 vector<string> thisGroups = ct->getNamesOfGroups();
// addGroup reports an error for a group this table already has
807 for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
808 vector<string> names = ct->getNamesOfSeqs();
810 for (int i = 0; i < names.size(); i++) {
// getGroupCounts reports an error when ct has no group info
811 vector<int> thisCounts = ct->getGroupCounts(names[i]);
812 push_back(names[i], thisCounts);
817 catch(exception& e) {
818 m->errorOut(e, "CountTable", "copy");
823 /************************************************************/