5 // Created by Sarah Westcott on 6/26/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "counttable.h"
11 /************************************************************/
// Builds the count table from in-memory data: n holds the sequence names,
// g maps a sequence name to its group name, gs holds the group names.
// NOTE(review): only part of this function is visible here (e.g. the statement
// that assigns seqName is not shown); comments describe the visible lines only.
12 int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
17 indexGroupMap.clear();
// copy the group names; hasGroups is set only when gs has at least one entry
20 for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it); hasGroups = true; }
21 numGroups = groups.size();
22 totalGroups.resize(numGroups, 0);
24 //sort groups to keep consistent with how we store the groups in groupmap
25 sort(groups.begin(), groups.end());
// indexGroupMap: group name -> position in the sorted groups vector
26 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
27 m->setAllGroups(groups);
// one pass per sequence name
31 for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
// stop early when control_pressed is set
33 if (m->control_pressed) { break; }
// per-group counts for this sequence, all starting at 0
37 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
38 map<string, string>::iterator itGroup = g.find(seqName);
// sequence has a group: count it once in that group's column and in the group total
40 if (itGroup != g.end()) {
41 groupCounts[indexGroupMap[itGroup->second]] = 1;
42 totalGroups[indexGroupMap[itGroup->second]]++;
// NOTE(review): this line only reports the missing sequence; it does not set control_pressed
43 }else { m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); }
// add the sequence only if its name is not already indexed
45 map<string, int>::iterator it2 = indexNameMap.find(seqName);
46 if (it2 == indexNameMap.end()) {
47 if (hasGroups) { counts.push_back(groupCounts); }
// the current value of uniques becomes this sequence's index
48 indexNameMap[seqName] = uniques;
58 m->errorOut(e, "CountTable", "createTable");
62 /************************************************************/
// Inspects the header line of a count file to decide whether it carries group columns.
// Resets hasGroups to false and total to 0, reads the first line of `file`,
// splits it on whitespace, and sets hasGroups when there are more than two columns.
// NOTE(review): the return statement is not visible in this view; presumably hasGroups is returned.
63 bool CountTable::testGroups(string file) {
65 m = MothurOut::getInstance(); hasGroups = false; total = 0;
67 m->openInputFile(file, in);
69 string headers = m->getline(in); m->gobble(in);
70 vector<string> columnHeaders = m->splitWhiteSpace(headers);
// more than two header columns means per-group columns are present
71 if (columnHeaders.size() > 2) { hasGroups = true; }
// report the failure under this function's own name (was mislabelled "readTable")
75 m->errorOut(e, "CountTable", "testGroups");
79 /************************************************************/
// Builds the count table from a name file and, optionally, a group file.
// namefile: two whitespace-separated columns are read per record; the second
//           column is split on ',' into the individual sequence names.
// groupfile: when non-empty, a GroupMap is read from it and used to count names per group.
// createGroup: when groupfile is empty and this is true, every name is counted in a single group "Group1".
// NOTE(review): several statements of this function are not visible in this view
// (declarations of in/error/thisTotal, the read loop header, return); comments cover visible lines only.
80 int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
// a blank namefile is reported and control_pressed is set
83 if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
89 indexGroupMap.clear();
92 map<int, string> originalGroupIndexes;
94 if (groupfile != "") {
// NOTE(review): groupMap is allocated with new and released on line 182 below; an exception in between would skip that delete
96 groupMap = new GroupMap(groupfile); groupMap->readMap();
97 numGroups = groupMap->getNumGroups();
98 groups = groupMap->getNamesOfGroups();
99 totalGroups.resize(numGroups, 0);
100 }else if(createGroup) {
// no group file: use one synthetic group
103 groups.push_back("Group1");
104 totalGroups.resize(numGroups, 0);
106 //sort groups to keep consistent with how we store the groups in groupmap
107 sort(groups.begin(), groups.end());
// indexGroupMap: group name -> position in the sorted groups vector
108 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
109 m->setAllGroups(groups);
119 m->openInputFile(namefile, in);
// stop early when control_pressed is set
123 if (m->control_pressed) { break; }
125 string firstCol, secondCol;
// read one record: first column, then the second column
126 in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
128 vector<string> names;
// the second column is a comma-separated list of names
129 m->splitAtChar(secondCol, names, ',');
// group name -> number of names from this record in that group
131 map<string, int> groupCounts;
133 if (groupfile != "") {
// start every known group at 0 so the lookup below only matches known groups
135 for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
137 //get counts for each of the users groups
138 for (int i = 0; i < names.size(); i++) {
139 string group = groupMap->getGroup(names[i]);
// getGroup's "not found" result is treated as an error and remembered in `error`
141 if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
143 map<string, int>::iterator it = groupCounts.find(group);
145 //if not found, then this sequence is not from a group we care about
146 if (it != groupCounts.end()) {
152 }else if (createGroup) {
153 groupCounts["Group1"]=0;
154 for (int i = 0; i < names.size(); i++) {
// NOTE(review): this local is not used on any visible line
155 string group = "Group1";
156 groupCounts["Group1"]++; thisTotal++;
// no group information at all: the total is just the number of names in the record
158 }else { thisTotal = names.size(); }
160 //turn the per-group map into a vector ordered like groups, and add it to the group totals
161 vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
162 for (int i = 0; i < numGroups; i++) {
163 thisGroupsCount[i] = groupCounts[groups[i]];
164 totalGroups[i] += thisGroupsCount[i];
// add the record only if its first-column name is not already indexed
167 map<string, int>::iterator it = indexNameMap.find(firstCol);
168 if (it == indexNameMap.end()) {
169 if (hasGroups) { counts.push_back(thisGroupsCount); }
// the current value of uniques becomes this sequence's index
170 indexNameMap[firstCol] = uniques;
171 totals.push_back(thisTotal);
176 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
// any recorded error sets control_pressed
181 if (error) { m->control_pressed = true; }
// release the GroupMap allocated above
182 if (groupfile != "") { delete groupMap; }
186 catch(exception& e) {
187 m->errorOut(e, "CountTable", "createTable");
191 /************************************************************/
// Reads a count table file into this object.
// The header line is split on whitespace; columns from the third onward are group names.
// Each following record supplies a name, a total, and one count per group column.
// NOTE(review): parts of this function are not visible in this view (e.g. where filename,
// in, name, thisTotal and error are set up, the read loop header, return).
192 int CountTable::readTable(string file) {
196 m->openInputFile(filename, in);
198 string headers = m->getline(in); m->gobble(in);
199 vector<string> columnHeaders = m->splitWhiteSpace(headers);
204 indexGroupMap.clear();
205 indexNameMap.clear();
// file column position (0-based among group columns) -> group name, kept because groups is sorted below
207 map<int, string> originalGroupIndexes;
// more than two header columns means per-group columns are present
208 if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2; }
209 for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; totalGroups.push_back(0); }
210 //sort groups to keep consistent with how we store the groups in groupmap
211 sort(groups.begin(), groups.end());
// indexGroupMap: group name -> position in the sorted groups vector
212 for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
213 m->setAllGroups(groups);
// stop early when control_pressed is set
222 if (m->control_pressed) { break; }
// read the sequence name and its total
224 in >> name; m->gobble(in); in >> thisTotal; m->gobble(in);
225 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); }
227 //if group info, then read it
228 vector<int> groupCounts; groupCounts.resize(numGroups, 0);
// the i-th value in the file belongs to the i-th header group; store it at that group's sorted position
229 for (int i = 0; i < numGroups; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; }
// add the record only if its name is not already indexed
231 map<string, int>::iterator it = indexNameMap.find(name);
232 if (it == indexNameMap.end()) {
233 if (hasGroups) { counts.push_back(groupCounts); }
// the current value of uniques becomes this sequence's index
234 indexNameMap[name] = uniques;
235 totals.push_back(thisTotal);
240 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
// any recorded error sets control_pressed
245 if (error) { m->control_pressed = true; }
249 catch(exception& e) {
250 m->errorOut(e, "CountTable", "readTable");
254 /************************************************************/
// Writes the table to `file` as tab-separated text: a header row, then one row per sequence.
// NOTE(review): parts of this function are not visible in this view (declaration of out,
// line endings, any hasGroups guard around the per-group loop, return).
255 int CountTable::printTable(string file) {
258 m->openOutputFile(file, out);
// header: the two fixed column names followed by each group name
259 out << "Representative_Sequence\ttotal\t";
260 for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
// rows are visited in indexNameMap order (a map keyed by sequence name)
263 for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
264 out << itNames->first << '\t' << totals[itNames->second] << '\t';
// one count per group, in groups order
267 for (int i = 0; i < groups.size(); i++) {
268 out << counts[itNames->second][i] << '\t';
276 catch(exception& e) {
277 m->errorOut(e, "CountTable", "printTable");
281 /************************************************************/
// Writes the tab-separated header row to `out`: the two fixed column names
// followed by each group name in groups order.
// NOTE(review): the line ending and return statement are not visible in this view.
282 int CountTable::printHeaders(ofstream& out) {
284 out << "Representative_Sequence\ttotal\t";
285 for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
289 catch(exception& e) {
290 m->errorOut(e, "CountTable", "printHeaders");
294 /************************************************************/
// Writes the row for seqName to `out`: name, total, then one count per group, tab-separated.
// An unknown name is reported and control_pressed is set.
// NOTE(review): the branch structure, line ending and return are not visible in this view.
295 int CountTable::printSeq(ofstream& out, string seqName) {
297 map<string, int>::iterator it = indexNameMap.find(seqName);
298 if (it == indexNameMap.end()) {
299 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the sequence's row index into totals and counts
301 out << it->first << '\t' << totals[it->second] << '\t';
303 for (int i = 0; i < groups.size(); i++) {
304 out << counts[it->second][i] << '\t';
311 catch(exception& e) {
312 m->errorOut(e, "CountTable", "printSeq");
316 /************************************************************/
317 //group counts for a seq: looks up seqName and copies its row of per-group counts
// An unknown name, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the declaration of temp and the return statement are not visible in this view.
318 vector<int> CountTable::getGroupCounts(string seqName) {
322 map<string, int>::iterator it = indexNameMap.find(seqName);
323 if (it == indexNameMap.end()) {
324 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// it->second is the sequence's row index into counts
326 temp = counts[it->second];
328 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
332 catch(exception& e) {
333 m->errorOut(e, "CountTable", "getGroupCounts");
337 /************************************************************/
338 //total number of sequences for the group: returns totalGroups at the group's index
// An unknown group, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the value returned on the error paths is not visible in this view.
339 int CountTable::getGroupCount(string groupName) {
342 map<string, int>::iterator it = indexGroupMap.find(groupName);
343 if (it == indexGroupMap.end()) {
344 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
346 return totalGroups[it->second];
348 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
352 catch(exception& e) {
353 m->errorOut(e, "CountTable", "getGroupCount");
357 /************************************************************/
358 //total number of sequences for the seq for the group: returns counts[row of seqName][column of groupName]
// An unknown group or sequence, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the value returned on the error paths is not visible in this view.
359 int CountTable::getGroupCount(string seqName, string groupName) {
// resolve the group column first
362 map<string, int>::iterator it = indexGroupMap.find(groupName);
363 if (it == indexGroupMap.end()) {
364 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// then the sequence row
366 map<string, int>::iterator it2 = indexNameMap.find(seqName);
367 if (it2 == indexNameMap.end()) {
368 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
370 return counts[it2->second][it->second];
373 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
377 catch(exception& e) {
378 m->errorOut(e, "CountTable", "getGroupCount");
382 /************************************************************/
383 //set the number of sequences for the seq for the group
// Stores num at counts[row of seqName][column of groupName] and applies the
// difference (num - previous value) to the group total, the overall total and
// the sequence's total so all aggregates stay consistent.
// An unknown group or sequence, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the return statement is not visible in this view.
384 int CountTable::setAbund(string seqName, string groupName, int num) {
// resolve the group column first
387 map<string, int>::iterator it = indexGroupMap.find(groupName);
388 if (it == indexGroupMap.end()) {
389 m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// then the sequence row
391 map<string, int>::iterator it2 = indexNameMap.find(seqName);
392 if (it2 == indexNameMap.end()) {
393 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// remember the old value so the aggregates can be adjusted by the delta
395 int oldCount = counts[it2->second][it->second];
396 counts[it2->second][it->second] = num;
397 totalGroups[it->second] += (num - oldCount);
398 total += (num - oldCount);
399 totals[it2->second] += (num - oldCount);
402 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
406 catch(exception& e) {
// report the failure under this function's own name (was mislabelled "set")
407 m->errorOut(e, "CountTable", "setAbund");
411 /************************************************************/
// Adds a new, empty group column named groupName and then re-sorts the groups,
// permuting totalGroups and every row of counts to match the new order.
// A group that already exists is reported, sets control_pressed, and returns 0.
// NOTE(review): parts of this function are not visible in this view (e.g. any update of
// hasGroups/numGroups, the final return).
413 int CountTable::addGroup(string groupName) {
415 bool sanity = m->inUsersGroups(groupName, groups);
416 if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true; return 0; }
418 groups.push_back(groupName);
// a table without groups has no count rows yet; create one (empty) row per unique sequence
419 if (!hasGroups) { counts.resize(uniques); }
// append a zero column for the new group to every row
421 for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); }
422 totalGroups.push_back(0);
423 indexGroupMap[groupName] = groups.size()-1;
// snapshot of the pre-sort positions, used below to move the data to the sorted positions
424 map<string, int> originalGroupMap = indexGroupMap;
426 //important to play well with others, :)
427 sort(groups.begin(), groups.end());
429 //fix indexGroupMap && totalGroups
430 vector<int> newTotals; newTotals.resize(groups.size(), 0);
431 for (int i = 0; i < groups.size(); i++) {
432 indexGroupMap[groups[i]] = i;
433 //find original spot of group[i]
434 int index = originalGroupMap[groups[i]];
435 newTotals[i] = totalGroups[index];
437 totalGroups = newTotals;
// apply the same permutation to each sequence's row of counts
440 for (int i = 0; i < counts.size(); i++) {
441 vector<int> newCounts; newCounts.resize(groups.size(), 0);
442 for (int j = 0; j < groups.size(); j++) {
443 //find original spot of group[j]
444 int index = originalGroupMap[groups[j]];
445 newCounts[j] = counts[i][index];
447 counts[i] = newCounts;
450 m->setAllGroups(groups);
454 catch(exception& e) {
455 m->errorOut(e, "CountTable", "addGroup");
459 /************************************************************/
460 //vector of groups for the seq: names of the groups in which seqName has a non-zero count
// A table without group info is reported and sets control_pressed.
// NOTE(review): the hasGroups test and the return statement are not visible in this view.
461 vector<string> CountTable::getGroups(string seqName) {
463 vector<string> thisGroups;
// thisCounts[i] is the count for groups[i]
465 vector<int> thisCounts = getGroupCounts(seqName);
466 for (int i = 0; i < thisCounts.size(); i++) {
467 if (thisCounts[i] != 0) { thisGroups.push_back(groups[i]); }
469 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
473 catch(exception& e) {
474 m->errorOut(e, "CountTable", "getGroups");
478 /************************************************************/
479 //renames a sequence: oldSeqName's index is re-registered under newSeqName
// The row index is unchanged, so the visible lines leave totals and counts untouched.
// An unknown oldSeqName is reported and sets control_pressed.
// NOTE(review): the return statement is not visible in this view.
480 int CountTable::renameSeq(string oldSeqName, string newSeqName) {
483 map<string, int>::iterator it = indexNameMap.find(oldSeqName);
484 if (it == indexNameMap.end()) {
485 m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// save the index before erasing, since erase invalidates the iterator
487 int index = it->second;
488 indexNameMap.erase(it);
489 indexNameMap[newSeqName] = index;
494 catch(exception& e) {
495 m->errorOut(e, "CountTable", "renameSeq");
500 /************************************************************/
501 //total number of seqs represented by seq: returns totals at seqName's index
// An unknown name is reported and sets control_pressed.
// NOTE(review): the value returned on the error path is not visible in this view.
502 int CountTable::getNumSeqs(string seqName) {
505 map<string, int>::iterator it = indexNameMap.find(seqName);
506 if (it == indexNameMap.end()) {
507 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
509 return totals[it->second];
514 catch(exception& e) {
515 m->errorOut(e, "CountTable", "getNumSeqs");
519 /************************************************************/
520 //returns unique index for sequence like get in NameAssignment
// The index is the value stored for seqName in indexNameMap.
// An unknown name is reported and sets control_pressed.
// NOTE(review): the value returned on the error path is not visible in this view.
521 int CountTable::get(string seqName) {
524 map<string, int>::iterator it = indexNameMap.find(seqName);
525 if (it == indexNameMap.end()) {
526 m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
527 }else { return it->second; }
531 catch(exception& e) {
532 m->errorOut(e, "CountTable", "get");
536 /************************************************************/
537 //add sequence without group info
// Registers seqName under the current value of uniques.
// If the table has groups, an error is reported and control_pressed is set,
// because no group information is supplied for the new sequence.
// A name that is already present is reported as a duplicate and sets control_pressed.
// NOTE(review): the updates to totals/uniques/total and the return are not visible in this view.
538 int CountTable::push_back(string seqName) {
540 map<string, int>::iterator it = indexNameMap.find(seqName);
541 if (it == indexNameMap.end()) {
542 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
543 indexNameMap[seqName] = uniques;
548 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
553 catch(exception& e) {
554 m->errorOut(e, "CountTable", "push_back");
558 /************************************************************/
// Removes seqName from the table.
// When the sequence exists: its per-group counts are subtracted from the group
// totals and zeroed (tables with groups only), its total is zeroed, and its
// entry is erased from indexNameMap.
// When it does not exist: an error is reported and control_pressed is set.
// NOTE(review): the updates to uniques/total and the return are not visible in this view.
560 int CountTable::remove(string seqName) {
562 map<string, int>::iterator it = indexNameMap.find(seqName);
// the removal branch dereferences `it`, so it must only run when the name was found
// (the test was inverted: it ran on end() and reported "not included" for names that exist)
563 if (it != indexNameMap.end()) {
565 if (hasGroups){ //remove this sequences counts from group totals
566 for (int i = 0; i < totalGroups.size(); i++) { totalGroups[i] -= counts[it->second][i]; counts[it->second][i] = 0; }
// keep the old total before zeroing the sequence's slot
568 int thisTotal = totals[it->second]; totals[it->second] = 0;
570 indexNameMap.erase(it);
572 m->mothurOut("[ERROR]: Your count table does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true;
577 catch(exception& e) {
// report the failure under this function's own name (was mislabelled "push_back")
578 m->errorOut(e, "CountTable", "remove");
582 /************************************************************/
583 //add sequence without group info, with an explicit total
// Registers seqName under the current value of uniques and appends thisTotal to totals.
// If the table has groups, an error is reported and control_pressed is set,
// because no group information is supplied for the new sequence.
// A name that is already present is reported as a duplicate and sets control_pressed.
// NOTE(review): the updates to uniques/total and the return are not visible in this view.
584 int CountTable::push_back(string seqName, int thisTotal) {
586 map<string, int>::iterator it = indexNameMap.find(seqName);
587 if (it == indexNameMap.end()) {
588 if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; }
589 indexNameMap[seqName] = uniques;
590 totals.push_back(thisTotal);
594 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
599 catch(exception& e) {
600 m->errorOut(e, "CountTable", "push_back");
604 /************************************************************/
605 //add sequence with group info
// groupCounts holds one count per group; the counts are added to the group totals,
// summed into the sequence's total, and stored as the sequence's row.
// A name that is already present is reported as a duplicate and sets control_pressed.
// NOTE(review): the declaration of thisTotal, the updates to uniques/total and the
// return are not visible in this view.
606 int CountTable::push_back(string seqName, vector<int> groupCounts) {
608 map<string, int>::iterator it = indexNameMap.find(seqName);
609 if (it == indexNameMap.end()) {
// a size mismatch is reported and sets control_pressed
// NOTE(review): no early exit is visible after this check, so the loop below may index
// groupCounts past its end when it is shorter than getNumGroups() - confirm
610 if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
612 for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; }
613 if (hasGroups) { counts.push_back(groupCounts); }
// the current value of uniques becomes this sequence's index
614 indexNameMap[seqName] = uniques;
615 totals.push_back(thisTotal);
619 m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
624 catch(exception& e) {
625 m->errorOut(e, "CountTable", "push_back");
630 /************************************************************/
631 //create ListVector from uniques
// The list is sized to the number of indexed names; each name is placed at the
// position given by its index in indexNameMap. Filling stops early when control_pressed is set.
// NOTE(review): assumes every stored index is below indexNameMap.size() - confirm for
// tables that have had sequences removed or merged.
632 ListVector CountTable::getListVector() {
634 ListVector list(indexNameMap.size());
635 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
636 if (m->control_pressed) { break; }
637 list.set(it->second, it->first);
641 catch(exception& e) {
642 m->errorOut(e, "CountTable", "getListVector");
647 /************************************************************/
648 //returns the names of all unique sequences in file
// Names are collected by iterating indexNameMap, so they come out in the map's key order.
// NOTE(review): the return statement is not visible in this view.
649 vector<string> CountTable::getNamesOfSeqs() {
651 vector<string> names;
652 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
653 names.push_back(it->first);
658 catch(exception& e) {
659 m->errorOut(e, "CountTable", "getNamesOfSeqs");
663 /************************************************************/
664 //returns the names of all unique sequences in file mapped to their seqCounts
// For each indexed name, the mapped value is totals at that name's index.
// NOTE(review): the return statement is not visible in this view.
665 map<string, int> CountTable::getNameMap() {
667 map<string, int> names;
668 for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
669 names[it->first] = totals[it->second];
674 catch(exception& e) {
675 m->errorOut(e, "CountTable", "getNameMap");
679 /************************************************************/
680 //returns the names of the unique sequences that have a non-zero count in the given group
// An unknown group, or a table without group info, is reported and sets control_pressed.
// NOTE(review): the hasGroups test and the return statement are not visible in this view.
681 vector<string> CountTable::getNamesOfSeqs(string group) {
683 vector<string> names;
// resolve the group's column index
685 map<string, int>::iterator it = indexGroupMap.find(group);
686 if (it == indexGroupMap.end()) {
687 m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// keep every sequence whose count in that column is non-zero
689 for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
690 if (counts[it2->second][it->second] != 0) { names.push_back(it2->first); }
693 }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
697 catch(exception& e) {
698 m->errorOut(e, "CountTable", "getNamesOfSeqs");
702 /************************************************************/
703 //merges counts of seq1 and seq2, saving in seq1
// seq2's per-group counts and total are added to seq1's, then seq2's entry is
// erased from indexNameMap. Group totals are unchanged by the visible lines,
// since counts only move between rows.
// An unknown seq1 or seq2 is reported and sets control_pressed.
// NOTE(review): any hasGroups guard around the per-group loop, the update to uniques
// and the return are not visible in this view.
704 int CountTable::mergeCounts(string seq1, string seq2) {
706 map<string, int>::iterator it = indexNameMap.find(seq1);
707 if (it == indexNameMap.end()) {
708 m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
710 map<string, int>::iterator it2 = indexNameMap.find(seq2);
711 if (it2 == indexNameMap.end()) {
712 m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
// fold seq2's row into seq1's row, column by column
715 for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; }
716 totals[it->second] += totals[it2->second];
718 indexNameMap.erase(it2);
723 catch(exception& e) {
// report the failure under this function's own name (was mislabelled "getNamesOfSeqs")
724 m->errorOut(e, "CountTable", "mergeCounts");
728 /************************************************************/
// Copies the contents of ct into this table: every group of ct is added with
// addGroup, then every sequence of ct is added with its per-group counts.
// NOTE(review): addGroup reports an error for a group that already exists here, and
// ct->getGroupCounts reports an error when ct has no group info - confirm callers
// only use this with an empty destination and a grouped source.
// NOTE(review): the return statement is not visible in this view.
729 int CountTable::copy(CountTable* ct) {
731 vector<string> thisGroups = ct->getNamesOfGroups();
732 for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
733 vector<string> names = ct->getNamesOfSeqs();
735 for (int i = 0; i < names.size(); i++) {
736 vector<int> thisCounts = ct->getGroupCounts(names[i]);
737 push_back(names[i], thisCounts);
742 catch(exception& e) {
743 m->errorOut(e, "CountTable", "copy");
748 /************************************************************/