X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=counttable.cpp;h=d5720acd386c36cf8cd713dcbc6d1bd5e507609b;hp=7f08509897f0765823b4e2e7b65f837809821c45;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=1f3d51cfdbe3e28c780501b7ed82d64fe27dba98 diff --git a/counttable.cpp b/counttable.cpp index 7f08509..d5720ac 100644 --- a/counttable.cpp +++ b/counttable.cpp @@ -8,9 +8,210 @@ #include "counttable.h" +/************************************************************/ +int CountTable::createTable(set& n, map& g, set& gs) { + try { + int numGroups = 0; + groups.clear(); + totalGroups.clear(); + indexGroupMap.clear(); + indexNameMap.clear(); + counts.clear(); + for (set::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it); hasGroups = true; } + numGroups = groups.size(); + totalGroups.resize(numGroups, 0); + + //sort groups to keep consistent with how we store the groups in groupmap + sort(groups.begin(), groups.end()); + for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; } + m->setAllGroups(groups); + + uniques = 0; + total = 0; + for (set::iterator it = n.begin(); it != n.end(); it++) { + + if (m->control_pressed) { break; } + + string seqName = *it; + + vector groupCounts; groupCounts.resize(numGroups, 0); + map::iterator itGroup = g.find(seqName); + + if (itGroup != g.end()) { + groupCounts[indexGroupMap[itGroup->second]] = 1; + totalGroups[indexGroupMap[itGroup->second]]++; + }else { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); + } + + map::iterator it2 = indexNameMap.find(seqName); + if (it2 == indexNameMap.end()) { + if (hasGroups) { counts.push_back(groupCounts); } + indexNameMap[seqName] = uniques; + totals.push_back(1); + total++; + uniques++; + } + } + + if (hasGroups) { + for (int i = 0; i < totalGroups.size(); i++) { + if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } + } + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "createTable"); + exit(1); + } +} +/************************************************************/ +bool CountTable::testGroups(string file) { + try { + m = MothurOut::getInstance(); hasGroups = false; total = 0; + ifstream in; + m->openInputFile(file, in); + + string headers = m->getline(in); m->gobble(in); + vector columnHeaders = m->splitWhiteSpace(headers); + if (columnHeaders.size() > 2) { hasGroups = true; } + return hasGroups; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "readTable"); + exit(1); + } +} +/************************************************************/ +int CountTable::createTable(string namefile, string groupfile, bool createGroup) { + try { + + if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; } + + GroupMap* groupMap; + int numGroups = 0; + groups.clear(); + totalGroups.clear(); + indexGroupMap.clear(); + indexNameMap.clear(); + counts.clear(); + map originalGroupIndexes; + + if (groupfile != "") { + hasGroups = true; + groupMap = new GroupMap(groupfile); groupMap->readMap(); + numGroups = groupMap->getNumGroups(); + groups = groupMap->getNamesOfGroups(); + totalGroups.resize(numGroups, 0); + }else if(createGroup) { + hasGroups = true; + numGroups = 1; + groups.push_back("Group1"); + totalGroups.resize(numGroups, 0); + } + //sort groups to keep consistent with how we store the groups in groupmap + sort(groups.begin(), groups.end()); + for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; } + m->setAllGroups(groups); + + bool error = false; + string name; + uniques = 0; + total = 0; + + + //open input file + ifstream in; + m->openInputFile(namefile, in); + + int total = 0; + while (!in.eof()) { + if (m->control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in); + + m->checkName(firstCol); + m->checkName(secondCol); + + vector names; + m->splitAtChar(secondCol, names, ','); + + map groupCounts; + int thisTotal = 0; + if (groupfile != "") { + //set to 0 + for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; } + + //get counts for each of the users groups + for (int i = 0; i < names.size(); i++) { + string group = groupMap->getGroup(names[i]); + + if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; } + else { + map::iterator it = groupCounts.find(group); + + //if not found, then this sequence is not from a group we care about + if (it != groupCounts.end()) { + it->second++; + thisTotal++; + } + } + } + }else if (createGroup) { + groupCounts["Group1"]=0; + for (int i = 0; i < names.size(); i++) { + string group = "Group1"; + groupCounts["Group1"]++; thisTotal++; + } + }else { thisTotal = names.size(); } + + //if group info, then read it + vector thisGroupsCount; thisGroupsCount.resize(numGroups, 0); + for (int i = 0; i < numGroups; i++) { + thisGroupsCount[i] = groupCounts[groups[i]]; + totalGroups[i] += thisGroupsCount[i]; + } + + map::iterator it = indexNameMap.find(firstCol); + if (it == indexNameMap.end()) { + if (hasGroups) { counts.push_back(thisGroupsCount); } + indexNameMap[firstCol] = uniques; + totals.push_back(thisTotal); + total += thisTotal; + uniques++; + }else { + error = true; + m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); + } + } + in.close(); + + if (error) { m->control_pressed = true; } + else { //check for zero groups + if (hasGroups) { + for (int i = 0; i < totalGroups.size(); i++) { + if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } + } + } + } + if (groupfile != "") { delete groupMap; } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "createTable"); + exit(1); + } +} /************************************************************/ -int CountTable::readTable(string file) { +int CountTable::readTable(string file, bool readGroups, bool mothurRunning) { try { filename = file; ifstream in; @@ -26,7 +227,7 @@ int CountTable::readTable(string file) { indexNameMap.clear(); counts.clear(); map originalGroupIndexes; - if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2; } + if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroups = columnHeaders.size() - 2; } for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; totalGroups.push_back(0); } //sort groups to keep consistent with how we store the groups in groupmap sort(groups.begin(), groups.end()); @@ -42,11 +243,21 @@ int CountTable::readTable(string file) { if (m->control_pressed) { break; } - in >> name >> thisTotal; m->gobble(in); + in >> name; m->gobble(in); in >> thisTotal; m->gobble(in); + if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); } + + if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. Please correct."); m->mothurOutEndLine(); + } //if group info, then read it vector groupCounts; groupCounts.resize(numGroups, 0); - for (int i = 0; i < numGroups; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; } + if (columnHeaders.size() > 2) { //file contains groups + if (readGroups) { //user wants to save them + for (int i = 0; i < numGroups; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; } + }else { //read and discard + m->getline(in); m->gobble(in); + } + } map::iterator it = indexNameMap.find(name); if (it == indexNameMap.end()) { @@ -63,6 +274,13 @@ int CountTable::readTable(string file) { in.close(); if (error) { m->control_pressed = true; } + else { //check for zero groups + if (hasGroups) { + for (int i = 0; i < totalGroups.size(); i++) { + if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } + } + } + } return 0; } @@ -72,6 +290,84 @@ int CountTable::readTable(string file) { } } /************************************************************/ +int CountTable::printTable(string file) { + try { + ofstream out; + m->openOutputFile(file, out); + out << "Representative_Sequence\ttotal\t"; + for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; } + out << endl; + + map reverse; //use this to preserve order + for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } + + for (int i = 0; i < totals.size(); i++) { + map::iterator itR = reverse.find(i); + + if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap + out << itR->second << '\t' << totals[i] << '\t'; + if (hasGroups) { + for (int j = 0; j < groups.size(); j++) { + out << counts[i][j] << '\t'; + } + } + out << endl; + } + } + /*for (map::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) { + out << itNames->first << '\t' << totals[itNames->second] << '\t'; + if (hasGroups) { + + for (int i = 0; i < groups.size(); i++) { + out << counts[itNames->second][i] << '\t'; + } + } + out << endl; + }*/ + out.close(); + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "printTable"); + exit(1); + } +} +/************************************************************/ +int CountTable::printHeaders(ofstream& out) { + try { + out << "Representative_Sequence\ttotal\t"; + for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; } + out << endl; + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "printHeaders"); + exit(1); + } +} +/************************************************************/ +int CountTable::printSeq(ofstream& out, string seqName) { + try { + map::iterator it = indexNameMap.find(seqName); + if (it == indexNameMap.end()) { + m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { + out << it->first << '\t' << totals[it->second] << '\t'; + if (hasGroups) { + for (int i = 0; i < groups.size(); i++) { + out << counts[it->second][i] << '\t'; + } + } + out << endl; + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "printSeq"); + exit(1); + } +} +/************************************************************/ //group counts for a seq vector CountTable::getGroupCounts(string seqName) { try { @@ -79,6 +375,10 @@ vector CountTable::getGroupCounts(string seqName) { if (hasGroups) { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { temp = counts[it->second]; @@ -99,7 +399,7 @@ int CountTable::getGroupCount(string groupName) { if (hasGroups) { map::iterator it = indexGroupMap.find(groupName); if (it == indexGroupMap.end()) { - m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { return totalGroups[it->second]; } @@ -119,11 +419,15 @@ int CountTable::getGroupCount(string seqName, string groupName) { if (hasGroups) { map::iterator it = indexGroupMap.find(groupName); if (it == indexGroupMap.end()) { - m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { map::iterator it2 = indexNameMap.find(seqName); if (it2 == indexNameMap.end()) { - m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { return counts[it2->second][it->second]; } @@ -137,6 +441,189 @@ int CountTable::getGroupCount(string seqName, string groupName) { exit(1); } } +/************************************************************/ +//set the number of sequences for the seq for the group +int CountTable::setAbund(string seqName, string groupName, int num) { + try { + if (hasGroups) { + map::iterator it = indexGroupMap.find(groupName); + if (it == indexGroupMap.end()) { + m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { + map::iterator it2 = indexNameMap.find(seqName); + if (it2 == indexNameMap.end()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { + int oldCount = counts[it2->second][it->second]; + counts[it2->second][it->second] = num; + totalGroups[it->second] += (num - oldCount); + total += (num - oldCount); + totals[it2->second] += (num - oldCount); + } + } + }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "set"); + exit(1); + } +} +/************************************************************/ +//add group +int CountTable::addGroup(string groupName) { + try { + bool sanity = m->inUsersGroups(groupName, groups); + if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true; return 0; } + + groups.push_back(groupName); + if (!hasGroups) { counts.resize(uniques); } + + for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); } + totalGroups.push_back(0); + indexGroupMap[groupName] = groups.size()-1; + map originalGroupMap = indexGroupMap; + + //important to play well with others, :) + sort(groups.begin(), groups.end()); + + //fix indexGroupMap && totalGroups + vector newTotals; newTotals.resize(groups.size(), 0); + for (int i = 0; i < groups.size(); i++) { + indexGroupMap[groups[i]] = i; + //find original spot of group[i] + int index = originalGroupMap[groups[i]]; + newTotals[i] = totalGroups[index]; + } + totalGroups = newTotals; + + //fix counts vectors + for (int i = 0; i < counts.size(); i++) { + vector newCounts; newCounts.resize(groups.size(), 0); + for (int j = 0; j < groups.size(); j++) { + //find original spot of group[i] + int index = originalGroupMap[groups[j]]; + newCounts[j] = counts[i][index]; + } + counts[i] = newCounts; + } + hasGroups = true; + m->setAllGroups(groups); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "addGroup"); + exit(1); + } +} +/************************************************************/ +//remove group +int CountTable::removeGroup(string groupName) { + try { + if (hasGroups) { + //save for later in case removing a group means we need to remove a seq. + map reverse; + for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } + + map::iterator it = indexGroupMap.find(groupName); + if (it == indexGroupMap.end()) { + m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { + int indexOfGroupToRemove = it->second; + map currentGroupIndex = indexGroupMap; + vector newGroups; + for (int i = 0; i < groups.size(); i++) { + if (groups[i] != groupName) { + newGroups.push_back(groups[i]); + indexGroupMap[groups[i]] = newGroups.size()-1; + } + } + indexGroupMap.erase(groupName); + groups = newGroups; + totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove); + + int thisIndex = 0; + map newIndexNameMap; + for (int i = 0; i < counts.size(); i++) { + int num = counts[i][indexOfGroupToRemove]; + counts[i].erase(counts[i].begin()+indexOfGroupToRemove); + totals[i] -= num; + total -= num; + if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you. + counts.erase(counts.begin()+i); + totals.erase(totals.begin()+i); + uniques--; + i--; + } + newIndexNameMap[reverse[thisIndex]] = i; + thisIndex++; + } + indexNameMap = newIndexNameMap; + + if (groups.size() == 0) { hasGroups = false; } + } + }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->control_pressed = true; } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "removeGroup"); + exit(1); + } +} +/************************************************************/ +//vector of groups for the seq +vector CountTable::getGroups(string seqName) { + try { + vector thisGroups; + if (hasGroups) { + vector thisCounts = getGroupCounts(seqName); + for (int i = 0; i < thisCounts.size(); i++) { + if (thisCounts[i] != 0) { thisGroups.push_back(groups[i]); } + } + }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; } + + return thisGroups; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "getGroups"); + exit(1); + } +} +/************************************************************/ +//total number of seqs represented by seq +int CountTable::renameSeq(string oldSeqName, string newSeqName) { + try { + + map::iterator it = indexNameMap.find(oldSeqName); + if (it == indexNameMap.end()) { + if (hasGroupInfo()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(oldSeqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + oldSeqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + } + m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { + int index = it->second; + indexNameMap.erase(it); + indexNameMap[newSeqName] = index; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "renameSeq"); + exit(1); + } +} + /************************************************************/ //total number of seqs represented by seq int CountTable::getNumSeqs(string seqName) { @@ -144,6 +631,12 @@ int CountTable::getNumSeqs(string seqName) { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { + if (hasGroupInfo()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + } m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { return totals[it->second]; @@ -157,7 +650,147 @@ int CountTable::getNumSeqs(string seqName) { } } /************************************************************/ -//returns names of seqs +//returns unique index for sequence like get in NameAssignment +int CountTable::get(string seqName) { + try { + + map::iterator it = indexNameMap.find(seqName); + if (it == indexNameMap.end()) { + if (hasGroupInfo()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + } + m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { return it->second; } + + return -1; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "get"); + exit(1); + } +} +/************************************************************/ +//add seqeunce without group info +int CountTable::push_back(string seqName) { + try { + map::iterator it = indexNameMap.find(seqName); + if (it == indexNameMap.end()) { + if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; } + indexNameMap[seqName] = uniques; + totals.push_back(1); + total++; + uniques++; + }else { + m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "push_back"); + exit(1); + } +} +/************************************************************/ +//remove sequence +int CountTable::remove(string seqName) { + try { + map::iterator it = indexNameMap.find(seqName); + if (it != indexNameMap.end()) { + uniques--; + if (hasGroups){ //remove this sequences counts from group totals + for (int i = 0; i < totalGroups.size(); i++) { totalGroups[i] -= counts[it->second][i]; counts[it->second][i] = 0; } + } + int thisTotal = totals[it->second]; totals[it->second] = 0; + total -= thisTotal; + indexNameMap.erase(it); + }else { + if (hasGroupInfo()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seqName, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + } + m->mothurOut("[ERROR]: Your count table contains does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "push_back"); + exit(1); + } +} +/************************************************************/ +//add seqeunce without group info +int CountTable::push_back(string seqName, int thisTotal) { + try { + map::iterator it = indexNameMap.find(seqName); + if (it == indexNameMap.end()) { + if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + "."); m->mothurOutEndLine(); m->control_pressed = true; } + indexNameMap[seqName] = uniques; + totals.push_back(thisTotal); + total+=thisTotal; + uniques++; + }else { + m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "push_back"); + exit(1); + } +} +/************************************************************/ +//add sequence with group info +int CountTable::push_back(string seqName, vector groupCounts) { + try { + map::iterator it = indexNameMap.find(seqName); + if (it == indexNameMap.end()) { + if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; } + int thisTotal = 0; + for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; } + if (hasGroups) { counts.push_back(groupCounts); } + indexNameMap[seqName] = uniques; + totals.push_back(thisTotal); + total+= thisTotal; + uniques++; + }else { + m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "push_back"); + exit(1); + } +} + +/************************************************************/ +//create ListVector from uniques +ListVector CountTable::getListVector() { + try { + ListVector list(indexNameMap.size()); + for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { + if (m->control_pressed) { break; } + list.set(it->second, it->first); + } + return list; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "getListVector"); + exit(1); + } +} + +/************************************************************/ +//returns the names of all unique sequences in file vector CountTable::getNamesOfSeqs() { try { vector names; @@ -173,29 +806,75 @@ vector CountTable::getNamesOfSeqs() { } } /************************************************************/ -//returns names of seqs +//returns the names of all unique sequences in file mapped to their seqCounts +map CountTable::getNameMap() { + try { + map names; + for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { + names[it->first] = totals[it->second]; + } + + return names; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "getNameMap"); + exit(1); + } +} +/************************************************************/ +//returns the names of all unique sequences in file +vector CountTable::getNamesOfSeqs(string group) { + try { + vector names; + if (hasGroups) { + map::iterator it = indexGroupMap.find(group); + if (it == indexGroupMap.end()) { + m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { + for (map::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) { + if (counts[it2->second][it->second] != 0) { names.push_back(it2->first); } + } + } + }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; } + + return names; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "getNamesOfSeqs"); + exit(1); + } +} +/************************************************************/ +//merges counts of seq1 and seq2, saving in seq1 int CountTable::mergeCounts(string seq1, string seq2) { try { map::iterator it = indexNameMap.find(seq1); if (it == indexNameMap.end()) { + if (hasGroupInfo()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seq1, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq1 + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + } m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { map::iterator it2 = indexNameMap.find(seq2); if (it2 == indexNameMap.end()) { + if (hasGroupInfo()) { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(seq2, groups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq2 + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } + } m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true; }else { //merge data - for (int i = 0; i < groups.size(); i++) { - counts[it->second][i] += counts[it2->second][i]; - counts[it2->second][i] = 0; - } + for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; } totals[it->second] += totals[it2->second]; - totals[it2->second] = 0; uniques--; indexNameMap.erase(it2); } } - return 0; } catch(exception& e) { @@ -203,6 +882,25 @@ int CountTable::mergeCounts(string seq1, string seq2) { exit(1); } } +/************************************************************/ +int CountTable::copy(CountTable* ct) { + try { + vector thisGroups = ct->getNamesOfGroups(); + for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); } + vector names = ct->getNamesOfSeqs(); + + for (int i = 0; i < names.size(); i++) { + vector thisCounts = ct->getGroupCounts(names[i]); + push_back(names[i], thisCounts); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "copy"); + exit(1); + } +} /************************************************************/