2 * sharedSharedListVector.cpp
5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sabundvector.hpp"
11 #include "rabundvector.hpp"
12 #include "ordervector.hpp"
13 #include "sharedlistvector.h"
14 #include "sharedordervector.h"
15 #include "sharedutilities.h"
17 /***********************************************************************/
19 SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
21 /***********************************************************************/
23 SharedListVector::SharedListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
25 /***********************************************************************/
// Stream constructor: starts from zeroed statistics, attaches the sequence-to-group
// lookup (GroupMap or CountTable), makes sure m->listBinLabelsInFile holds bin labels,
// then sizes binLabels and data from the bin count `hold`.
// m->saveNextLabel is set to the label that was read, and reset to "" once the stream is at EOF.
// Exceptions are reported through m->errorOut.
// NOTE(review): `hold` and `temp` are used below but their declarations and the reads that
// fill them are not visible here - presumably `hold` is the number of bins in the row; confirm.
26 SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
28 groupmap = NULL; countTable = NULL;
29 //set up groupmap for later.
// "group" mode builds a GroupMap from the configured group file.
30 if (m->groupMode == "group") {
31 groupmap = new GroupMap(m->getGroupFile());
// A CountTable is created and loaded from the configured count table file
// (presumably the alternative to the GroupMap above - confirm).
34 countTable = new CountTable();
35 countTable->readTable(m->getCountTableFile(), true, false);
40 //are we at the beginning of the file??
// An empty saveNextLabel is treated as "nothing has been read from this file yet".
41 if (m->saveNextLabel == "") {
44 //is this a shared file that has headers
45 if (label == "label") {
// Consume the next header token, then take the remainder of the header line as the label list.
48 f >> label; m->gobble(f);
51 label = m->getline(f); m->gobble(f);
53 //parse labels to save
// Split the header line on whitespace; each token becomes one entry of m->listBinLabelsInFile.
54 istringstream iStringStream(label);
55 m->listBinLabelsInFile.clear();
56 while(!iStringStream.eof()){
57 if (m->control_pressed) { break; }
59 iStringStream >> temp; m->gobble(iStringStream);
61 m->listBinLabelsInFile.push_back(temp);
69 //make binlabels because we don't have any
// Generated labels are "Otu" + the 1-based bin number, left-padded with zeros so every
// number has as many digits as `hold` itself (e.g. hold == 120 gives Otu001 ... Otu120).
70 string snumBins = toString(hold);
71 m->listBinLabelsInFile.clear();
72 for (int i = 0; i < hold; i++) {
73 //if there is a bin label use it otherwise make one
74 string binLabel = "Otu";
75 string sbinNumber = toString(i+1);
76 if (sbinNumber.length() < snumBins.length()) {
77 int diff = snumBins.length() - sbinNumber.length();
78 for (int h = 0; h < diff; h++) { binLabel += "0"; }
80 binLabel += sbinNumber;
81 m->listBinLabelsInFile.push_back(binLabel);
// Remember the label that was just read.
84 m->saveNextLabel = label;
87 m->saveNextLabel = label;
// Copy the first `hold` file-level labels into this vector's own binLabels.
// NOTE(review): this assumes m->listBinLabelsInFile has at least `hold` entries; a shorter
// list would make begin()+hold run past the end - confirm the file format guarantees it.
90 binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
// Start with `hold` empty bins; the loop below runs once per bin.
92 data.assign(hold, "");
93 string inputData = "";
95 for(int i=0;i<hold;i++){
// Reaching EOF clears the saved label.
101 if (f.eof()) { m->saveNextLabel = ""; }
104 catch(exception& e) {
105 m->errorOut(e, "SharedListVector", "SharedListVector");
110 /***********************************************************************/
// Replaces the contents of bin `binNumber` with `seqNames` and keeps the running
// statistics (numBins, maxRank, numSeqs) in step with the change.
// binNumber indexes data directly; no bounds check is performed here.
// Exceptions are reported through m->errorOut.
111 void SharedListVector::set(int binNumber, string seqNames){
// Name counts before and after the replacement drive all of the bookkeeping below.
113 int nNames_old = m->getNumNames(data[binNumber]);
114 data[binNumber] = seqNames;
115 int nNames_new = m->getNumNames(seqNames);
// A bin that had no names before counts as newly filled; one that has no names now counts
// as emptied. Replacing an empty bin with an empty string therefore nets to no change.
117 if(nNames_old == 0) { numBins++; }
118 if(nNames_new == 0) { numBins--; }
// maxRank only ever grows here; it is not lowered when a large bin is replaced by a smaller one.
119 if(nNames_new > maxRank) { maxRank = nNames_new; }
// Adjust the sequence total by the difference between the new and old name counts.
121 numSeqs += (nNames_new - nNames_old);
125 catch(exception& e) {
126 m->errorOut(e, "SharedListVector", "set");
131 /***********************************************************************/
133 string SharedListVector::get(int index){
136 /***********************************************************************/
138 void SharedListVector::setLabels(vector<string> labels){
142 catch(exception& e) {
143 m->errorOut(e, "SharedListVector", "setLabels");
148 /***********************************************************************/
149 //could potentially end up with duplicate binlabel names with code below.
150 //we don't currently use them in a way that would do that.
151 //if you had a listfile that had been subsampled and then added to it, dup names would be possible.
// Brings binLabels up to date when there are fewer labels than data entries.
// Labels are the tag header ("Otu", or "PhyloType" when m->sharedHeaderMode is "tax")
// followed by a number that is left-padded with zeros to the digit count of numBins.
// Exceptions are reported through m->errorOut.
152 vector<string> SharedListVector::getLabels(){
154 string tagHeader = "Otu";
155 if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; }
// Nothing is rebuilt unless labels are missing.
// NOTE(review): the guard compares against data.size() but the loop below runs to numBins -
// confirm the two are expected to agree here.
157 if (binLabels.size() < data.size()) {
158 string snumBins = toString(numBins);
160 for (int i = 0; i < numBins; i++) {
161 string binLabel = tagHeader;
// Existing label: keep what m->getSimpleLabel returns for it, but rebuild the label with
// the current tag header and padding width.
163 if (i < binLabels.size()) { //label exists, check leading zeros length
164 string sbinNumber = m->getSimpleLabel(binLabels[i]);
165 if (sbinNumber.length() < snumBins.length()) {
166 int diff = snumBins.length() - sbinNumber.length();
167 for (int h = 0; h < diff; h++) { binLabel += "0"; }
169 binLabel += sbinNumber;
170 binLabels[i] = binLabel;
// No label stored for this position: generate one from the 1-based bin index and append it.
172 string sbinNumber = toString(i+1);
173 if (sbinNumber.length() < snumBins.length()) {
174 int diff = snumBins.length() - sbinNumber.length();
175 for (int h = 0; h < diff; h++) { binLabel += "0"; }
177 binLabel += sbinNumber;
178 binLabels.push_back(binLabel);
184 catch(exception& e) {
185 m->errorOut(e, "SharedListVector", "getLabels");
189 /***********************************************************************/
// Appends `seqNames` as a new entry at the end of data and raises maxRank when the new
// entry holds more names than any seen so far.
// Exceptions are reported through m->errorOut.
191 void SharedListVector::push_back(string seqNames){
193 data.push_back(seqNames);
// Number of names in the new entry, as counted by m->getNumNames.
194 int nNames = m->getNumNames(seqNames);
198 if(nNames > maxRank) { maxRank = nNames; }
202 catch(exception& e) {
203 m->errorOut(e, "SharedListVector", "push_back");
208 /***********************************************************************/
210 void SharedListVector::resize(int size){
214 /***********************************************************************/
216 int SharedListVector::size(){
219 /***********************************************************************/
221 void SharedListVector::clear(){
229 /***********************************************************************/
// Writes this list to `output` as tab-separated fields: the label, numBins, then the
// entries of data in index order.
// Exceptions are reported through m->errorOut.
231 void SharedListVector::print(ostream& output){
233 output << label << '\t' << numBins << '\t';
// Each written entry is followed by a tab, so the last field also ends with one.
235 for(int i=0;i<data.size();i++){
237 output << data[i] << '\t';
242 catch(exception& e) {
243 m->errorOut(e, "SharedListVector", "print");
249 /***********************************************************************/
// Builds an RAbundVector with one abundance per entry of data, in the same order as data.
// Each abundance is the number of names in that entry, as counted by m->getNumNames.
// Exceptions are reported through m->errorOut.
251 RAbundVector SharedListVector::getRAbundVector(){
255 for(int i=0;i<data.size();i++){
256 int binSize = m->getNumNames(data[i]);
257 rav.push_back(binSize);
// The sort/trim below is intentionally disabled: per the original note it broke the
// name mapping steps, so the abundances stay in bin order.
260 // This was here before to output data in a nice format, but it screws up the name mapping steps
261 // sort(rav.rbegin(), rav.rend());
263 // for(int i=data.size()-1;i>=0;i--){
264 // if(rav.get(i) == 0){ rav.pop_back(); }
273 catch(exception& e) {
274 m->errorOut(e, "SharedListVector", "getRAbundVector");
279 /***********************************************************************/
// Builds an SAbundVector histogram of bin sizes: slot s counts how many entries of data
// hold exactly s names.
// Exceptions are reported through m->errorOut.
281 SAbundVector SharedListVector::getSAbundVector(){
// maxRank+1 slots so that index maxRank (the largest recorded bin size) is addressable.
283 SAbundVector sav(maxRank+1);
285 for(int i=0;i<data.size();i++){
286 int binSize = m->getNumNames(data[i]);
// One more bin of this size.
287 sav.set(binSize, sav.get(binSize) + 1);
294 catch(exception& e) {
295 m->errorOut(e, "SharedListVector", "getSAbundVector");
300 /***********************************************************************/
301 SharedOrderVector* SharedListVector::getSharedOrderVector(){
303 SharedOrderVector* order = new SharedOrderVector();
304 order->setLabel(label);
306 for(int i=0;i<numBins;i++){
307 int binSize = m->getNumNames(get(i)); //find number of individual in given bin
308 string names = get(i);
309 vector<string> binNames;
310 m->splitAtComma(names, binNames);
311 if (m->groupMode != "group") {
313 for (int j = 0; j < binNames.size(); j++) { binSize += countTable->getNumSeqs(binNames[i]); }
315 for (int j = 0; j < binNames.size(); j++) {
316 if (m->control_pressed) { return order; }
317 if (m->groupMode == "group") {
318 string groupName = groupmap->getGroup(binNames[i]);
319 if(groupName == "not found") { m->mothurOut("Error: Sequence '" + binNames[i] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
321 order->push_back(i, binSize, groupName); //i represents what bin you are in
323 vector<int> groupAbundances = countTable->getGroupCounts(binNames[i]);
324 vector<string> groupNames = countTable->getNamesOfGroups();
325 for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true.
326 if (m->control_pressed) { return order; }
327 for (int l = 0; l < groupAbundances[k]; l++) { order->push_back(i, binSize, groupNames[k]); }
333 random_shuffle(order->begin(), order->end());
334 order->updateStats();
338 catch(exception& e) {
339 m->errorOut(e, "SharedListVector", "getSharedOrderVector");
343 /***********************************************************************/
// Builds the per-bin abundance vector for the single group `groupName`.
// Also publishes this list's binLabels as m->currentSharedBinLabels.
// If m->control_pressed is set during the scan, the partially filled vector is returned.
// Exceptions are reported through m->errorOut.
344 SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
346 m->currentSharedBinLabels = binLabels;
// One slot per entry of data.
348 SharedRAbundVector rav(data.size());
350 for(int i=0;i<numBins;i++){
// Split the bin's comma-separated names into individual sequence names.
351 string names = get(i);
352 vector<string> binNames;
353 m->splitAtComma(names, binNames);
354 for (int j = 0; j < binNames.size(); j++) {
355 if (m->control_pressed) { return rav; }
// "group" mode: each name counts once, and only if the group map puts it in groupName.
// A name the group map does not know is reported and the program exits.
356 if (m->groupMode == "group") {
357 string group = groupmap->getGroup(binNames[j]);
358 if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
359 if (group == groupName) { //this name is in the group you want the vector for.
360 rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
// Count-table lookup: add this name's count for groupName to the bin
// (presumably the non-"group" branch - confirm).
363 int count = countTable->getGroupCount(binNames[j], groupName);
364 rav.set(i, rav.getAbundance(i) + count, groupName);
370 rav.setGroup(groupName);
375 catch(exception& e) {
376 m->errorOut(e, "SharedListVector", "getSharedRAbundVector");
380 /***********************************************************************/
// Builds one heap-allocated SharedRAbundVector per known group, fills every group's
// per-bin abundances in a single pass over the bins, and collects the vectors of the
// groups the user selected in `lookup`.
// Also publishes this list's binLabels as m->currentSharedBinLabels and updates m's
// group selection via m->setGroups.
// Exceptions are reported through m->errorOut.
381 vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
383 m->currentSharedBinLabels = binLabels;
386 util = new SharedUtil();
387 vector<SharedRAbundVector*> lookup; //contains just the groups the user selected
// Vectors that will be deleted before this function finishes (see the loop near the end).
388 vector<SharedRAbundVector*> lookupDelete;
389 map<string, SharedRAbundVector*> finder; //contains all groups in groupmap
// All group names come from the group map or the count table, depending on the mode.
// util->setGroups receives both the user's groups and the full list, and the result is
// stored back on m.
391 vector<string> Groups = m->getGroups();
392 vector<string> allGroups;
393 if (m->groupMode == "group") { allGroups = groupmap->getNamesOfGroups(); }
394 else { allGroups = countTable->getNamesOfGroups(); }
395 util->setGroups(Groups, allGroups);
396 m->setGroups(Groups);
// Create a labelled vector, sized to data.size(), for every known group.
399 for (int i = 0; i < allGroups.size(); i++) {
400 SharedRAbundVector* temp = new SharedRAbundVector(data.size());
401 finder[allGroups[i]] = temp;
402 finder[allGroups[i]]->setLabel(label);
403 finder[allGroups[i]]->setGroup(allGroups[i]);
404 if (m->inUsersGroups(allGroups[i], m->getGroups())) { //if this group is in user groups
405 lookup.push_back(finder[allGroups[i]]);
// Presumably the else branch: groups the user did not select are queued for deletion - confirm.
407 lookupDelete.push_back(finder[allGroups[i]]);
// Fill abundances bin by bin.
412 for(int i=0;i<numBins;i++){
413 string names = get(i);
414 vector<string> binNames;
415 m->splitAtComma(names, binNames);
416 for (int j = 0; j < binNames.size(); j++) {
// "group" mode: each name adds one to its own group's vector. A name the group map
// does not know is reported and the program exits.
417 if (m->groupMode == "group") {
418 string group = groupmap->getGroup(binNames[j]);
419 if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
420 finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
// Count-table lookup: add this name's per-group counts to every group's vector.
// NOTE(review): counts[k] is paired with allGroups[k], which assumes getGroupCounts returns
// one value per group in the same order as getNamesOfGroups - confirm.
422 vector<int> counts = countTable->getGroupCounts(binNames[j]);
423 for (int k = 0; k < allGroups.size(); k++) {
424 finder[allGroups[k]]->set(i, finder[allGroups[k]]->getAbundance(i) + counts[k], allGroups[k]);
// Free the vectors collected in lookupDelete.
430 for (int j = 0; j < lookupDelete.size(); j++) { delete lookupDelete[j]; }
434 catch(exception& e) {
435 m->errorOut(e, "SharedListVector", "getSharedRAbundVector");
440 /***********************************************************************/
// Builds the SharedSAbundVector for `groupName` by first building that group's
// SharedRAbundVector and then converting it.
// Exceptions are reported through m->errorOut.
441 SharedSAbundVector SharedListVector::getSharedSAbundVector(string groupName) {
443 SharedSAbundVector sav;
444 SharedRAbundVector rav;
// All the per-group counting happens in getSharedRAbundVector(groupName).
446 rav = this->getSharedRAbundVector(groupName);
447 sav = rav.getSharedSAbundVector();
451 catch(exception& e) {
452 m->errorOut(e, "SharedListVector", "getSharedSAbundVector");
456 /***********************************************************************/
458 OrderVector SharedListVector::getOrderVector(map<string,int>* orderMap = NULL){
461 if(orderMap == NULL){
464 for(int i=0;i<data.size();i++){
465 string names = data[i];
466 vector<string> binNames;
467 m->splitAtComma(names, binNames);
468 int binSize = binNames.size();
469 if (m->groupMode != "group") {
471 for (int j = 0; j < binNames.size(); j++) { binSize += countTable->getNumSeqs(binNames[i]); }
473 for(int j=0;j<binSize;j++){
477 random_shuffle(ov.begin(), ov.end());
485 OrderVector ov(numSeqs);
487 for(int i=0;i<data.size();i++){
488 string listOTU = data[i];
489 vector<string> binNames;
490 m->splitAtComma(listOTU, binNames);
491 for (int j = 0; j < binNames.size(); j++) {
492 if(orderMap->count(binNames[j]) == 0){
493 m->mothurOut(binNames[j] + " not found, check *.names file\n");
496 ov.set((*orderMap)[binNames[j]], i);
506 catch(exception& e) {
507 m->errorOut(e, "SharedListVector", "getOrderVector");
512 /***********************************************************************/