]> git.donarmstrong.com Git - mothur.git/blob - sharedrabundvector.cpp
7bc333bc02e737f7caab1f4b4d34ec0c131dc2dd
[mothur.git] / sharedrabundvector.cpp
1 /*
2  *  sharedvector.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/5/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedrabundvector.h" 
11 #include "sabundvector.hpp"
12 #include "ordervector.hpp"
13 #include "sharedutilities.h"
14
15
16 /***********************************************************************/
17 SharedRAbundVector::SharedRAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {} 
18 /***********************************************************************/
19
20 SharedRAbundVector::~SharedRAbundVector() {
21         //for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
22
23 }
24
25 /***********************************************************************/
26
27 SharedRAbundVector::SharedRAbundVector(int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) {
28                 individual newGuy;
29                 //initialize data
30                 for (int i=0; i< n; i++) {
31                         newGuy.bin = i;
32                         newGuy.abundance = 0;
33                         data.push_back(newGuy);
34                 }
35 }
36
37 /***********************************************************************
38
39 SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : DataVector(id), data(rav) {
40         try {
41                 numBins = 0;
42                 maxRank = 0;
43                 numSeqs = 0;
44                 
45                 for(int i=0;i<data.size();i++){
46                         if(data[i].abundance != 0)              {       numBins = i+1;          }
47                         if(data[i].abundance > maxRank) {       maxRank = data[i].abundance;    }
48                         numSeqs += data[i].abundance;
49                 }
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
53                 exit(1);
54         }
55 }
56
57
58 ***********************************************************************/
59 //reads a shared file
60 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
61         try {
62                 m->clearAllGroups();
63                 vector<string> allGroups;
64                 
65                 int num, inputData, count;
66                 count = 0;  
67                 string holdLabel, nextLabel, groupN;
68                 individual newguy;
69                 
70                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  lookup.clear();
71                 
72                 //are we at the beginning of the file??
73                 if (m->saveNextLabel == "") {  
74                         f >> label; 
75         
76                         //is this a shared file that has headers
77                         if (label == "label") { 
78                                 //gets "group"
79                                 f >> label; m->gobble(f);
80                                 
81                                 //gets "numOtus"
82                                 f >> label; m->gobble(f);
83                                 
84                                 //eat rest of line
85                                 label = m->getline(f); m->gobble(f);
86                                 
87                                 //parse labels to save
88                                 istringstream iStringStream(label);
89                                 m->binLabelsInFile.clear();
90                                 while(!iStringStream.eof()){
91                                         if (m->control_pressed) { break; }
92                                         string temp;
93                                         iStringStream >> temp;  m->gobble(iStringStream);
94                 
95                                         m->binLabelsInFile.push_back(temp);
96                                 }
97                                 
98                                 f >> label >> groupN >> num;
99                         }else {
100                 //read in first row since you know there is at least 1 group.
101                 f >> groupN >> num;
102                 
103                 //make binlabels because we don't have any
104                 string snumBins = toString(num);
105                 m->binLabelsInFile.clear();
106                 for (int i = 0; i < num; i++) {  
107                     //if there is a bin label use it otherwise make one
108                     string binLabel = "Otu";
109                     string sbinNumber = toString(i+1);
110                     if (sbinNumber.length() < snumBins.length()) { 
111                         int diff = snumBins.length() - sbinNumber.length();
112                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
113                     }
114                     binLabel += sbinNumber;
115                     m->binLabelsInFile.push_back(binLabel);
116                 }
117             }
118                 }else { 
119             label = m->saveNextLabel; 
120             
121             //read in first row since you know there is at least 1 group.
122             f >> groupN >> num;
123         }
124                 
125                 //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
126                 m->currentBinLabels = m->binLabelsInFile;
127                 
128                 holdLabel = label;
129                 
130                 //add new vector to lookup
131                 SharedRAbundVector* temp = new SharedRAbundVector();
132                 lookup.push_back(temp);
133                 lookup[0]->setLabel(label);
134                 lookup[0]->setGroup(groupN);
135                 
136                 allGroups.push_back(groupN);
137                 
138                 //fill vector.  data = first sharedrabund in file
139                 for(int i=0;i<num;i++){
140                         f >> inputData;
141                         
142                         lookup[0]->push_back(inputData, groupN); //abundance, bin, group
143                         push_back(inputData, groupN);
144                         
145                         if (inputData > maxRank) { maxRank = inputData; }
146                 }
147                 
148                 m->gobble(f);
149                 
150                 if (!(f.eof())) { f >> nextLabel; }
151         
152                 //read the rest of the groups info in
153                 while ((nextLabel == holdLabel) && (f.eof() != true)) {
154                         f >> groupN >> num;
155                         count++;
156                         
157                         allGroups.push_back(groupN);
158                         
159                         //add new vector to lookup
160                         temp = new SharedRAbundVector();
161                         lookup.push_back(temp);
162                         lookup[count]->setLabel(label);
163                         lookup[count]->setGroup(groupN);
164
165                         //fill vector.  
166                         for(int i=0;i<num;i++){
167                                 f >> inputData;
168                                 
169                                 lookup[count]->push_back(inputData, groupN); //abundance, bin, group
170                         }
171                         
172                         m->gobble(f);
173                                 
174                         if (f.eof() != true) { f >> nextLabel; }
175                 }
176                 m->saveNextLabel = nextLabel;
177                 m->setAllGroups(allGroups);
178         }
179         catch(exception& e) {
180                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
181                 exit(1);
182         }
183 }
184
185 /***********************************************************************/
186
187 void SharedRAbundVector::set(int binNumber, int newBinSize, string groupname){
188         try {
189                 int oldBinSize = data[binNumber].abundance;
190                 data[binNumber].abundance = newBinSize;
191                 data[binNumber].group = groupname;
192         
193                 if(newBinSize > maxRank)        {       maxRank = newBinSize;   }
194         
195                 numSeqs += (newBinSize - oldBinSize);
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "SharedRAbundVector", "set");
199                 exit(1);
200         }
201 }
202 /***********************************************************************/
203
204 void SharedRAbundVector::setData(vector <individual> newData){
205         data = newData;
206 }
207
208 /***********************************************************************/
209
210 int SharedRAbundVector::getAbundance(int index){
211         return data[index].abundance;
212         
213 }
214 /***********************************************************************/
215 //returns vector of abundances 
216 vector<int> SharedRAbundVector::getAbundances(){
217     vector<int> abunds;
218     for (int i = 0; i < data.size(); i++) {
219         abunds.push_back(data[i].abundance);
220     }
221     
222         return abunds;
223 }
224
225
226 /***********************************************************************/
227
228 int SharedRAbundVector::numNZ(){
229         int sum = 0;
230         for(int i = 1; i < numBins; i++)
231                 if(data[i].abundance > 0)
232                         sum++;
233         return sum;
234 }
235 /***********************************************************************/
236
237 void SharedRAbundVector::sortD(){
238         struct individual indObj;
239         sort(data.begin()+1, data.end(), indObj);
240 }
241 /***********************************************************************/
242
243 individual SharedRAbundVector::get(int index){
244         return data[index];
245         
246 }
247 /***********************************************************************/
248
249 vector <individual> SharedRAbundVector::getData(){
250         return data;
251 }
252 /***********************************************************************/
253
254 void SharedRAbundVector::clear(){
255         numBins = 0;
256         maxRank = 0;
257         numSeqs = 0;
258         data.clear();
259         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
260         lookup.clear();
261 }
262 /***********************************************************************/
263
264 void SharedRAbundVector::push_back(int binSize, string groupName){
265         try {
266                 individual newGuy;
267                 newGuy.abundance = binSize;
268                 newGuy.group = groupName;
269                 newGuy.bin = data.size();
270                 
271                 data.push_back(newGuy);
272                 numBins++;
273         
274                 if(binSize > maxRank){
275                         maxRank = binSize;
276                 }
277         
278                 numSeqs += binSize;
279         }
280         catch(exception& e) {
281                 m->errorOut(e, "SharedRAbundVector", "push_back");
282                 exit(1);
283         }
284 }
285
286 /***********************************************************************/
287
288 void SharedRAbundVector::insert(int binSize, int otu, string groupName){
289         try {
290                 individual newGuy;
291                 newGuy.abundance = binSize;
292                 newGuy.group = groupName;
293                 newGuy.bin = otu;
294                 
295                 data.insert(data.begin()+otu, newGuy);
296                 numBins++;
297         
298                 if(binSize > maxRank){
299                         maxRank = binSize;
300                 }
301         
302                 numSeqs += binSize;
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "SharedRAbundVector", "insert");
306                 exit(1);
307         }
308 }
309
310 /***********************************************************************/
311
312 void SharedRAbundVector::push_front(int binSize, int otu, string groupName){
313         try {
314                 individual newGuy;
315                 newGuy.abundance = binSize;
316                 newGuy.group = groupName;
317                 newGuy.bin = otu;
318                 
319                 data.insert(data.begin(), newGuy);
320                 numBins++;
321         
322                 if(binSize > maxRank){
323                         maxRank = binSize;
324                 }
325         
326                 numSeqs += binSize;
327         }
328         catch(exception& e) {
329                 m->errorOut(e, "SharedRAbundVector", "push_front");
330                 exit(1);
331         }
332 }
333
334 /***********************************************************************/
335 void SharedRAbundVector::pop_back(){
336         numSeqs -= data[data.size()-1].abundance;
337         numBins--;
338         return data.pop_back();
339 }
340
341 /***********************************************************************/
342
343
344 vector<individual>::reverse_iterator SharedRAbundVector::rbegin(){
345         return data.rbegin();                           
346 }
347
348 /***********************************************************************/
349
350 vector<individual>::reverse_iterator SharedRAbundVector::rend(){
351         return data.rend();                                     
352 }
353
354 /***********************************************************************/
355 void SharedRAbundVector::resize(int size){
356         
357         data.resize(size);
358 }
359
360 /***********************************************************************/
361
362 int SharedRAbundVector::size(){
363         return data.size();
364 }
365
366
367 /***********************************************************************/
368 void SharedRAbundVector::printHeaders(ostream& output){
369         try {
370                 string snumBins = toString(numBins);
371                 output << "label\tGroup\tnumOtus\t";
372                 if (m->sharedHeaderMode == "tax") {
373                         for (int i = 0; i < numBins; i++) {  
374                                 
375                                 //if there is a bin label use it otherwise make one
376                                 string binLabel = "PhyloType";
377                                 string sbinNumber = toString(i+1);
378                                 if (sbinNumber.length() < snumBins.length()) { 
379                                         int diff = snumBins.length() - sbinNumber.length();
380                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
381                                 }
382                                 binLabel += sbinNumber;
383                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
384                                 
385                                 output << binLabel << '\t'; 
386                         }
387                         output << endl;
388                 }else {
389                         for (int i = 0; i < numBins; i++) {  
390                                 //if there is a bin label use it otherwise make one
391                                 string binLabel = "Otu";
392                                 string sbinNumber = toString(i+1);
393                                 if (sbinNumber.length() < snumBins.length()) { 
394                                         int diff = snumBins.length() - sbinNumber.length();
395                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
396                                 }
397                                 binLabel += sbinNumber;
398                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
399                                 
400                                 output << binLabel << '\t'; 
401                         }
402                         
403                         output << endl;
404                 }
405                 m->printedHeaders = true;
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "SharedRAbundVector", "printHeaders");
409                 exit(1);
410         }
411 }
412 /***********************************************************************/
413 void SharedRAbundVector::print(ostream& output) {
414         try {
415                 output << numBins << '\t';
416         
417                 for(int i=0;i<data.size();i++){         output << data[i].abundance << '\t';            }
418                 output << endl;
419         }
420         catch(exception& e) {
421                 m->errorOut(e, "SharedRAbundVector", "print");
422                 exit(1);
423         }
424 }
425 /***********************************************************************/
426 string SharedRAbundVector::getGroup(){
427         return group;
428 }
429
430 /***********************************************************************/
431
432 void SharedRAbundVector::setGroup(string groupName){
433         group = groupName;
434 }
435 /***********************************************************************/
436 int SharedRAbundVector::getGroupIndex()  { return index; }
437 /***********************************************************************/
438 void SharedRAbundVector::setGroupIndex(int vIndex)      { index = vIndex; }
439 /***********************************************************************/
440 int SharedRAbundVector::getNumBins(){
441                 return numBins;
442 }
443
444 /***********************************************************************/
445
446 int SharedRAbundVector::getNumSeqs(){
447         return numSeqs;
448 }
449
450 /***********************************************************************/
451
452 int SharedRAbundVector::getMaxRank(){
453         return maxRank;
454 }
455 /***********************************************************************/
456
457 SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
458         return *this;                   
459 }
460 /***********************************************************************/
461 vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
462         try {
463                 SharedUtil* util;
464                 util = new SharedUtil();
465                 
466                 vector<string> Groups = m->getGroups();
467                 vector<string> allGroups = m->getAllGroups();
468                 util->setGroups(Groups, allGroups);
469                 m->setGroups(Groups);
470                 
471                 bool remove = false;
472                 for (int i = 0; i < lookup.size(); i++) {
473                         //if this sharedrabund is not from a group the user wants then delete it.
474                         if (util->isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { 
475                                 remove = true;
476                                 delete lookup[i]; lookup[i] = NULL;
477                                 lookup.erase(lookup.begin()+i); 
478                                 i--; 
479                         }
480                 }
481                 
482                 delete util;
483                 
484                 if (remove) { eliminateZeroOTUS(lookup); }
485         
486                 return lookup;
487         }
488         catch(exception& e) {
489                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
490                 exit(1);
491         }
492 }
493 //**********************************************************************************************************************
494 int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
495                 try {
496                         
497                         vector<SharedRAbundVector*> newLookup;
498                         for (int i = 0; i < thislookup.size(); i++) {
499                                 SharedRAbundVector* temp = new SharedRAbundVector();
500                                 temp->setLabel(thislookup[i]->getLabel());
501                                 temp->setGroup(thislookup[i]->getGroup());
502                                 newLookup.push_back(temp);
503                         }
504                         
505                         //for each bin
506                         vector<string> newBinLabels;
507                         string snumBins = toString(thislookup[0]->getNumBins());
508                         for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
509                                 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
510                                 
511                                 //look at each sharedRabund and make sure they are not all zero
512                                 bool allZero = true;
513                                 for (int j = 0; j < thislookup.size(); j++) {
514                                         if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
515                                 }
516                                 
517                                 //if they are not all zero add this bin
518                                 if (!allZero) {
519                                         for (int j = 0; j < thislookup.size(); j++) {
520                                                 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
521                                         }
522                                         
523                                         //if there is a bin label use it otherwise make one
524                                         string binLabel = "Otu";
525                                         string sbinNumber = toString(i+1);
526                                         if (sbinNumber.length() < snumBins.length()) { 
527                                                 int diff = snumBins.length() - sbinNumber.length();
528                                                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
529                                         }
530                                         binLabel += sbinNumber; 
531                                         if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
532                                         
533                                         newBinLabels.push_back(binLabel);
534                                 }
535                         }
536                         
537                         for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
538                         
539                         thislookup = newLookup;
540                         m->currentBinLabels = newBinLabels;
541                         
542                         return 0;
543                         
544                 }
545                 catch(exception& e) {
546                         m->errorOut(e, "SharedRAbundVector", "eliminateZeroOTUS");
547                         exit(1);
548                 }
549         }
550         
551 /***********************************************************************/
552 vector<SharedRAbundFloatVector*> SharedRAbundVector::getSharedRAbundFloatVectors(vector<SharedRAbundVector*> thislookup){
553         try {
554                 vector<SharedRAbundFloatVector*> newLookupFloat;        
555                 for (int i = 0; i < lookup.size(); i++) {
556                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
557                         temp->setLabel(thislookup[i]->getLabel());
558                         temp->setGroup(thislookup[i]->getGroup());
559                         newLookupFloat.push_back(temp);
560                 }
561                 
562                 for (int i = 0; i < thislookup.size(); i++) {
563                         
564                         for (int j = 0; j < thislookup[i]->getNumBins(); j++) {
565                                 
566                                 if (m->control_pressed) { return newLookupFloat; }
567                                 
568                                 int abund = thislookup[i]->getAbundance(j);
569                                 
570                                 float relabund = abund / (float) thislookup[i]->getNumSeqs();
571                                 
572                                 newLookupFloat[i]->push_back(relabund, thislookup[i]->getGroup());
573                         }
574                 }
575                 
576                 return newLookupFloat;
577         }
578         catch(exception& e) {
579                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
580                 exit(1);
581         }
582 }
583 /***********************************************************************/
584
585 RAbundVector SharedRAbundVector::getRAbundVector() {
586         try {
587                 RAbundVector rav;
588                 
589                 for (int i = 0; i < data.size(); i++) {
590                         if(data[i].abundance != 0) {
591                                 rav.push_back(data[i].abundance);
592                         }
593                 }
594                 
595                 rav.setLabel(label);
596                 return rav;
597         }
598         catch(exception& e) {
599                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector");
600                 exit(1);
601         }
602 }
603 /***********************************************************************/
604
605 RAbundVector SharedRAbundVector::getRAbundVector2() {
606         try {
607                 RAbundVector rav;
608                 for(int i = 0; i < numBins; i++)
609                         if(data[i].abundance != 0)
610                                 rav.push_back(data[i].abundance-1);
611                 return rav;
612         }
613         catch(exception& e) {
614                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector2");
615                 exit(1);
616         }
617 }
618 /***********************************************************************/
619
620 SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
621         try {
622                 SharedSAbundVector sav(maxRank+1);
623                 
624                 for(int i=0;i<data.size();i++){
625                         int abund = data[i].abundance;
626                         sav.set(abund, sav.getAbundance(abund) + 1, group);
627                 }
628                 
629                 sav.set(0, 0, group);
630                 sav.setLabel(label);
631                 sav.setGroup(group);
632                 
633                 return sav;
634         }
635         catch(exception& e) {
636                 m->errorOut(e, "SharedRAbundVector", "getSharedSAbundVector");
637                 exit(1);
638         }
639 }
640 /***********************************************************************/
641
642 SAbundVector SharedRAbundVector::getSAbundVector() {
643         try {
644                 SAbundVector sav(maxRank+1);
645                 
646                 for(int i=0;i<data.size();i++){
647                         int abund = data[i].abundance;
648                         sav.set(abund, sav.get(abund) + 1);
649                 }
650                 sav.set(0, 0);
651                 sav.setLabel(label);
652                 return sav;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "SharedRAbundVector", "getSAbundVector");                
656                 exit(1);
657         }
658 }
659
660 /***********************************************************************/
661
662 SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
663         try {
664                 SharedOrderVector ov;
665         
666                 for(int i=0;i<data.size();i++){
667                         for(int j=0;j<data[i].abundance;j++){
668                                 ov.push_back(data[i].bin, data[i].abundance, data[i].group);
669                         }
670                 }
671                 random_shuffle(ov.begin(), ov.end());
672
673                 ov.setLabel(label);     
674                 ov.updateStats();
675                 
676                 return ov;
677         }
678         catch(exception& e) {
679                 m->errorOut(e, "SharedRAbundVector", "getSharedOrderVector");
680                 exit(1);
681         }
682 }
683 /***********************************************************************/
684
685 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
686         try {
687                 OrderVector ov;
688                 for(int i=0;i<numBins;i++){
689                         for(int j=0;j<data[i].abundance;j++){
690                                 ov.push_back(i);
691                         }
692                 }
693                 random_shuffle(ov.begin(), ov.end());
694                 
695                 ov.setLabel(label);     
696
697                 return ov;
698         }
699         catch(exception& e) {
700                 m->errorOut(e, "SharedRAbundVector", "getOrderVector");
701                 exit(1);
702         }
703 }
704
705 /***********************************************************************/
706