]> git.donarmstrong.com Git - mothur.git/blob - sharedrabundvector.cpp
changes while testing
[mothur.git] / sharedrabundvector.cpp
1 /*
2  *  sharedvector.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/5/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedrabundvector.h" 
11 #include "sabundvector.hpp"
12 #include "ordervector.hpp"
13 #include "sharedutilities.h"
14
15
16 /***********************************************************************/
17 SharedRAbundVector::SharedRAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {} 
18 /***********************************************************************/
19
20 SharedRAbundVector::~SharedRAbundVector() {
21         //for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
22
23 }
24
25 /***********************************************************************/
26
27 SharedRAbundVector::SharedRAbundVector(int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) {
28                 individual newGuy;
29                 //initialize data
30                 for (int i=0; i< n; i++) {
31                         newGuy.bin = i;
32                         newGuy.abundance = 0;
33                         data.push_back(newGuy);
34                 }
35 }
36
37 /***********************************************************************
38
39 SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : DataVector(id), data(rav) {
40         try {
41                 numBins = 0;
42                 maxRank = 0;
43                 numSeqs = 0;
44                 
45                 for(int i=0;i<data.size();i++){
46                         if(data[i].abundance != 0)              {       numBins = i+1;          }
47                         if(data[i].abundance > maxRank) {       maxRank = data[i].abundance;    }
48                         numSeqs += data[i].abundance;
49                 }
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
53                 exit(1);
54         }
55 }
56
57
58 ***********************************************************************/
59 //reads a shared file
60 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
61         try {
62                 m->clearAllGroups();
63                 vector<string> allGroups;
64                 
65                 int num, inputData, count;
66                 count = 0;  
67                 string holdLabel, nextLabel, groupN;
68                 individual newguy;
69                 
70                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  lookup.clear();
71                 
72                 //are we at the beginning of the file??
73                 if (m->saveNextLabel == "") {  
74                         f >> label; 
75             
76                         //is this a shared file that has headers
77                         if (label == "label") { 
78                                 //gets "group"
79                                 f >> label; m->gobble(f);
80                                 
81                                 //gets "numOtus"
82                                 f >> label; m->gobble(f);
83                                 
84                                 //eat rest of line
85                                 label = m->getline(f); m->gobble(f);
86                                 
87                                 //parse labels to save
88                                 istringstream iStringStream(label);
89                                 m->binLabelsInFile.clear();
90                                 while(!iStringStream.eof()){
91                                         if (m->control_pressed) { break; }
92                                         string temp;
93                                         iStringStream >> temp;  m->gobble(iStringStream);
94                 
95                                         m->binLabelsInFile.push_back(temp);
96                                 }
97                                 
98                                 f >> label >> groupN >> num;
99                         }else {
100                 //read in first row since you know there is at least 1 group.
101                 f >> groupN >> num;
102                 
103                 //make binlabels because we don't have any
104                 string snumBins = toString(num);
105                 m->binLabelsInFile.clear();
106                 for (int i = 0; i < num; i++) {  
107                     //if there is a bin label use it otherwise make one
108                     string binLabel = "Otu";
109                     string sbinNumber = toString(i+1);
110                     if (sbinNumber.length() < snumBins.length()) { 
111                         int diff = snumBins.length() - sbinNumber.length();
112                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
113                     }
114                     binLabel += sbinNumber;
115                     m->binLabelsInFile.push_back(binLabel);
116                 }
117             }
118                 }else { 
119             label = m->saveNextLabel; 
120             
121             //read in first row since you know there is at least 1 group.
122             f >> groupN >> num;
123             
124             if (m->debug) { m->mothurOut("[DEBUG]: "+ groupN + '\t' + toString(num)); }
125         }
126                 
127                 //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
128                 m->currentBinLabels = m->binLabelsInFile;
129                 
130                 holdLabel = label;
131                 
132                 //add new vector to lookup
133                 SharedRAbundVector* temp = new SharedRAbundVector();
134                 lookup.push_back(temp);
135                 lookup[0]->setLabel(label);
136                 lookup[0]->setGroup(groupN);
137                 
138                 allGroups.push_back(groupN);
139                 
140                 //fill vector.  data = first sharedrabund in file
141                 for(int i=0;i<num;i++){
142                         f >> inputData;
143             if (m->debug) { m->mothurOut("[DEBUG]: OTU" + toString(i+1)+ '\t' +toString(inputData)); }
144                         
145                         lookup[0]->push_back(inputData, groupN); //abundance, bin, group
146                         push_back(inputData, groupN);
147                         
148                         if (inputData > maxRank) { maxRank = inputData; }
149                 }
150                 
151                 m->gobble(f);
152                 
153                 if (!(f.eof())) { f >> nextLabel; }
154         
155                 //read the rest of the groups info in
156                 while ((nextLabel == holdLabel) && (f.eof() != true)) {
157                         f >> groupN >> num;
158             if (m->debug) { m->mothurOut("[DEBUG]: "+ groupN + '\t' + toString(num)); }
159                         count++;
160                         
161                         allGroups.push_back(groupN);
162                         
163                         //add new vector to lookup
164                         temp = new SharedRAbundVector();
165                         lookup.push_back(temp);
166                         lookup[count]->setLabel(label);
167                         lookup[count]->setGroup(groupN);
168
169                         //fill vector.  
170                         for(int i=0;i<num;i++){
171                                 f >> inputData;
172                 if (m->debug) { m->mothurOut("[DEBUG]: OTU" + toString(i+1)+ '\t' +toString(inputData)); }
173                                 
174                                 lookup[count]->push_back(inputData, groupN); //abundance, bin, group
175                         }
176                         
177                         m->gobble(f);
178                                 
179                         if (f.eof() != true) { f >> nextLabel; }
180                 }
181                 m->saveNextLabel = nextLabel;
182                 m->setAllGroups(allGroups);
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
186                 exit(1);
187         }
188 }
189
190 /***********************************************************************/
191
192 void SharedRAbundVector::set(int binNumber, int newBinSize, string groupname){
193         try {
194                 int oldBinSize = data[binNumber].abundance;
195                 data[binNumber].abundance = newBinSize;
196                 data[binNumber].group = groupname;
197         
198                 if(newBinSize > maxRank)        {       maxRank = newBinSize;   }
199         
200                 numSeqs += (newBinSize - oldBinSize);
201         }
202         catch(exception& e) {
203                 m->errorOut(e, "SharedRAbundVector", "set");
204                 exit(1);
205         }
206 }
207 /***********************************************************************/
208
209 void SharedRAbundVector::setData(vector <individual> newData){
210         data = newData;
211 }
212
213 /***********************************************************************/
214
215 int SharedRAbundVector::getAbundance(int index){
216         return data[index].abundance;
217         
218 }
219 /***********************************************************************/
220 //returns vector of abundances 
221 vector<int> SharedRAbundVector::getAbundances(){
222     vector<int> abunds;
223     for (int i = 0; i < data.size(); i++) {
224         abunds.push_back(data[i].abundance);
225     }
226     
227         return abunds;
228 }
229
230
231 /***********************************************************************/
232
233 int SharedRAbundVector::numNZ(){
234         int sum = 0;
235         for(int i = 1; i < numBins; i++)
236                 if(data[i].abundance > 0)
237                         sum++;
238         return sum;
239 }
240 /***********************************************************************/
241
242 void SharedRAbundVector::sortD(){
243         struct individual indObj;
244         sort(data.begin()+1, data.end(), indObj);
245 }
246 /***********************************************************************/
247
248 individual SharedRAbundVector::get(int index){
249         return data[index];
250         
251 }
252 /***********************************************************************/
253
254 vector <individual> SharedRAbundVector::getData(){
255         return data;
256 }
257 /***********************************************************************/
258
259 void SharedRAbundVector::clear(){
260         numBins = 0;
261         maxRank = 0;
262         numSeqs = 0;
263         data.clear();
264         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
265         lookup.clear();
266 }
267 /***********************************************************************/
268
269 void SharedRAbundVector::push_back(int binSize, string groupName){
270         try {
271                 individual newGuy;
272                 newGuy.abundance = binSize;
273                 newGuy.group = groupName;
274                 newGuy.bin = data.size();
275                 
276                 data.push_back(newGuy);
277                 numBins++;
278         
279                 if(binSize > maxRank){
280                         maxRank = binSize;
281                 }
282         
283                 numSeqs += binSize;
284         }
285         catch(exception& e) {
286                 m->errorOut(e, "SharedRAbundVector", "push_back");
287                 exit(1);
288         }
289 }
290
291 /***********************************************************************/
292
293 void SharedRAbundVector::insert(int binSize, int otu, string groupName){
294         try {
295                 individual newGuy;
296                 newGuy.abundance = binSize;
297                 newGuy.group = groupName;
298                 newGuy.bin = otu;
299                 
300                 data.insert(data.begin()+otu, newGuy);
301                 numBins++;
302         
303                 if(binSize > maxRank){
304                         maxRank = binSize;
305                 }
306         
307                 numSeqs += binSize;
308         }
309         catch(exception& e) {
310                 m->errorOut(e, "SharedRAbundVector", "insert");
311                 exit(1);
312         }
313 }
314
315 /***********************************************************************/
316
317 void SharedRAbundVector::push_front(int binSize, int otu, string groupName){
318         try {
319                 individual newGuy;
320                 newGuy.abundance = binSize;
321                 newGuy.group = groupName;
322                 newGuy.bin = otu;
323                 
324                 data.insert(data.begin(), newGuy);
325                 numBins++;
326         
327                 if(binSize > maxRank){
328                         maxRank = binSize;
329                 }
330         
331                 numSeqs += binSize;
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "SharedRAbundVector", "push_front");
335                 exit(1);
336         }
337 }
338
339 /***********************************************************************/
340 void SharedRAbundVector::pop_back(){
341         numSeqs -= data[data.size()-1].abundance;
342         numBins--;
343         return data.pop_back();
344 }
345
346 /***********************************************************************/
347
348
349 vector<individual>::reverse_iterator SharedRAbundVector::rbegin(){
350         return data.rbegin();                           
351 }
352
353 /***********************************************************************/
354
355 vector<individual>::reverse_iterator SharedRAbundVector::rend(){
356         return data.rend();                                     
357 }
358
359 /***********************************************************************/
360 void SharedRAbundVector::resize(int size){
361         
362         data.resize(size);
363 }
364
365 /***********************************************************************/
366
367 int SharedRAbundVector::size(){
368         return data.size();
369 }
370
371
372 /***********************************************************************/
373 void SharedRAbundVector::printHeaders(ostream& output){
374         try {
375                 string snumBins = toString(numBins);
376                 output << "label\tGroup\tnumOtus\t";
377                 if (m->sharedHeaderMode == "tax") {
378                         for (int i = 0; i < numBins; i++) {  
379                                 
380                                 //if there is a bin label use it otherwise make one
381                                 string binLabel = "PhyloType";
382                                 string sbinNumber = toString(i+1);
383                                 if (sbinNumber.length() < snumBins.length()) { 
384                                         int diff = snumBins.length() - sbinNumber.length();
385                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
386                                 }
387                                 binLabel += sbinNumber;
388                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
389                                 
390                                 output << binLabel << '\t'; 
391                         }
392                         output << endl;
393                 }else {
394                         for (int i = 0; i < numBins; i++) {  
395                                 //if there is a bin label use it otherwise make one
396                                 string binLabel = "Otu";
397                                 string sbinNumber = toString(i+1);
398                                 if (sbinNumber.length() < snumBins.length()) { 
399                                         int diff = snumBins.length() - sbinNumber.length();
400                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
401                                 }
402                                 binLabel += sbinNumber;
403                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
404                                 
405                                 output << binLabel << '\t'; 
406                         }
407                         
408                         output << endl;
409                 }
410                 m->printedHeaders = true;
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "SharedRAbundVector", "printHeaders");
414                 exit(1);
415         }
416 }
417 /***********************************************************************/
418 void SharedRAbundVector::print(ostream& output) {
419         try {
420                 output << numBins << '\t';
421         
422                 for(int i=0;i<data.size();i++){         output << data[i].abundance << '\t';            }
423                 output << endl;
424         }
425         catch(exception& e) {
426                 m->errorOut(e, "SharedRAbundVector", "print");
427                 exit(1);
428         }
429 }
430 /***********************************************************************/
431 string SharedRAbundVector::getGroup(){
432         return group;
433 }
434
435 /***********************************************************************/
436
437 void SharedRAbundVector::setGroup(string groupName){
438         group = groupName;
439 }
440 /***********************************************************************/
441 int SharedRAbundVector::getGroupIndex()  { return index; }
442 /***********************************************************************/
443 void SharedRAbundVector::setGroupIndex(int vIndex)      { index = vIndex; }
444 /***********************************************************************/
445 int SharedRAbundVector::getNumBins(){
446                 return numBins;
447 }
448
449 /***********************************************************************/
450
451 int SharedRAbundVector::getNumSeqs(){
452         return numSeqs;
453 }
454
455 /***********************************************************************/
456
457 int SharedRAbundVector::getMaxRank(){
458         return maxRank;
459 }
460 /***********************************************************************/
461
462 SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
463         return *this;                   
464 }
465 /***********************************************************************/
466 vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
467         try {
468                 SharedUtil* util;
469                 util = new SharedUtil();
470                 
471                 vector<string> Groups = m->getGroups();
472                 vector<string> allGroups = m->getAllGroups();
473                 util->setGroups(Groups, allGroups);
474                 m->setGroups(Groups);
475                 
476                 bool remove = false;
477                 for (int i = 0; i < lookup.size(); i++) {
478                         //if this sharedrabund is not from a group the user wants then delete it.
479                         if (util->isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { 
480                                 remove = true;
481                                 delete lookup[i]; lookup[i] = NULL;
482                                 lookup.erase(lookup.begin()+i); 
483                                 i--; 
484                         }
485                 }
486                 
487                 delete util;
488                 
489                 if (remove) { eliminateZeroOTUS(lookup); }
490         
491                 return lookup;
492         }
493         catch(exception& e) {
494                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
495                 exit(1);
496         }
497 }
498 //**********************************************************************************************************************
499 int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
500                 try {
501                         
502                         vector<SharedRAbundVector*> newLookup;
503                         for (int i = 0; i < thislookup.size(); i++) {
504                                 SharedRAbundVector* temp = new SharedRAbundVector();
505                                 temp->setLabel(thislookup[i]->getLabel());
506                                 temp->setGroup(thislookup[i]->getGroup());
507                                 newLookup.push_back(temp);
508                         }
509                         
510                         //for each bin
511                         vector<string> newBinLabels;
512                         string snumBins = toString(thislookup[0]->getNumBins());
513                         for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
514                                 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
515                                 
516                                 //look at each sharedRabund and make sure they are not all zero
517                                 bool allZero = true;
518                                 for (int j = 0; j < thislookup.size(); j++) {
519                                         if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
520                                 }
521                                 
522                                 //if they are not all zero add this bin
523                                 if (!allZero) {
524                                         for (int j = 0; j < thislookup.size(); j++) {
525                                                 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
526                                         }
527                                         
528                                         //if there is a bin label use it otherwise make one
529                                         string binLabel = "Otu";
530                                         string sbinNumber = toString(i+1);
531                                         if (sbinNumber.length() < snumBins.length()) { 
532                                                 int diff = snumBins.length() - sbinNumber.length();
533                                                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
534                                         }
535                                         binLabel += sbinNumber; 
536                                         if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
537                                         
538                                         newBinLabels.push_back(binLabel);
539                                 }
540                         }
541                         
542                         for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
543                         
544                         thislookup = newLookup;
545                         m->currentBinLabels = newBinLabels;
546                         
547                         return 0;
548                         
549                 }
550                 catch(exception& e) {
551                         m->errorOut(e, "SharedRAbundVector", "eliminateZeroOTUS");
552                         exit(1);
553                 }
554         }
555         
556 /***********************************************************************/
557 vector<SharedRAbundFloatVector*> SharedRAbundVector::getSharedRAbundFloatVectors(vector<SharedRAbundVector*> thislookup){
558         try {
559                 vector<SharedRAbundFloatVector*> newLookupFloat;        
560                 for (int i = 0; i < lookup.size(); i++) {
561                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
562                         temp->setLabel(thislookup[i]->getLabel());
563                         temp->setGroup(thislookup[i]->getGroup());
564                         newLookupFloat.push_back(temp);
565                 }
566                 
567                 for (int i = 0; i < thislookup.size(); i++) {
568                         
569                         for (int j = 0; j < thislookup[i]->getNumBins(); j++) {
570                                 
571                                 if (m->control_pressed) { return newLookupFloat; }
572                                 
573                                 int abund = thislookup[i]->getAbundance(j);
574                                 
575                                 float relabund = abund / (float) thislookup[i]->getNumSeqs();
576                                 
577                                 newLookupFloat[i]->push_back(relabund, thislookup[i]->getGroup());
578                         }
579                 }
580                 
581                 return newLookupFloat;
582         }
583         catch(exception& e) {
584                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
585                 exit(1);
586         }
587 }
588 /***********************************************************************/
589
590 RAbundVector SharedRAbundVector::getRAbundVector() {
591         try {
592                 RAbundVector rav;
593                 
594                 for (int i = 0; i < data.size(); i++) {
595                         if(data[i].abundance != 0) {
596                                 rav.push_back(data[i].abundance);
597                         }
598                 }
599                 
600                 rav.setLabel(label);
601                 return rav;
602         }
603         catch(exception& e) {
604                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector");
605                 exit(1);
606         }
607 }
608 /***********************************************************************/
609
610 RAbundVector SharedRAbundVector::getRAbundVector2() {
611         try {
612                 RAbundVector rav;
613                 for(int i = 0; i < numBins; i++)
614                         if(data[i].abundance != 0)
615                                 rav.push_back(data[i].abundance-1);
616                 return rav;
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector2");
620                 exit(1);
621         }
622 }
623 /***********************************************************************/
624
625 SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
626         try {
627                 SharedSAbundVector sav(maxRank+1);
628                 
629                 for(int i=0;i<data.size();i++){
630                         int abund = data[i].abundance;
631                         sav.set(abund, sav.getAbundance(abund) + 1, group);
632                 }
633                 
634                 sav.set(0, 0, group);
635                 sav.setLabel(label);
636                 sav.setGroup(group);
637                 
638                 return sav;
639         }
640         catch(exception& e) {
641                 m->errorOut(e, "SharedRAbundVector", "getSharedSAbundVector");
642                 exit(1);
643         }
644 }
645 /***********************************************************************/
646
647 SAbundVector SharedRAbundVector::getSAbundVector() {
648         try {
649                 SAbundVector sav(maxRank+1);
650                 
651                 for(int i=0;i<data.size();i++){
652                         int abund = data[i].abundance;
653                         sav.set(abund, sav.get(abund) + 1);
654                 }
655                 sav.set(0, 0);
656                 sav.setLabel(label);
657                 return sav;
658         }
659         catch(exception& e) {
660                 m->errorOut(e, "SharedRAbundVector", "getSAbundVector");                
661                 exit(1);
662         }
663 }
664
665 /***********************************************************************/
666
667 SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
668         try {
669                 SharedOrderVector ov;
670         
671                 for(int i=0;i<data.size();i++){
672                         for(int j=0;j<data[i].abundance;j++){
673                                 ov.push_back(data[i].bin, data[i].abundance, data[i].group);
674                         }
675                 }
676                 random_shuffle(ov.begin(), ov.end());
677
678                 ov.setLabel(label);     
679                 ov.updateStats();
680                 
681                 return ov;
682         }
683         catch(exception& e) {
684                 m->errorOut(e, "SharedRAbundVector", "getSharedOrderVector");
685                 exit(1);
686         }
687 }
688 /***********************************************************************/
689
690 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
691         try {
692                 OrderVector ov;
693                 for(int i=0;i<numBins;i++){
694                         for(int j=0;j<data[i].abundance;j++){
695                                 ov.push_back(i);
696                         }
697                 }
698                 random_shuffle(ov.begin(), ov.end());
699                 
700                 ov.setLabel(label);     
701
702                 return ov;
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "SharedRAbundVector", "getOrderVector");
706                 exit(1);
707         }
708 }
709
710 /***********************************************************************/
711