]> git.donarmstrong.com Git - mothur.git/blob - sharedrabundvector.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / sharedrabundvector.cpp
1 /*
2  *  sharedvector.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/5/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedrabundvector.h" 
11 #include "sabundvector.hpp"
12 #include "ordervector.hpp"
13 #include "sharedutilities.h"
14
15
16 /***********************************************************************/
17 SharedRAbundVector::SharedRAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {} 
18 /***********************************************************************/
19
20 SharedRAbundVector::~SharedRAbundVector() {
21         //for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
22
23 }
24
25 /***********************************************************************/
26
27 SharedRAbundVector::SharedRAbundVector(int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) {
28                 individual newGuy;
29                 //initialize data
30                 for (int i=0; i< n; i++) {
31                         newGuy.bin = i;
32                         newGuy.abundance = 0;
33                         data.push_back(newGuy);
34                 }
35 }
36
37 /***********************************************************************
38
39 SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : DataVector(id), data(rav) {
40         try {
41                 numBins = 0;
42                 maxRank = 0;
43                 numSeqs = 0;
44                 
45                 for(int i=0;i<data.size();i++){
46                         if(data[i].abundance != 0)              {       numBins = i+1;          }
47                         if(data[i].abundance > maxRank) {       maxRank = data[i].abundance;    }
48                         numSeqs += data[i].abundance;
49                 }
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
53                 exit(1);
54         }
55 }
56
57
58 ***********************************************************************/
59 //reads a shared file
60 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
61         try {
62                 m->clearAllGroups();
63                 vector<string> allGroups;
64                 
65                 int num, inputData, count;
66                 count = 0;  
67                 string holdLabel, nextLabel, groupN;
68                 individual newguy;
69                 
70                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  lookup.clear();
71                 
72                 //are we at the beginning of the file??
73                 if (m->saveNextLabel == "") {  
74                         f >> label; 
75         
76                         //is this a shared file that has headers
77                         if (label == "label") { 
78                                 //gets "group"
79                                 f >> label; m->gobble(f);
80                                 
81                                 //gets "numOtus"
82                                 f >> label; m->gobble(f);
83                                 
84                                 //eat rest of line
85                                 label = m->getline(f); m->gobble(f);
86                                 
87                                 //parse labels to save
88                                 istringstream iStringStream(label);
89                                 m->binLabelsInFile.clear();
90                                 while(!iStringStream.eof()){
91                                         if (m->control_pressed) { break; }
92                                         string temp;
93                                         iStringStream >> temp;  m->gobble(iStringStream);
94                 
95                                         m->binLabelsInFile.push_back(temp);
96                                 }
97                                 
98                                 f >> label >> groupN >> num;
99                         }else {
100                 //read in first row since you know there is at least 1 group.
101                 f >> groupN >> num;
102                 
103                 //make binlabels because we don't have any
104                 string snumBins = toString(num);
105                 m->binLabelsInFile.clear();
106                 for (int i = 0; i < num; i++) {  
107                     //if there is a bin label use it otherwise make one
108                     string binLabel = "Otu";
109                     string sbinNumber = toString(i+1);
110                     if (sbinNumber.length() < snumBins.length()) { 
111                         int diff = snumBins.length() - sbinNumber.length();
112                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
113                     }
114                     binLabel += sbinNumber;
115                     m->binLabelsInFile.push_back(binLabel);
116                 }
117             }
118                 }else { 
119             label = m->saveNextLabel; 
120             
121             //read in first row since you know there is at least 1 group.
122             f >> groupN >> num;
123         }
124                 
125                 //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
126                 m->currentBinLabels = m->binLabelsInFile;
127                 
128                 holdLabel = label;
129                 
130                 //add new vector to lookup
131                 SharedRAbundVector* temp = new SharedRAbundVector();
132                 lookup.push_back(temp);
133                 lookup[0]->setLabel(label);
134                 lookup[0]->setGroup(groupN);
135                 
136                 allGroups.push_back(groupN);
137                 
138                 //fill vector.  data = first sharedrabund in file
139                 for(int i=0;i<num;i++){
140                         f >> inputData;
141                         
142                         lookup[0]->push_back(inputData, groupN); //abundance, bin, group
143                         push_back(inputData, groupN);
144                         
145                         if (inputData > maxRank) { maxRank = inputData; }
146                 }
147                 
148                 m->gobble(f);
149                 
150                 if (!(f.eof())) { f >> nextLabel; }
151         
152                 //read the rest of the groups info in
153                 while ((nextLabel == holdLabel) && (f.eof() != true)) {
154                         f >> groupN >> num;
155                         count++;
156                         
157                         allGroups.push_back(groupN);
158                         
159                         //add new vector to lookup
160                         temp = new SharedRAbundVector();
161                         lookup.push_back(temp);
162                         lookup[count]->setLabel(label);
163                         lookup[count]->setGroup(groupN);
164
165                         //fill vector.  
166                         for(int i=0;i<num;i++){
167                                 f >> inputData;
168                                 
169                                 lookup[count]->push_back(inputData, groupN); //abundance, bin, group
170                         }
171                         
172                         m->gobble(f);
173                                 
174                         if (f.eof() != true) { f >> nextLabel; }
175                 }
176                 m->saveNextLabel = nextLabel;
177                 m->setAllGroups(allGroups);
178         }
179         catch(exception& e) {
180                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
181                 exit(1);
182         }
183 }
184
185 /***********************************************************************/
186
187 void SharedRAbundVector::set(int binNumber, int newBinSize, string groupname){
188         try {
189                 int oldBinSize = data[binNumber].abundance;
190                 data[binNumber].abundance = newBinSize;
191                 data[binNumber].group = groupname;
192         
193                 if(newBinSize > maxRank)        {       maxRank = newBinSize;   }
194         
195                 numSeqs += (newBinSize - oldBinSize);
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "SharedRAbundVector", "set");
199                 exit(1);
200         }
201 }
202 /***********************************************************************/
203
204 void SharedRAbundVector::setData(vector <individual> newData){
205         data = newData;
206 }
207
208 /***********************************************************************/
209
210 int SharedRAbundVector::getAbundance(int index){
211         return data[index].abundance;
212         
213 }
214 /***********************************************************************/
215
216 int SharedRAbundVector::numNZ(){
217         int sum = 0;
218         for(int i = 1; i < numBins; i++)
219                 if(data[i].abundance > 0)
220                         sum++;
221         return sum;
222 }
223 /***********************************************************************/
224
225 void SharedRAbundVector::sortD(){
226         struct individual indObj;
227         sort(data.begin()+1, data.end(), indObj);
228 }
229 /***********************************************************************/
230
231 individual SharedRAbundVector::get(int index){
232         return data[index];
233         
234 }
235 /***********************************************************************/
236
237 vector <individual> SharedRAbundVector::getData(){
238         return data;
239 }
240 /***********************************************************************/
241
242 void SharedRAbundVector::clear(){
243         numBins = 0;
244         maxRank = 0;
245         numSeqs = 0;
246         data.clear();
247         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
248         lookup.clear();
249 }
250 /***********************************************************************/
251
252 void SharedRAbundVector::push_back(int binSize, string groupName){
253         try {
254                 individual newGuy;
255                 newGuy.abundance = binSize;
256                 newGuy.group = groupName;
257                 newGuy.bin = data.size();
258                 
259                 data.push_back(newGuy);
260                 numBins++;
261         
262                 if(binSize > maxRank){
263                         maxRank = binSize;
264                 }
265         
266                 numSeqs += binSize;
267         }
268         catch(exception& e) {
269                 m->errorOut(e, "SharedRAbundVector", "push_back");
270                 exit(1);
271         }
272 }
273
274 /***********************************************************************/
275
276 void SharedRAbundVector::insert(int binSize, int otu, string groupName){
277         try {
278                 individual newGuy;
279                 newGuy.abundance = binSize;
280                 newGuy.group = groupName;
281                 newGuy.bin = otu;
282                 
283                 data.insert(data.begin()+otu, newGuy);
284                 numBins++;
285         
286                 if(binSize > maxRank){
287                         maxRank = binSize;
288                 }
289         
290                 numSeqs += binSize;
291         }
292         catch(exception& e) {
293                 m->errorOut(e, "SharedRAbundVector", "insert");
294                 exit(1);
295         }
296 }
297
298 /***********************************************************************/
299
300 void SharedRAbundVector::push_front(int binSize, int otu, string groupName){
301         try {
302                 individual newGuy;
303                 newGuy.abundance = binSize;
304                 newGuy.group = groupName;
305                 newGuy.bin = otu;
306                 
307                 data.insert(data.begin(), newGuy);
308                 numBins++;
309         
310                 if(binSize > maxRank){
311                         maxRank = binSize;
312                 }
313         
314                 numSeqs += binSize;
315         }
316         catch(exception& e) {
317                 m->errorOut(e, "SharedRAbundVector", "push_front");
318                 exit(1);
319         }
320 }
321
322 /***********************************************************************/
323 void SharedRAbundVector::pop_back(){
324         numSeqs -= data[data.size()-1].abundance;
325         numBins--;
326         return data.pop_back();
327 }
328
329 /***********************************************************************/
330
331
332 vector<individual>::reverse_iterator SharedRAbundVector::rbegin(){
333         return data.rbegin();                           
334 }
335
336 /***********************************************************************/
337
338 vector<individual>::reverse_iterator SharedRAbundVector::rend(){
339         return data.rend();                                     
340 }
341
342 /***********************************************************************/
343 void SharedRAbundVector::resize(int size){
344         
345         data.resize(size);
346 }
347
348 /***********************************************************************/
349
350 int SharedRAbundVector::size(){
351         return data.size();
352 }
353
354
355 /***********************************************************************/
356 void SharedRAbundVector::printHeaders(ostream& output){
357         try {
358                 string snumBins = toString(numBins);
359                 output << "label\tGroup\tnumOtus\t";
360                 if (m->sharedHeaderMode == "tax") {
361                         for (int i = 0; i < numBins; i++) {  
362                                 
363                                 //if there is a bin label use it otherwise make one
364                                 string binLabel = "PhyloType";
365                                 string sbinNumber = toString(i+1);
366                                 if (sbinNumber.length() < snumBins.length()) { 
367                                         int diff = snumBins.length() - sbinNumber.length();
368                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
369                                 }
370                                 binLabel += sbinNumber;
371                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
372                                 
373                                 output << binLabel << '\t'; 
374                         }
375                         output << endl;
376                 }else {
377                         for (int i = 0; i < numBins; i++) {  
378                                 //if there is a bin label use it otherwise make one
379                                 string binLabel = "Otu";
380                                 string sbinNumber = toString(i+1);
381                                 if (sbinNumber.length() < snumBins.length()) { 
382                                         int diff = snumBins.length() - sbinNumber.length();
383                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
384                                 }
385                                 binLabel += sbinNumber;
386                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
387                                 
388                                 output << binLabel << '\t'; 
389                         }
390                         
391                         output << endl;
392                 }
393                 m->printedHeaders = true;
394         }
395         catch(exception& e) {
396                 m->errorOut(e, "SharedRAbundVector", "printHeaders");
397                 exit(1);
398         }
399 }
400 /***********************************************************************/
401 void SharedRAbundVector::print(ostream& output) {
402         try {
403                 output << numBins << '\t';
404         
405                 for(int i=0;i<data.size();i++){         output << data[i].abundance << '\t';            }
406                 output << endl;
407         }
408         catch(exception& e) {
409                 m->errorOut(e, "SharedRAbundVector", "print");
410                 exit(1);
411         }
412 }
413 /***********************************************************************/
414 string SharedRAbundVector::getGroup(){
415         return group;
416 }
417
418 /***********************************************************************/
419
420 void SharedRAbundVector::setGroup(string groupName){
421         group = groupName;
422 }
423 /***********************************************************************/
424 int SharedRAbundVector::getGroupIndex()  { return index; }
425 /***********************************************************************/
426 void SharedRAbundVector::setGroupIndex(int vIndex)      { index = vIndex; }
427 /***********************************************************************/
428 int SharedRAbundVector::getNumBins(){
429                 return numBins;
430 }
431
432 /***********************************************************************/
433
434 int SharedRAbundVector::getNumSeqs(){
435         return numSeqs;
436 }
437
438 /***********************************************************************/
439
440 int SharedRAbundVector::getMaxRank(){
441         return maxRank;
442 }
443 /***********************************************************************/
444
445 SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
446         return *this;                   
447 }
448 /***********************************************************************/
449 vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
450         try {
451                 SharedUtil* util;
452                 util = new SharedUtil();
453                 
454                 vector<string> Groups = m->getGroups();
455                 vector<string> allGroups = m->getAllGroups();
456                 util->setGroups(Groups, allGroups);
457                 m->setGroups(Groups);
458                 
459                 bool remove = false;
460                 for (int i = 0; i < lookup.size(); i++) {
461                         //if this sharedrabund is not from a group the user wants then delete it.
462                         if (util->isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { 
463                                 remove = true;
464                                 delete lookup[i]; lookup[i] = NULL;
465                                 lookup.erase(lookup.begin()+i); 
466                                 i--; 
467                         }
468                 }
469                 
470                 delete util;
471                 
472                 if (remove) { eliminateZeroOTUS(lookup); }
473         
474                 return lookup;
475         }
476         catch(exception& e) {
477                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
478                 exit(1);
479         }
480 }
481 //**********************************************************************************************************************
482 int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
483                 try {
484                         
485                         vector<SharedRAbundVector*> newLookup;
486                         for (int i = 0; i < thislookup.size(); i++) {
487                                 SharedRAbundVector* temp = new SharedRAbundVector();
488                                 temp->setLabel(thislookup[i]->getLabel());
489                                 temp->setGroup(thislookup[i]->getGroup());
490                                 newLookup.push_back(temp);
491                         }
492                         
493                         //for each bin
494                         vector<string> newBinLabels;
495                         string snumBins = toString(thislookup[0]->getNumBins());
496                         for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
497                                 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
498                                 
499                                 //look at each sharedRabund and make sure they are not all zero
500                                 bool allZero = true;
501                                 for (int j = 0; j < thislookup.size(); j++) {
502                                         if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
503                                 }
504                                 
505                                 //if they are not all zero add this bin
506                                 if (!allZero) {
507                                         for (int j = 0; j < thislookup.size(); j++) {
508                                                 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
509                                         }
510                                         
511                                         //if there is a bin label use it otherwise make one
512                                         string binLabel = "Otu";
513                                         string sbinNumber = toString(i+1);
514                                         if (sbinNumber.length() < snumBins.length()) { 
515                                                 int diff = snumBins.length() - sbinNumber.length();
516                                                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
517                                         }
518                                         binLabel += sbinNumber; 
519                                         if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
520                                         
521                                         newBinLabels.push_back(binLabel);
522                                 }
523                         }
524                         
525                         for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
526                         
527                         thislookup = newLookup;
528                         m->currentBinLabels = newBinLabels;
529                         
530                         return 0;
531                         
532                 }
533                 catch(exception& e) {
534                         m->errorOut(e, "SharedRAbundVector", "eliminateZeroOTUS");
535                         exit(1);
536                 }
537         }
538         
539 /***********************************************************************/
540 vector<SharedRAbundFloatVector*> SharedRAbundVector::getSharedRAbundFloatVectors(vector<SharedRAbundVector*> thislookup){
541         try {
542                 vector<SharedRAbundFloatVector*> newLookupFloat;        
543                 for (int i = 0; i < lookup.size(); i++) {
544                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
545                         temp->setLabel(thislookup[i]->getLabel());
546                         temp->setGroup(thislookup[i]->getGroup());
547                         newLookupFloat.push_back(temp);
548                 }
549                 
550                 for (int i = 0; i < thislookup.size(); i++) {
551                         
552                         for (int j = 0; j < thislookup[i]->getNumBins(); j++) {
553                                 
554                                 if (m->control_pressed) { return newLookupFloat; }
555                                 
556                                 int abund = thislookup[i]->getAbundance(j);
557                                 
558                                 float relabund = abund / (float) thislookup[i]->getNumSeqs();
559                                 
560                                 newLookupFloat[i]->push_back(relabund, thislookup[i]->getGroup());
561                         }
562                 }
563                 
564                 return newLookupFloat;
565         }
566         catch(exception& e) {
567                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
568                 exit(1);
569         }
570 }
571 /***********************************************************************/
572
573 RAbundVector SharedRAbundVector::getRAbundVector() {
574         try {
575                 RAbundVector rav;
576                 
577                 for (int i = 0; i < data.size(); i++) {
578                         if(data[i].abundance != 0) {
579                                 rav.push_back(data[i].abundance);
580                         }
581                 }
582                 
583                 rav.setLabel(label);
584                 return rav;
585         }
586         catch(exception& e) {
587                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector");
588                 exit(1);
589         }
590 }
591 /***********************************************************************/
592
593 RAbundVector SharedRAbundVector::getRAbundVector2() {
594         try {
595                 RAbundVector rav;
596                 for(int i = 0; i < numBins; i++)
597                         if(data[i].abundance != 0)
598                                 rav.push_back(data[i].abundance-1);
599                 return rav;
600         }
601         catch(exception& e) {
602                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector2");
603                 exit(1);
604         }
605 }
606 /***********************************************************************/
607
608 SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
609         try {
610                 SharedSAbundVector sav(maxRank+1);
611                 
612                 for(int i=0;i<data.size();i++){
613                         int abund = data[i].abundance;
614                         sav.set(abund, sav.getAbundance(abund) + 1, group);
615                 }
616                 
617                 sav.set(0, 0, group);
618                 sav.setLabel(label);
619                 sav.setGroup(group);
620                 
621                 return sav;
622         }
623         catch(exception& e) {
624                 m->errorOut(e, "SharedRAbundVector", "getSharedSAbundVector");
625                 exit(1);
626         }
627 }
628 /***********************************************************************/
629
630 SAbundVector SharedRAbundVector::getSAbundVector() {
631         try {
632                 SAbundVector sav(maxRank+1);
633                 
634                 for(int i=0;i<data.size();i++){
635                         int abund = data[i].abundance;
636                         sav.set(abund, sav.get(abund) + 1);
637                 }
638                 sav.set(0, 0);
639                 sav.setLabel(label);
640                 return sav;
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "SharedRAbundVector", "getSAbundVector");                
644                 exit(1);
645         }
646 }
647
648 /***********************************************************************/
649
650 SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
651         try {
652                 SharedOrderVector ov;
653         
654                 for(int i=0;i<data.size();i++){
655                         for(int j=0;j<data[i].abundance;j++){
656                                 ov.push_back(data[i].bin, data[i].abundance, data[i].group);
657                         }
658                 }
659                 random_shuffle(ov.begin(), ov.end());
660
661                 ov.setLabel(label);     
662                 ov.updateStats();
663                 
664                 return ov;
665         }
666         catch(exception& e) {
667                 m->errorOut(e, "SharedRAbundVector", "getSharedOrderVector");
668                 exit(1);
669         }
670 }
671 /***********************************************************************/
672
673 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
674         try {
675                 OrderVector ov;
676                 for(int i=0;i<numBins;i++){
677                         for(int j=0;j<data[i].abundance;j++){
678                                 ov.push_back(i);
679                         }
680                 }
681                 random_shuffle(ov.begin(), ov.end());
682                 
683                 ov.setLabel(label);     
684
685                 return ov;
686         }
687         catch(exception& e) {
688                 m->errorOut(e, "SharedRAbundVector", "getOrderVector");
689                 exit(1);
690         }
691 }
692
693 /***********************************************************************/
694