]> git.donarmstrong.com Git - mothur.git/blob - sharedrabundvector.cpp
fixed bug with trim.flows that was adding flow files names to the .flow.files file...
[mothur.git] / sharedrabundvector.cpp
1 /*
2  *  sharedvector.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/5/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sharedrabundvector.h" 
11 #include "sabundvector.hpp"
12 #include "ordervector.hpp"
13 #include "sharedutilities.h"
14
15
16 /***********************************************************************/
17 SharedRAbundVector::SharedRAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {} 
18 /***********************************************************************/
19
20 SharedRAbundVector::~SharedRAbundVector() {
21         //for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
22
23 }
24
25 /***********************************************************************/
26
27 SharedRAbundVector::SharedRAbundVector(int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) {
28                 individual newGuy;
29                 //initialize data
30                 for (int i=0; i< n; i++) {
31                         newGuy.bin = i;
32                         newGuy.abundance = 0;
33                         data.push_back(newGuy);
34                 }
35 }
36
37 /***********************************************************************
38
39 SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : DataVector(id), data(rav) {
40         try {
41                 numBins = 0;
42                 maxRank = 0;
43                 numSeqs = 0;
44                 
45                 for(int i=0;i<data.size();i++){
46                         if(data[i].abundance != 0)              {       numBins = i+1;          }
47                         if(data[i].abundance > maxRank) {       maxRank = data[i].abundance;    }
48                         numSeqs += data[i].abundance;
49                 }
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
53                 exit(1);
54         }
55 }
56
57
58 /***********************************************************************/
59 //reads a shared file
60 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
61         try {
62                 m->clearAllGroups();
63                 vector<string> allGroups;
64                 
65                 int num, inputData, count;
66                 count = 0;  
67                 string holdLabel, nextLabel, groupN;
68                 individual newguy;
69                 
70                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }  lookup.clear();
71                 
72                 //are we at the beginning of the file??
73                 if (m->saveNextLabel == "") {  
74                         f >> label; 
75         
76                         //is this a shared file that has headers
77                         if (label == "label") { 
78                                 //gets "group"
79                                 f >> label; m->gobble(f);
80                                 
81                                 //gets "numOtus"
82                                 f >> label; m->gobble(f);
83                                 
84                                 //eat rest of line
85                                 label = m->getline(f); m->gobble(f);
86                                 
87                                 //parse labels to save
88                                 istringstream iStringStream(label);
89                                 m->binLabelsInFile.clear();
90                                 while(!iStringStream.eof()){
91                                         if (m->control_pressed) { break; }
92                                         string temp;
93                                         iStringStream >> temp;  m->gobble(iStringStream);
94                 
95                                         m->binLabelsInFile.push_back(temp);
96                                 }
97                                 
98                                 f >> label;
99                         }
100                 }else { label = m->saveNextLabel; }
101                 
102                 //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
103                 m->currentBinLabels = m->binLabelsInFile;
104                 
105                 //read in first row since you know there is at least 1 group.
106                 f >> groupN >> num;
107                 holdLabel = label;
108                 
109                 //add new vector to lookup
110                 SharedRAbundVector* temp = new SharedRAbundVector();
111                 lookup.push_back(temp);
112                 lookup[0]->setLabel(label);
113                 lookup[0]->setGroup(groupN);
114                 
115                 allGroups.push_back(groupN);
116                 
117                 //fill vector.  data = first sharedrabund in file
118                 for(int i=0;i<num;i++){
119                         f >> inputData;
120                         
121                         lookup[0]->push_back(inputData, groupN); //abundance, bin, group
122                         push_back(inputData, groupN);
123                         
124                         if (inputData > maxRank) { maxRank = inputData; }
125                 }
126                 
127                 m->gobble(f);
128                 
129                 if (!(f.eof())) { f >> nextLabel; }
130         
131                 //read the rest of the groups info in
132                 while ((nextLabel == holdLabel) && (f.eof() != true)) {
133                         f >> groupN >> num;
134                         count++;
135                         
136                         allGroups.push_back(groupN);
137                         
138                         //add new vector to lookup
139                         temp = new SharedRAbundVector();
140                         lookup.push_back(temp);
141                         lookup[count]->setLabel(label);
142                         lookup[count]->setGroup(groupN);
143
144                         //fill vector.  
145                         for(int i=0;i<num;i++){
146                                 f >> inputData;
147                                 
148                                 lookup[count]->push_back(inputData, groupN); //abundance, bin, group
149                         }
150                         
151                         m->gobble(f);
152                                 
153                         if (f.eof() != true) { f >> nextLabel; }
154                 }
155                 m->saveNextLabel = nextLabel;
156                 m->setAllGroups(allGroups);
157         }
158         catch(exception& e) {
159                 m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector");
160                 exit(1);
161         }
162 }
163
164 /***********************************************************************/
165
166 void SharedRAbundVector::set(int binNumber, int newBinSize, string groupname){
167         try {
168                 int oldBinSize = data[binNumber].abundance;
169                 data[binNumber].abundance = newBinSize;
170                 data[binNumber].group = groupname;
171         
172                 if(newBinSize > maxRank)        {       maxRank = newBinSize;   }
173         
174                 numSeqs += (newBinSize - oldBinSize);
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "SharedRAbundVector", "set");
178                 exit(1);
179         }
180 }
181 /***********************************************************************/
182
183 void SharedRAbundVector::setData(vector <individual> newData){
184         data = newData;
185 }
186
187 /***********************************************************************/
188
189 int SharedRAbundVector::getAbundance(int index){
190         return data[index].abundance;
191         
192 }
193 /***********************************************************************/
194
195 int SharedRAbundVector::numNZ(){
196         int sum = 0;
197         for(int i = 1; i < numBins; i++)
198                 if(data[i].abundance > 0)
199                         sum++;
200         return sum;
201 }
202 /***********************************************************************/
203
204 void SharedRAbundVector::sortD(){
205         struct individual indObj;
206         sort(data.begin()+1, data.end(), indObj);
207 }
208 /***********************************************************************/
209
210 individual SharedRAbundVector::get(int index){
211         return data[index];
212         
213 }
214 /***********************************************************************/
215
216 vector <individual> SharedRAbundVector::getData(){
217         return data;
218 }
219 /***********************************************************************/
220
221 void SharedRAbundVector::clear(){
222         numBins = 0;
223         maxRank = 0;
224         numSeqs = 0;
225         data.clear();
226         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
227         lookup.clear();
228 }
229 /***********************************************************************/
230
231 void SharedRAbundVector::push_back(int binSize, string groupName){
232         try {
233                 individual newGuy;
234                 newGuy.abundance = binSize;
235                 newGuy.group = groupName;
236                 newGuy.bin = data.size();
237                 
238                 data.push_back(newGuy);
239                 numBins++;
240         
241                 if(binSize > maxRank){
242                         maxRank = binSize;
243                 }
244         
245                 numSeqs += binSize;
246         }
247         catch(exception& e) {
248                 m->errorOut(e, "SharedRAbundVector", "push_back");
249                 exit(1);
250         }
251 }
252
253 /***********************************************************************/
254
255 void SharedRAbundVector::insert(int binSize, int otu, string groupName){
256         try {
257                 individual newGuy;
258                 newGuy.abundance = binSize;
259                 newGuy.group = groupName;
260                 newGuy.bin = otu;
261                 
262                 data.insert(data.begin()+otu, newGuy);
263                 numBins++;
264         
265                 if(binSize > maxRank){
266                         maxRank = binSize;
267                 }
268         
269                 numSeqs += binSize;
270         }
271         catch(exception& e) {
272                 m->errorOut(e, "SharedRAbundVector", "insert");
273                 exit(1);
274         }
275 }
276
277 /***********************************************************************/
278
279 void SharedRAbundVector::push_front(int binSize, int otu, string groupName){
280         try {
281                 individual newGuy;
282                 newGuy.abundance = binSize;
283                 newGuy.group = groupName;
284                 newGuy.bin = otu;
285                 
286                 data.insert(data.begin(), newGuy);
287                 numBins++;
288         
289                 if(binSize > maxRank){
290                         maxRank = binSize;
291                 }
292         
293                 numSeqs += binSize;
294         }
295         catch(exception& e) {
296                 m->errorOut(e, "SharedRAbundVector", "push_front");
297                 exit(1);
298         }
299 }
300
301 /***********************************************************************/
302 void SharedRAbundVector::pop_back(){
303         numSeqs -= data[data.size()-1].abundance;
304         numBins--;
305         return data.pop_back();
306 }
307
308 /***********************************************************************/
309
310
311 vector<individual>::reverse_iterator SharedRAbundVector::rbegin(){
312         return data.rbegin();                           
313 }
314
315 /***********************************************************************/
316
317 vector<individual>::reverse_iterator SharedRAbundVector::rend(){
318         return data.rend();                                     
319 }
320
321 /***********************************************************************/
322 void SharedRAbundVector::resize(int size){
323         
324         data.resize(size);
325 }
326
327 /***********************************************************************/
328
329 int SharedRAbundVector::size(){
330         return data.size();
331 }
332
333
334 /***********************************************************************/
335 void SharedRAbundVector::printHeaders(ostream& output){
336         try {
337                 string snumBins = toString(numBins);
338                 output << "label\tGroup\tnumOtus\t";
339                 if (m->sharedHeaderMode == "tax") {
340                         for (int i = 0; i < numBins; i++) {  
341                                 
342                                 //if there is a bin label use it otherwise make one
343                                 string binLabel = "PhyloType";
344                                 string sbinNumber = toString(i+1);
345                                 if (sbinNumber.length() < snumBins.length()) { 
346                                         int diff = snumBins.length() - sbinNumber.length();
347                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
348                                 }
349                                 binLabel += sbinNumber;
350                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
351                                 
352                                 output << binLabel << '\t'; 
353                         }
354                         output << endl;
355                 }else {
356                         for (int i = 0; i < numBins; i++) {  
357                                 //if there is a bin label use it otherwise make one
358                                 string binLabel = "Otu";
359                                 string sbinNumber = toString(i+1);
360                                 if (sbinNumber.length() < snumBins.length()) { 
361                                         int diff = snumBins.length() - sbinNumber.length();
362                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
363                                 }
364                                 binLabel += sbinNumber;
365                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
366                                 
367                                 output << binLabel << '\t'; 
368                         }
369                         
370                         output << endl;
371                 }
372                 m->printedHeaders = true;
373         }
374         catch(exception& e) {
375                 m->errorOut(e, "SharedRAbundVector", "printHeaders");
376                 exit(1);
377         }
378 }
379 /***********************************************************************/
380 void SharedRAbundVector::print(ostream& output) {
381         try {
382                 output << numBins << '\t';
383         
384                 for(int i=0;i<data.size();i++){         output << data[i].abundance << '\t';            }
385                 output << endl;
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "SharedRAbundVector", "print");
389                 exit(1);
390         }
391 }
392 /***********************************************************************/
393 string SharedRAbundVector::getGroup(){
394         return group;
395 }
396
397 /***********************************************************************/
398
399 void SharedRAbundVector::setGroup(string groupName){
400         group = groupName;
401 }
402 /***********************************************************************/
403 int SharedRAbundVector::getGroupIndex()  { return index; }
404 /***********************************************************************/
405 void SharedRAbundVector::setGroupIndex(int vIndex)      { index = vIndex; }
406 /***********************************************************************/
407 int SharedRAbundVector::getNumBins(){
408                 return numBins;
409 }
410
411 /***********************************************************************/
412
413 int SharedRAbundVector::getNumSeqs(){
414         return numSeqs;
415 }
416
417 /***********************************************************************/
418
419 int SharedRAbundVector::getMaxRank(){
420         return maxRank;
421 }
422 /***********************************************************************/
423
424 SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
425         return *this;                   
426 }
427 /***********************************************************************/
428 vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
429         try {
430                 SharedUtil* util;
431                 util = new SharedUtil();
432                 
433                 vector<string> Groups = m->getGroups();
434                 vector<string> allGroups = m->getAllGroups();
435                 util->setGroups(Groups, allGroups);
436                 m->setGroups(Groups);
437                 
438                 bool remove = false;
439                 for (int i = 0; i < lookup.size(); i++) {
440                         //if this sharedrabund is not from a group the user wants then delete it.
441                         if (util->isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { 
442                                 remove = true;
443                                 delete lookup[i]; lookup[i] = NULL;
444                                 lookup.erase(lookup.begin()+i); 
445                                 i--; 
446                         }
447                 }
448                 
449                 delete util;
450                 
451                 if (remove) { eliminateZeroOTUS(lookup); }
452         
453                 return lookup;
454         }
455         catch(exception& e) {
456                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
457                 exit(1);
458         }
459 }
460 //**********************************************************************************************************************
461 int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
462                 try {
463                         
464                         vector<SharedRAbundVector*> newLookup;
465                         for (int i = 0; i < thislookup.size(); i++) {
466                                 SharedRAbundVector* temp = new SharedRAbundVector();
467                                 temp->setLabel(thislookup[i]->getLabel());
468                                 temp->setGroup(thislookup[i]->getGroup());
469                                 newLookup.push_back(temp);
470                         }
471                         
472                         //for each bin
473                         vector<string> newBinLabels;
474                         string snumBins = toString(thislookup[0]->getNumBins());
475                         for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
476                                 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
477                                 
478                                 //look at each sharedRabund and make sure they are not all zero
479                                 bool allZero = true;
480                                 for (int j = 0; j < thislookup.size(); j++) {
481                                         if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
482                                 }
483                                 
484                                 //if they are not all zero add this bin
485                                 if (!allZero) {
486                                         for (int j = 0; j < thislookup.size(); j++) {
487                                                 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
488                                         }
489                                         
490                                         //if there is a bin label use it otherwise make one
491                                         string binLabel = "Otu";
492                                         string sbinNumber = toString(i+1);
493                                         if (sbinNumber.length() < snumBins.length()) { 
494                                                 int diff = snumBins.length() - sbinNumber.length();
495                                                 for (int h = 0; h < diff; h++) { binLabel += "0"; }
496                                         }
497                                         binLabel += sbinNumber; 
498                                         if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
499                                         
500                                         newBinLabels.push_back(binLabel);
501                                 }
502                         }
503                         
504                         for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
505                         
506                         thislookup = newLookup;
507                         m->currentBinLabels = newBinLabels;
508                         
509                         return 0;
510                         
511                 }
512                 catch(exception& e) {
513                         m->errorOut(e, "SharedRAbundVector", "eliminateZeroOTUS");
514                         exit(1);
515                 }
516         }
517         
518 /***********************************************************************/
519 vector<SharedRAbundFloatVector*> SharedRAbundVector::getSharedRAbundFloatVectors(vector<SharedRAbundVector*> thislookup){
520         try {
521                 vector<SharedRAbundFloatVector*> newLookupFloat;        
522                 for (int i = 0; i < lookup.size(); i++) {
523                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
524                         temp->setLabel(thislookup[i]->getLabel());
525                         temp->setGroup(thislookup[i]->getGroup());
526                         newLookupFloat.push_back(temp);
527                 }
528                 
529                 for (int i = 0; i < thislookup.size(); i++) {
530                         
531                         for (int j = 0; j < thislookup[i]->getNumBins(); j++) {
532                                 
533                                 if (m->control_pressed) { return newLookupFloat; }
534                                 
535                                 int abund = thislookup[i]->getAbundance(j);
536                                 
537                                 float relabund = abund / (float) thislookup[i]->getNumSeqs();
538                                 
539                                 newLookupFloat[i]->push_back(relabund, thislookup[i]->getGroup());
540                         }
541                 }
542                 
543                 return newLookupFloat;
544         }
545         catch(exception& e) {
546                 m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors");
547                 exit(1);
548         }
549 }
550 /***********************************************************************/
551
552 RAbundVector SharedRAbundVector::getRAbundVector() {
553         try {
554                 RAbundVector rav;
555                 
556                 for (int i = 0; i < data.size(); i++) {
557                         if(data[i].abundance != 0) {
558                                 rav.push_back(data[i].abundance);
559                         }
560                 }
561                 
562                 rav.setLabel(label);
563                 return rav;
564         }
565         catch(exception& e) {
566                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector");
567                 exit(1);
568         }
569 }
570 /***********************************************************************/
571
572 RAbundVector SharedRAbundVector::getRAbundVector2() {
573         try {
574                 RAbundVector rav;
575                 for(int i = 0; i < numBins; i++)
576                         if(data[i].abundance != 0)
577                                 rav.push_back(data[i].abundance-1);
578                 return rav;
579         }
580         catch(exception& e) {
581                 m->errorOut(e, "SharedRAbundVector", "getRAbundVector2");
582                 exit(1);
583         }
584 }
585 /***********************************************************************/
586
587 SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
588         try {
589                 SharedSAbundVector sav(maxRank+1);
590                 
591                 for(int i=0;i<data.size();i++){
592                         int abund = data[i].abundance;
593                         sav.set(abund, sav.getAbundance(abund) + 1, group);
594                 }
595                 
596                 sav.set(0, 0, group);
597                 sav.setLabel(label);
598                 sav.setGroup(group);
599                 
600                 return sav;
601         }
602         catch(exception& e) {
603                 m->errorOut(e, "SharedRAbundVector", "getSharedSAbundVector");
604                 exit(1);
605         }
606 }
607 /***********************************************************************/
608
609 SAbundVector SharedRAbundVector::getSAbundVector() {
610         try {
611                 SAbundVector sav(maxRank+1);
612                 
613                 for(int i=0;i<data.size();i++){
614                         int abund = data[i].abundance;
615                         sav.set(abund, sav.get(abund) + 1);
616                 }
617                 sav.set(0, 0);
618                 sav.setLabel(label);
619                 return sav;
620         }
621         catch(exception& e) {
622                 m->errorOut(e, "SharedRAbundVector", "getSAbundVector");                
623                 exit(1);
624         }
625 }
626
627 /***********************************************************************/
628
629 SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
630         try {
631                 SharedOrderVector ov;
632         
633                 for(int i=0;i<data.size();i++){
634                         for(int j=0;j<data[i].abundance;j++){
635                                 ov.push_back(data[i].bin, data[i].abundance, data[i].group);
636                         }
637                 }
638                 random_shuffle(ov.begin(), ov.end());
639
640                 ov.setLabel(label);     
641                 ov.updateStats();
642                 
643                 return ov;
644         }
645         catch(exception& e) {
646                 m->errorOut(e, "SharedRAbundVector", "getSharedOrderVector");
647                 exit(1);
648         }
649 }
650 /***********************************************************************/
651
652 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
653         try {
654                 OrderVector ov;
655                 for(int i=0;i<numBins;i++){
656                         for(int j=0;j<data[i].abundance;j++){
657                                 ov.push_back(i);
658                         }
659                 }
660                 random_shuffle(ov.begin(), ov.end());
661                 
662                 ov.setLabel(label);     
663
664                 return ov;
665         }
666         catch(exception& e) {
667                 m->errorOut(e, "SharedRAbundVector", "getOrderVector");
668                 exit(1);
669         }
670 }
671
672 /***********************************************************************/
673