]> git.donarmstrong.com Git - mothur.git/blob - sharedrabundvector.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / sharedrabundvector.cpp
1 /*
2  *  sharedvector.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/5/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10
11 using namespace std;
12
13 #include "sharedrabundvector.h" 
14 #include "sabundvector.hpp"
15 #include "ordervector.hpp"
16 #include "sharedutilities.h"
17
18
19 /***********************************************************************/
20
21 SharedRAbundVector::SharedRAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {};
22
23 /***********************************************************************/
24
25 SharedRAbundVector::SharedRAbundVector(int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) {
26                 individual newGuy;
27                 //initialize data
28                 for (int i=0; i< n; i++) {
29                         newGuy.bin = i;
30                         newGuy.abundance = 0;
31                         data.push_back(newGuy);
32                 }
33 };
34
35 /***********************************************************************
36
37 SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : DataVector(id), data(rav) {
38         try {
39                 numBins = 0;
40                 maxRank = 0;
41                 numSeqs = 0;
42                 
43                 for(int i=0;i<data.size();i++){
44                         if(data[i].abundance != 0)              {       numBins = i+1;          }
45                         if(data[i].abundance > maxRank) {       maxRank = data[i].abundance;    }
46                         numSeqs += data[i].abundance;
47                 }
48         }
49         catch(exception& e) {
50                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
51                 exit(1);
52         }
53         catch(...) {
54                 cout << "An unknown error has occurred in the SharedRAbundVector class function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
55                 exit(1);
56         }
57 }
58
59
60 /***********************************************************************/
61 //reads a shared file
62 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
63         try {
64                 globaldata = GlobalData::getInstance();
65                 
66                 if (globaldata->gGroupmap == NULL) {  groupmap = new GroupMap(); }
67                 
68                 int num, inputData, pos, count;
69                 count = 0;  
70                 string holdLabel, nextLabel, groupN;
71                 individual newguy;
72                 
73                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
74                 lookup.clear();
75                 
76                 //read in first row since you know there is at least 1 group.
77                 f >> label >> groupN >> num;
78                 holdLabel = label;
79                 
80                 //add new vector to lookup
81                 SharedRAbundVector* temp = new SharedRAbundVector();
82                 lookup.push_back(temp);
83                 lookup[0]->setLabel(label);
84                 lookup[0]->setGroup(groupN);
85                 
86                 if (globaldata->gGroupmap == NULL) { 
87                         //save group in groupmap
88                         groupmap->namesOfGroups.push_back(groupN);
89                         groupmap->groupIndex[groupN] = 0;
90                 }
91                 
92                 //fill vector.  data = first sharedrabund in file
93                 for(int i=0;i<num;i++){
94                         f >> inputData;
95                         
96                         lookup[0]->push_back(inputData, i, groupN); //abundance, bin, group
97                         push_back(inputData, i, groupN);
98                         numSeqs += inputData;
99                         numBins++;
100                         if (inputData > maxRank) { maxRank = inputData; }
101                         
102                 }
103                 
104                 //save position in file in case next line is a new label.
105                 pos = f.tellg();
106                 
107                 if (f.eof() != true) { f >> nextLabel; }
108                 
109                 //read the rest of the groups info in
110                 while ((nextLabel == holdLabel) && (f.eof() != true)) {
111                         f >> groupN >> num;
112                         count++;
113                         
114                         if (globaldata->gGroupmap == NULL) { 
115                                 //save group in groupmap
116                                 groupmap->namesOfGroups.push_back(groupN);
117                                 groupmap->groupIndex[groupN] = count;
118                         }
119                         
120                         //add new vector to lookup
121                         temp = new SharedRAbundVector();
122                         lookup.push_back(temp);
123                         lookup[count]->setLabel(label);
124                         lookup[count]->setGroup(groupN);
125
126                         //fill vector.  
127                         for(int i=0;i<num;i++){
128                                 f >> inputData;
129                                 lookup[count]->push_back(inputData, i, groupN); //abundance, bin, group
130                         }
131                         
132                         //save position in file in case next line is a new label.
133                         pos = f.tellg();
134         
135                         if (f.eof() != true) { f >> nextLabel; }
136                 }
137                 
138                 //put file pointer back since you are now at a new distance label
139                 f.seekg(pos, ios::beg);
140         
141                 if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; }
142                 
143         }
144         catch(exception& e) {
145                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
146                 exit(1);
147         }
148         catch(...) {
149                 cout << "An unknown error has occurred in the SharedRAbundVector class function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
150                 exit(1);
151         }
152 }
153
154 /***********************************************************************/
155
156 void SharedRAbundVector::set(int binNumber, int newBinSize, string groupname){
157         try {
158                 int oldBinSize = data[binNumber].abundance;
159                 data[binNumber].abundance = newBinSize;
160                 data[binNumber].group = groupname;
161         
162                 if(newBinSize > maxRank)        {       maxRank = newBinSize;   }
163         
164                 numSeqs += (newBinSize - oldBinSize);
165         }
166         catch(exception& e) {
167                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function set. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168                 exit(1);
169         }
170         catch(...) {
171                 cout << "An unknown error has occurred in the SharedRAbundVector class function set. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
172                 exit(1);
173         }
174 }
175 /***********************************************************************/
176
177 void SharedRAbundVector::setData(vector <individual> newData){
178         data = newData;
179 }
180
181 /***********************************************************************/
182
183 int SharedRAbundVector::getAbundance(int index){
184         return data[index].abundance;
185         
186 }
187 /***********************************************************************/
188
189 int SharedRAbundVector::numNZ(){
190         int sum = 0;
191         for(int i = 1; i < numBins; i++)
192                 if(data[i].abundance > 0)
193                         sum++;
194         return sum;
195 }
196 /***********************************************************************/
197
198 void SharedRAbundVector::sortD(){
199         struct individual indObj;
200         sort(data.begin()+1, data.end(), indObj);
201 }
202 /***********************************************************************/
203
204 individual SharedRAbundVector::get(int index){
205         return data[index];
206         
207 }
208 /***********************************************************************/
209
210 vector <individual> SharedRAbundVector::getData(){
211         return data;
212 }
213 /***********************************************************************/
214
215 void SharedRAbundVector::push_back(int binSize, int otu, string groupName){
216         try {
217                 individual newGuy;
218                 newGuy.abundance = binSize;
219                 newGuy.group = groupName;
220                 newGuy.bin = otu;
221                 
222                 data.push_back(newGuy);
223                 numBins++;
224         
225                 if(binSize > maxRank){
226                         maxRank = binSize;
227                 }
228         
229                 numSeqs += binSize;
230         }
231         catch(exception& e) {
232                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
233                 exit(1);
234         }
235         catch(...) {
236                 cout << "An unknown error has occurred in the SharedRAbundVector class function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237                 exit(1);
238         }
239 }
240
241 /***********************************************************************/
242
243 void SharedRAbundVector::insert(int binSize, int otu, string groupName){
244         try {
245                 individual newGuy;
246                 newGuy.abundance = binSize;
247                 newGuy.group = groupName;
248                 newGuy.bin = otu;
249                 
250                 data.insert(data.begin()+otu, newGuy);
251                 numBins++;
252         
253                 if(binSize > maxRank){
254                         maxRank = binSize;
255                 }
256         
257                 numSeqs += binSize;
258         }
259         catch(exception& e) {
260                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function insert. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
261                 exit(1);
262         }
263         catch(...) {
264                 cout << "An unknown error has occurred in the SharedRAbundVector class function insert. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }
267 }
268
269 /***********************************************************************/
270
271 void SharedRAbundVector::push_front(int binSize, int otu, string groupName){
272         try {
273                 individual newGuy;
274                 newGuy.abundance = binSize;
275                 newGuy.group = groupName;
276                 newGuy.bin = otu;
277                 
278                 data.insert(data.begin(), newGuy);
279                 numBins++;
280         
281                 if(binSize > maxRank){
282                         maxRank = binSize;
283                 }
284         
285                 numSeqs += binSize;
286         }
287         catch(exception& e) {
288                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function push_front. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
289                 exit(1);
290         }
291         catch(...) {
292                 cout << "An unknown error has occurred in the SharedRAbundVector class function push_front. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293                 exit(1);
294         }
295 }
296
297 /***********************************************************************/
298 void SharedRAbundVector::pop_back(){
299         numSeqs -= data[data.size()-1].abundance;
300         numBins--;
301         return data.pop_back();
302 }
303
304 /***********************************************************************/
305
306
307 vector<individual>::reverse_iterator SharedRAbundVector::rbegin(){
308         return data.rbegin();                           
309 }
310
311 /***********************************************************************/
312
313 vector<individual>::reverse_iterator SharedRAbundVector::rend(){
314         return data.rend();                                     
315 }
316
317 /***********************************************************************/
318 void SharedRAbundVector::resize(int size){
319         
320         data.resize(size);
321 }
322
323 /***********************************************************************/
324
325 int SharedRAbundVector::size(){
326         return data.size();
327 }
328
329 /***********************************************************************/
330 void SharedRAbundVector::print(ostream& output){
331         try {
332                 output << numBins << '\t';
333         
334                 for(int i=0;i<data.size();i++){         output << data[i].abundance << '\t';            }
335                 output << endl;
336         }
337         catch(exception& e) {
338                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
339                 exit(1);
340         }
341         catch(...) {
342                 cout << "An unknown error has occurred in the SharedRAbundVector class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
343                 exit(1);
344         }
345 }
346 /***********************************************************************/
347 string SharedRAbundVector::getGroup(){
348         return group;
349 }
350
351 /***********************************************************************/
352
353 void SharedRAbundVector::setGroup(string groupName){
354         group = groupName;
355 }
356 /***********************************************************************/
357 int SharedRAbundVector::getGroupIndex()  { return index; }
358 /***********************************************************************/
359 void SharedRAbundVector::setGroupIndex(int vIndex)      { index = vIndex; }
360 /***********************************************************************/
361 int SharedRAbundVector::getNumBins(){
362         return numBins;
363 }
364
365 /***********************************************************************/
366
367 int SharedRAbundVector::getNumSeqs(){
368         return numSeqs;
369 }
370
371 /***********************************************************************/
372
373 int SharedRAbundVector::getMaxRank(){
374         return maxRank;
375 }
376 /***********************************************************************/
377
378 SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
379         return *this;                   
380 }
381 /***********************************************************************/
382 vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
383         try {
384                 SharedUtil* util;
385                 util = new SharedUtil();
386                 
387                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
388
389                 for (int i = 0; i < lookup.size(); i++) {
390                         //if this sharedrabund is not from a group the user wants then delete it.
391                         if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { 
392                                 delete lookup[i]; 
393                                 lookup.erase(lookup.begin()+i); 
394                                 i--; 
395                         }
396                 }
397
398                 return lookup;
399         }
400         catch(exception& e) {
401                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
402                 exit(1);
403         }
404         catch(...) {
405                 cout << "An unknown error has occurred in the SharedRAbundVector class function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
406                 exit(1);
407         }
408 }
409 /***********************************************************************/
410
411 RAbundVector SharedRAbundVector::getRAbundVector() {
412         try {
413                 RAbundVector rav(data.size());
414                 
415                 for (int i = 0; i < data.size(); i++) {
416                         rav.set(i, data[i].abundance);
417                 }
418         
419                 return rav;
420         }
421         catch(exception& e) {
422                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
423                 exit(1);
424         }
425         catch(...) {
426                 cout << "An unknown error has occurred in the SharedRAbundVector class function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
427                 exit(1);
428         }
429 }
430 /***********************************************************************/
431
432 RAbundVector SharedRAbundVector::getRAbundVector2() {
433         try {
434                 RAbundVector rav;
435                 for(int i = 0; i < numBins; i++)
436                         if(data[i].abundance != 0)
437                                 rav.push_back(data[i].abundance-1);
438                 return rav;
439         }
440         catch(exception& e) {
441                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
442                 exit(1);
443         }
444         catch(...) {
445                 cout << "An unknown error has occurred in the SharedRAbundVector class function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
446                 exit(1);
447         }
448 }
449 /***********************************************************************/
450
451 SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
452         try {
453                 SharedSAbundVector sav(maxRank+1);
454                 
455                 for(int i=0;i<data.size();i++){
456                         int abund = data[i].abundance;
457                         sav.set(abund, sav.getAbundance(abund) + 1, group);
458                 }
459                 
460                 sav.set(0, 0, group);
461                 sav.setLabel(label);
462                 sav.setGroup(group);
463                 
464                 return sav;
465         }
466         catch(exception& e) {
467                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSharedSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
468                 exit(1);
469         }
470         catch(...) {
471                 cout << "An unknown error has occurred in the SharedRAbundVector class function getSharedSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
472                 exit(1);
473         }
474 }
475 /***********************************************************************/
476
477 SAbundVector SharedRAbundVector::getSAbundVector() {
478         try {
479                 SAbundVector sav(maxRank+1);
480                 
481                 for(int i=0;i<data.size();i++){
482                         int abund = data[i].abundance;
483                         sav.set(abund, sav.get(abund) + 1);
484                 }
485                 sav.set(0, 0);
486                 sav.setLabel(label);
487                 return sav;
488         }
489         catch(exception& e) {
490                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
491                 exit(1);
492         }
493         catch(...) {
494                 cout << "An unknown error has occurred in the SharedRAbundVector class function getSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
495                 exit(1);
496         }
497 }
498
499 /***********************************************************************/
500
501 SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
502         try {
503                 SharedOrderVector ov;
504         
505                 for(int i=0;i<data.size();i++){
506                         for(int j=0;j<data[i].abundance;j++){
507                                 ov.push_back(data[i].bin, data[i].abundance, data[i].group);
508                         }
509                 }
510                 random_shuffle(ov.begin(), ov.end());
511
512                 ov.setLabel(label);     
513                 ov.updateStats();
514                 
515                 return ov;
516         }
517         catch(exception& e) {
518                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
519                 exit(1);
520         }
521         catch(...) {
522                 cout << "An unknown error has occurred in the SharedRAbundVector class function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
523                 exit(1);
524         }
525 }
526 /***********************************************************************/
527
528 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
529         try {
530                 OrderVector ov;
531         
532                 for(int i=0;i<data.size();i++){
533                         for(int j=0;j<data[i].abundance;j++){
534                                 ov.push_back(i);
535                         }
536                 }
537                 random_shuffle(ov.begin(), ov.end());
538
539                 ov.setLabel(label);     
540                 return ov;
541         }
542         catch(exception& e) {
543                 cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
544                 exit(1);
545         }
546         catch(...) {
547                 cout << "An unknown error has occurred in the SharedRAbundVector class function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
548                 exit(1);
549         }
550 }
551
552 /***********************************************************************/
553