]> git.donarmstrong.com Git - mothur.git/blob - sharedordervector.cpp
speed up rarefaction.shared
[mothur.git] / sharedordervector.cpp
1 /*
2  *  sharedSharedOrderVector.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/9/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12
13 #include "sharedordervector.h"
14 #include "sharedutilities.h"
15
16 /***********************************************************************/
17
18 SharedOrderVector::SharedOrderVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0)  {}
19
20 /***********************************************************************/
21
22 SharedOrderVector::SharedOrderVector(string id, vector<individual>  ov) : 
23                                                                                         DataVector(id), data(ov)
24 {
25                 updateStats();
26 }
27
28 /***********************************************************************/
29 //This function is used to read a .shared file for the collect.shared, rarefaction.shared and summary.shared commands
30 //if you don't use a list and groupfile.  
31
32 SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() {  //reads in a shared file
33         try {
34                 globaldata = GlobalData::getInstance();
35                 maxRank = 0; numBins = 0; numSeqs = 0;
36                 
37                 if (globaldata->gGroupmap == NULL) {  groupmap = new GroupMap(); }
38                 
39                 int num, inputData, pos, count;
40                 count = 0;  numSeqs = 0;
41                 string holdLabel, nextLabel, groupN;
42                 individual newguy;
43                 
44                 //read in first row since you know there is at least 1 group.
45                 f >> label >> groupN >> num;
46                 holdLabel = label;
47                 
48                 if (globaldata->gGroupmap == NULL) { 
49                         //save group in groupmap
50                         groupmap->namesOfGroups.push_back(groupN);
51                         groupmap->groupIndex[groupN] = 0;
52                 }
53                 
54                 for(int i=0;i<num;i++){
55                         f >> inputData;
56                         
57                         for (int j = 0; j < inputData; j++) {
58                                 push_back(i, i, groupN);
59                                 numSeqs++;
60                         }
61                 }
62                 
63                 //save position in file in case next line is a new label.
64                 pos = f.tellg();
65                 
66                 if (f.eof() != true) { f >> nextLabel; }
67                 
68                 //read the rest of the groups info in
69                 while ((nextLabel == holdLabel) && (f.eof() != true)) {
70                         f >> groupN >> num;
71                         count++;
72                         
73                         if (globaldata->gGroupmap == NULL) { 
74                                 //save group in groupmap
75                                 groupmap->namesOfGroups.push_back(groupN);
76                                 groupmap->groupIndex[groupN] = count;
77                         }
78                         
79                         for(int i=0;i<num;i++){
80                                 f >> inputData;
81                                 
82                                 for (int j = 0; j < inputData; j++) {
83                                         push_back(i, i, groupN);
84                                         numSeqs++;
85                                 }
86                         }
87                         
88                         //save position in file in case next line is a new label.
89                         pos = f.tellg();
90         
91                         if (f.eof() != true) { f >> nextLabel; }
92
93                 }
94                 
95                 //put file pointer back since you are now at a new distance label
96                 f.seekg(pos, ios::beg);
97         
98                 if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; }
99                 
100                 updateStats();
101                 
102         }
103         catch(exception& e) {
104                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function SharedOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105                 exit(1);
106         }
107         catch(...) {
108                 cout << "An unknown error has occurred in the SharedOrderVector class function SharedOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
109                 exit(1);
110         }
111 }
112 /***********************************************************************/
113
114 int SharedOrderVector::getNumBins(){
115         return numBins;
116 }
117
118 /***********************************************************************/
119
120 int SharedOrderVector::getNumSeqs(){
121         return numSeqs;
122 }
123
124 /***********************************************************************/
125
126 int SharedOrderVector::getMaxRank(){
127         return maxRank;
128 }
129
130
131 /***********************************************************************/
132
133
134
135 void SharedOrderVector::set(int index, int binNumber, int abund, string groupName){
136         
137         data[index].group = groupName;
138         data[index].bin = binNumber;
139         data[index].abundance = abund;
140         //if (abund > maxRank) { maxRank = abund; }
141         updateStats();
142 }
143
144 /***********************************************************************/
145
146 individual SharedOrderVector::get(int index){
147         return data[index];                     
148 }
149
150
151 /***********************************************************************/
152 //commented updateStats out to improve speed, but whoever calls this must remember to update when they are done with all the pushbacks they are doing 
153 void SharedOrderVector::push_back(int binNumber, int abund, string groupName){
154         individual newGuy;
155         newGuy.group = groupName;
156         newGuy.abundance = abund;
157         newGuy.bin = binNumber;
158         data.push_back(newGuy);
159         //numSeqs++;
160         //numBins++;
161         //if (abund > maxRank) { maxRank = abund; }
162         
163         //updateStats();
164 }
165
166 /***********************************************************************/
167
168 void SharedOrderVector::print(ostream& output){
169         try {
170                 output << label << '\t' << numSeqs << '\t';
171         
172                 for(int i=0;i<data.size();i++){
173                         output << data[i].bin << '\t';
174                 }
175                 output << endl;
176         }
177         catch(exception& e) {
178                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }
181         catch(...) {
182                 cout << "An unknown error has occurred in the SharedOrderVector class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183                 exit(1);
184         }
185 }
186
187
188 /***********************************************************************/
189
190 void SharedOrderVector::resize(int){
191         cout << "resize() did nothing in class SharedOrderVector";
192 }
193
194 /***********************************************************************/
195
196
197 vector<individual>::iterator SharedOrderVector::begin(){
198         return data.begin();    
199 }
200
201 /***********************************************************************/
202
203 vector<individual>::iterator SharedOrderVector::end(){
204         return data.end();              
205 }
206
207 /***********************************************************************/
208
209 int SharedOrderVector::size(){
210         return data.size();                                     
211 }
212
213 /***********************************************************************/
214
215 RAbundVector SharedOrderVector::getRAbundVector(){
216         try {
217                 RAbundVector rav(data.size());
218         
219                 for(int i=0;i<numSeqs;i++){
220                         rav.set(data[i].bin, rav.get(data[i].bin) + 1);
221                 }       
222                 sort(rav.rbegin(), rav.rend());
223                 for(int i=numSeqs-1;i>=0;i--){
224                         if(rav.get(i) == 0){    rav.pop_back(); }
225                         else{
226                                 break;
227                         }
228                 }
229                 rav.setLabel(label);
230
231                 return rav;
232         }
233         catch(exception& e) {
234                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
235                 exit(1);
236         }
237         catch(...) {
238                 cout << "An unknown error has occurred in the SharedOrderVector class function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
239                 exit(1);
240         }       
241 }
242 /***********************************************************************/
243
244 OrderVector SharedOrderVector::getOrderVector(map<string,int>* nameMap = NULL) {
245         try {
246                 OrderVector ov;
247         
248                 for (int i = 0; i < data.size(); i++) {
249                         ov.push_back(data[i].bin);
250                 }
251                 
252                 random_shuffle(ov.begin(), ov.end());
253
254                 ov.setLabel(label);     
255                 return ov;
256         }
257         catch(exception& e) {
258                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
259                 exit(1);
260         }
261         catch(...) {
262                 cout << "An unknown error has occurred in the SharedOrderVector class function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
263                 exit(1);
264         }
265 }
266
267
268 /***********************************************************************/
269
270 SAbundVector SharedOrderVector::getSAbundVector(){
271         
272         RAbundVector rav(this->getRAbundVector());
273         return rav.getSAbundVector();
274
275 }
276 /***********************************************************************/
277 SharedRAbundVector SharedOrderVector::getSharedRAbundVector(string group) {
278         try {
279                 SharedRAbundVector sharedRav(data.size());
280                 
281                 sharedRav.setLabel(label);
282                 sharedRav.setGroup(group);
283                 
284                 for (int i = 0; i < data.size(); i++) {
285                         if (data[i].group == group) {
286                                 sharedRav.set(data[i].abundance, sharedRav.getAbundance(data[i].abundance) + 1, data[i].group);
287                         }
288                 }
289                 return sharedRav;
290         }
291         catch(exception& e) {
292                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293                 exit(1);
294         }
295         catch(...) {
296                 cout << "An unknown error has occurred in the SharedOrderVector class function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297                 exit(1);
298         }
299         
300 }
301 /***********************************************************************/
302 vector<SharedRAbundVector*> SharedOrderVector::getSharedRAbundVector() {
303         try {
304                 SharedUtil* util;
305                 util = new SharedUtil();
306                 vector<SharedRAbundVector*> lookup;
307                 
308                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
309                 util->getSharedVectors(globaldata->Groups, lookup, this);
310                 
311                 return lookup;
312         }
313         catch(exception& e) {
314                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
315                 exit(1);
316         }
317         catch(...) {
318                 cout << "An unknown error has occurred in the SharedOrderVector class function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
319                 exit(1);
320         }
321         
322 }
323 /***********************************************************************/
324 SharedSAbundVector SharedOrderVector::getSharedSAbundVector(string group) {
325         try {
326                 
327                 SharedRAbundVector sharedRav(this->getSharedRAbundVector(group));
328                 return sharedRav.getSharedSAbundVector();
329                                 
330         }
331         catch(exception& e) {
332                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
333                 exit(1);
334         }
335         catch(...) {
336                 cout << "An unknown error has occurred in the SharedOrderVector class function getSharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
337                 exit(1);
338         }
339         
340 }
341
342 /***********************************************************************/
343
344 SharedOrderVector SharedOrderVector::getSharedOrderVector(){
345         random_shuffle(data.begin(), data.end());
346         return *this;                   
347 }
348
349 /***********************************************************************/
350
351 void SharedOrderVector::updateStats(){
352         try {
353                 needToUpdate = 0;
354                 numSeqs = 0;
355                 numBins = 0;
356                 maxRank = 0;
357         
358                 numSeqs = data.size();
359                                 
360                 vector<int> hold(numSeqs, 0);
361                 for(int i=0;i<numSeqs;i++){
362                         hold[data[i].bin] = hold[data[i].bin]+1;
363                 }       
364                 
365                 for(int i=0;i<numSeqs;i++){
366                         if(hold[i] > 0)                         {       numBins++;                              }
367                         if(hold[i] > maxRank)           {       maxRank = hold[i];              }
368                 }
369                 
370         }
371         catch(exception& e) {
372                 cout << "Standard Error: " << e.what() << " has occurred in the SharedOrderVector class Function updateStats. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
373                 exit(1);
374         }
375         catch(...) {
376                 cout << "An unknown error has occurred in the SharedOrderVector class function updateStats. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
377                 exit(1);
378         }       
379 }
380
381 /***********************************************************************/
382
383