]> git.donarmstrong.com Git - mothur.git/blob - listvector.cpp
Initial revision
[mothur.git] / listvector.cpp
1 /*
2  *  list.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/8/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include <map>
13 #include <exception>
14 #include "sabundvector.hpp"
15 #include "rabundvector.hpp"
16 #include "ordervector.hpp"
17 #include "datavector.hpp"
18 #include "utilities.hpp"
19 #include "listvector.hpp"
20
21
22 /***********************************************************************/
23
24 ListVector::ListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){};
25
26 /***********************************************************************/
27
28 ListVector::ListVector(int n):  DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){};
29
30 /***********************************************************************/
31
32 ListVector::ListVector(string id, vector<string> lv) : DataVector(id), data(lv){
33         try {
34                 for(int i=0;i<data.size();i++){
35                         if(data[i] != ""){
36                                 int binSize = getNumNames(data[i]);
37                                 numBins = i+1;
38                                 if(binSize > maxRank)   {       maxRank = binSize;      }
39                                 numSeqs += binSize;
40                         }
41                 }
42         }
43         catch(exception& e) {
44                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function ListVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
45                 exit(1);
46         }
47         catch(...) {
48                 cout << "An unknown error has occurred in the ListVector class function ListVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51 }
52
53 /**********************************************************************/
54
55 ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
56         try {
57                 int hold;
58                 f >> label >> hold;
59         
60                 data.assign(hold, "");
61                 string inputData = "";
62         
63                 for(int i=0;i<hold;i++){
64                         f >> inputData;
65                         set(i, inputData);
66                 }
67         }
68         catch(exception& e) {
69                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function ListVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70                 exit(1);
71         }
72         catch(...) {
73                 cout << "An unknown error has occurred in the ListVector class function ListVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
74                 exit(1);
75         }
76 }
77
78 /***********************************************************************/
79
80 void ListVector::set(int binNumber, string seqNames){
81         try {
82                 int nNames_old = getNumNames(data[binNumber]);
83                 data[binNumber] = seqNames;
84                 int nNames_new = getNumNames(seqNames);
85         
86                 if(nNames_old == 0)                     {       numBins++;                              }
87                 if(nNames_new == 0)                     {       numBins--;                              }
88                 if(nNames_new > maxRank)        {       maxRank = nNames_new;   }
89         
90                 numSeqs += (nNames_new - nNames_old);
91         }
92         catch(exception& e) {
93                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function set. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
94                 exit(1);
95         }
96         catch(...) {
97                 cout << "An unknown error has occurred in the ListVector class function set. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
98                 exit(1);
99         }
100 }
101
102 /***********************************************************************/
103
104 string ListVector::get(int index){
105         return data[index];
106 }
107
108 /***********************************************************************/
109
110 void ListVector::push_back(string seqNames){
111         try {
112                 data.push_back(seqNames);
113                 int nNames = getNumNames(seqNames);
114         
115                 numBins++;
116         
117                 if(nNames > maxRank)    {       maxRank = nNames;       }
118         
119                 numSeqs += nNames;
120         }
121         catch(exception& e) {
122                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
123                 exit(1);
124         }
125         catch(...) {
126                 cout << "An unknown error has occurred in the ListVector class function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
127                 exit(1);
128         }
129 }
130
131 /***********************************************************************/
132
133 void ListVector::resize(int size){
134         data.resize(size);              
135 }
136
137 /***********************************************************************/
138
139 int ListVector::size(){
140         return data.size();
141 }
142 /***********************************************************************/
143
144 void ListVector::clear(){
145         numBins = 0;
146         maxRank = 0;
147         numSeqs = 0;
148         return data.clear();
149         
150 }
151
152 /***********************************************************************/
153
154 void ListVector::print(ostream& output){
155         try {
156                 output << label << '\t' << numBins << '\t';
157         
158                 for(int i=0;i<data.size();i++){
159                         if(data[i] != ""){
160                                 output << data[i] << '\t';
161                         }
162                 }
163                 output << endl;
164         }
165         catch(exception& e) {
166                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167                 exit(1);
168         }
169         catch(...) {
170                 cout << "An unknown error has occurred in the ListVector class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
171                 exit(1);
172         }
173 }
174
175
176 /***********************************************************************/
177
178 RAbundVector ListVector::getRAbundVector(){
179         try {
180                 RAbundVector rav;
181         
182                 for(int i=0;i<data.size();i++){
183                         int binSize = getNumNames(data[i]);
184                         rav.push_back(binSize);
185                 }
186         
187         //  This was here before to output data in a nice format, but it screws up the name mapping steps
188         //      sort(rav.rbegin(), rav.rend());
189         //      
190         //      for(int i=data.size()-1;i>=0;i--){
191         //              if(rav.get(i) == 0){    rav.pop_back(); }
192         //              else{
193         //                      break;
194         //              }
195         //      }
196                 rav.setLabel(label);
197         
198                 return rav;
199         }
200         catch(exception& e) {
201                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
202                 exit(1);
203         }
204         catch(...) {
205                 cout << "An unknown error has occurred in the ListVector class function getRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
206                 exit(1);
207         }
208 }
209
210 /***********************************************************************/
211
212 SAbundVector ListVector::getSAbundVector(){
213         try {
214                 SAbundVector sav(maxRank+1);
215         
216                 for(int i=0;i<data.size();i++){
217                         int binSize = getNumNames(data[i]);     
218                         sav.set(binSize, sav.get(binSize) + 1); 
219                 }
220                 sav.set(0, 0);
221                 sav.setLabel(label);
222         
223                 return sav;
224         }
225         catch(exception& e) {
226                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function getSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
227                 exit(1);
228         }
229         catch(...) {
230                 cout << "An unknown error has occurred in the ListVector class function getSAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231                 exit(1);
232         }
233 }
234
235 /***********************************************************************/
236 SharedOrderVector* ListVector::getSharedOrderVector(){
237         globaldata = GlobalData::getInstance();
238         string groupName, names, name;
239         groupmap = globaldata->gGroupmap;
240         SharedOrderVector* order;
241         order = new SharedOrderVector();
242         order->setLabel(label);
243         
244         for(int i=0;i<numBins;i++){
245                 int binSize = getNumNames(get(i));      //find number of individual in given bin        
246                 names = get(i);
247                 while (names.find_first_of(',') != -1) { 
248                         name = names.substr(0,names.find_first_of(','));
249                         names = names.substr(names.find_first_of(',')+1, names.length());
250                         groupName = groupmap->getGroup(name);
251                         order->push_back(i, binSize, groupName);  //i represents what bin you are in
252                 }
253                 //get last name
254                 groupName = groupmap->getGroup(names);
255                 order->push_back(i, binSize, groupName);
256         }
257         random_shuffle(order->begin(), order->end());
258         return order;
259 }
260
261 /***********************************************************************/
262
263 OrderVector ListVector::getOrderVector(map<string,int>* orderMap = NULL){
264         
265         try {
266                 if(orderMap == NULL){
267                         OrderVector ov;
268                 
269                         for(int i=0;i<data.size();i++){
270                                 int binSize = getNumNames(data[i]);             
271                                 for(int j=0;j<binSize;j++){
272                                         ov.push_back(i);
273                                 }
274                         }
275                         random_shuffle(ov.begin(), ov.end());
276                         ov.setLabel(label);
277                         ov.getNumBins();
278                 
279                         return ov;
280                 
281                 }
282                 else{
283                         OrderVector ov(numSeqs);
284                 
285                         for(int i=0;i<data.size();i++){
286                                 string listOTU = data[i];
287                                 int length = listOTU.size();
288                                 
289                                 string seqName="";
290                         
291                                 for(int j=0;j<length;j++){
292                                 
293                                         if(listOTU[j] != ','){
294                                                 seqName += listOTU[j];
295                                         }
296                                         else{
297                                                 if(orderMap->count(seqName) == 0){
298                                                         cerr << seqName << " not found, check *.names file\n";
299                                                         exit(1);
300                                                 }
301                                         
302                                                 ov.set((*orderMap)[seqName], i);
303                                                 seqName = "";
304                                         }                                               
305                                 }
306                         
307                                 if(orderMap->count(seqName) == 0){
308                                         cerr << seqName << " not found, check *.names file\n";
309                                         exit(1);
310                                 }
311                                 ov.set((*orderMap)[seqName], i);        
312                         }
313                 
314                         ov.setLabel(label);
315                         ov.getNumBins();
316                 
317                         return ov;              
318                 }
319         }
320         catch(exception& e) {
321                 cout << "Standard Error: " << e.what() << " has occurred in the ListVector class Function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
322                 exit(1);
323         }
324         catch(...) {
325                 cout << "An unknown error has occurred in the ListVector class function getOrderVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
326                 exit(1);
327         }
328 }
329
330 /***********************************************************************/