]> git.donarmstrong.com Git - mothur.git/blob - listvector.cpp
adding labels to list file.
[mothur.git] / listvector.cpp
1 /*
2  *  list.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/8/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "sabundvector.hpp"
12 #include "rabundvector.hpp"
13 #include "ordervector.hpp"
14 #include "listvector.hpp"
15
16 //sorts highest to lowest
17 /***********************************************************************/
// Counts the names held in a comma-separated list.
// An empty string holds zero names; otherwise the count is (number of commas + 1).
static inline int abundNamesCount(const string& names){
    if (names.empty()) { return 0; }

    int total = 1;
    for (string::size_type i = 0; i < names.size(); i++) {
        if (names[i] == ',') { total++; }
    }
    return total;
}

// Comparator for sort(): returns true when 'left' holds strictly more
// comma-separated names than 'right', which orders bins from largest to smallest.
// Bins of equal size compare as equivalent (false both ways).
// The arguments are taken by const reference so the (potentially very long)
// name lists are not copied on every comparison.
inline bool abundNamesSort(const string& left, const string& right){
    return abundNamesCount(left) > abundNamesCount(right);
}
37
38 /***********************************************************************/
39
40 ListVector::ListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){}
41
42 /***********************************************************************/
43
44 ListVector::ListVector(int n):  DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){}
45
46 /***********************************************************************/
47
48 ListVector::ListVector(string id, vector<string> lv) : DataVector(id), data(lv){
49         try {
50                 for(int i=0;i<data.size();i++){
51                         if(data[i] != ""){
52                                 int binSize = m->getNumNames(data[i]);
53                                 numBins = i+1;
54                                 if(binSize > maxRank)   {       maxRank = binSize;      }
55                                 numSeqs += binSize;
56                         }
57                 }
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "ListVector", "ListVector");
61                 exit(1);
62         }
63 }
64
65 /**********************************************************************/
66
67 ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
68         try {
69                 int hold;
70         
71         //are we at the beginning of the file??
72                 if (m->saveNextLabel == "") {
73                         f >> label;
74             
75                         //is this a shared file that has headers
76                         if (label == "label") {
77                                 
78                                 //gets "numOtus"
79                                 f >> label; m->gobble(f);
80                                 
81                                 //eat rest of line
82                                 label = m->getline(f); m->gobble(f);
83                                 
84                                 //parse labels to save
85                                 istringstream iStringStream(label);
86                                 m->listBinLabelsInFile.clear();
87                                 while(!iStringStream.eof()){
88                                         if (m->control_pressed) { break; }
89                                         string temp;
90                                         iStringStream >> temp;  m->gobble(iStringStream);
91                     
92                                         m->listBinLabelsInFile.push_back(temp);
93                                 }
94                                 
95                                 f >> label >> hold;
96                         }else {
97                 //read in first row
98                 f >> hold;
99                 
100                 //make binlabels because we don't have any
101                 string snumBins = toString(hold);
102                 m->listBinLabelsInFile.clear();
103                 for (int i = 0; i < hold; i++) {
104                     //if there is a bin label use it otherwise make one
105                     string binLabel = "Otu";
106                     string sbinNumber = toString(i+1);
107                     if (sbinNumber.length() < snumBins.length()) {
108                         int diff = snumBins.length() - sbinNumber.length();
109                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
110                     }
111                     binLabel += sbinNumber;
112                     m->listBinLabelsInFile.push_back(binLabel);
113                 }
114             }
115             m->saveNextLabel = label;
116                 }else {
117             f >> label >> hold;
118             m->saveNextLabel = label;
119         }
120         
121         binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
122                 
123                 data.assign(hold, "");
124                 string inputData = "";
125         
126                 for(int i=0;i<hold;i++){
127                         f >> inputData;
128                         set(i, inputData);
129                 }
130                 m->gobble(f);
131         
132         if (f.eof()) { m->saveNextLabel = ""; }
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "ListVector", "ListVector");
136                 exit(1);
137         }
138 }
139
140 /***********************************************************************/
141
142 void ListVector::set(int binNumber, string seqNames){
143         try {
144                 int nNames_old = m->getNumNames(data[binNumber]);
145                 data[binNumber] = seqNames;
146                 int nNames_new = m->getNumNames(seqNames);
147         
148                 if(nNames_old == 0)                     {       numBins++;                              }
149                 if(nNames_new == 0)                     {       numBins--;                              }
150                 if(nNames_new > maxRank)        {       maxRank = nNames_new;   }
151         
152                 numSeqs += (nNames_new - nNames_old);
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "ListVector", "set");
156                 exit(1);
157         }
158 }
159
160 /***********************************************************************/
161
162 string ListVector::get(int index){
163         return data[index];
164 }
165 /***********************************************************************/
166
167 void ListVector::setLabels(vector<string> labels){
168         try {
169                 binLabels = labels;
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "ListVector", "setLabels");
173                 exit(1);
174         }
175 }
176
177 /***********************************************************************/
178 //could potentially end up with duplicate binlabel names with code below.
179 //we don't currently use them in a way that would do that.
180 //if you had a listfile that had been subsampled and then added to it, dup names would be possible.
181 vector<string> ListVector::getLabels(){
182     try {
183         
184         string tagHeader = "Otu";
185         if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; }
186         
187         if (binLabels.size() < data.size()) {
188             string snumBins = toString(numBins);
189             
190             for (int i = 0; i < numBins; i++) {
191                 string binLabel = tagHeader;
192                 
193                 if (i < binLabels.size()) { //label exists, check leading zeros length
194                     string sbinNumber = m->getSimpleLabel(binLabels[i]);
195                     if (sbinNumber.length() < snumBins.length()) {
196                         int diff = snumBins.length() - sbinNumber.length();
197                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
198                     }
199                     binLabel += sbinNumber;
200                     binLabels[i] = binLabel;
201                 }else{
202                     string sbinNumber = toString(i+1);
203                     if (sbinNumber.length() < snumBins.length()) {
204                         int diff = snumBins.length() - sbinNumber.length();
205                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
206                     }
207                     binLabel += sbinNumber;
208                     binLabels.push_back(binLabel);
209                 }
210             }
211         }
212         return binLabels;
213     }
214         catch(exception& e) {
215                 m->errorOut(e, "ListVector", "getLabels");
216                 exit(1);
217         }
218 }
219
220 /***********************************************************************/
221
222 void ListVector::push_back(string seqNames){
223         try {
224                 data.push_back(seqNames);
225                 int nNames = m->getNumNames(seqNames);
226         
227                 numBins++;
228         
229                 if(nNames > maxRank)    {       maxRank = nNames;       }
230         
231                 numSeqs += nNames;
232         }
233         catch(exception& e) {
234                 m->errorOut(e, "ListVector", "push_back");
235                 exit(1);
236         }
237 }
238
239 /***********************************************************************/
240
241 void ListVector::resize(int size){
242         data.resize(size);              
243 }
244
245 /***********************************************************************/
246
247 int ListVector::size(){
248         return data.size();
249 }
250 /***********************************************************************/
251
252 void ListVector::clear(){
253         numBins = 0;
254         maxRank = 0;
255         numSeqs = 0;
256         return data.clear();
257         
258 }
259
260 /***********************************************************************/
261 void ListVector::printHeaders(ostream& output){
262         try {
263                 string snumBins = toString(numBins);
264                 output << "label\tnumOtus\t";
265                 if (m->sharedHeaderMode == "tax") {
266                         for (int i = 0; i < numBins; i++) {
267                                 
268                                 //if there is a bin label use it otherwise make one
269                                 string binLabel = "PhyloType";
270                                 string sbinNumber = toString(i+1);
271                                 if (sbinNumber.length() < snumBins.length()) {
272                                         int diff = snumBins.length() - sbinNumber.length();
273                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
274                                 }
275                                 binLabel += sbinNumber;
276                                 if (i < binLabels.size()) {  binLabel = binLabels[i]; }
277                                 
278                                 output << binLabel << '\t';
279                         }
280                         output << endl;
281                 }else {
282                         for (int i = 0; i < numBins; i++) {
283                                 //if there is a bin label use it otherwise make one
284                                 string binLabel = "Otu";
285                                 string sbinNumber = toString(i+1);
286                                 if (sbinNumber.length() < snumBins.length()) {
287                                         int diff = snumBins.length() - sbinNumber.length();
288                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
289                                 }
290                                 binLabel += sbinNumber;
291                                 if (i < binLabels.size()) {  binLabel = binLabels[i]; }
292                                 
293                                 output << binLabel << '\t';
294                         }
295                         
296                         output << endl;
297                 }
298                 m->printedListHeaders = true;
299         }
300         catch(exception& e) {
301                 m->errorOut(e, "ListVector", "printHeaders");
302                 exit(1);
303         }
304 }
305
306 /***********************************************************************/
307
308 void ListVector::print(ostream& output){
309         try {
310                 output << label << '\t' << numBins << '\t';
311         
312         vector<string> hold = data;
313         sort(hold.begin(), hold.end(), abundNamesSort);
314         
315                 for(int i=0;i<hold.size();i++){
316                         if(hold[i] != ""){
317                                 output << hold[i] << '\t';
318                         }
319                 }
320                 output << endl;
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "ListVector", "print");
324                 exit(1);
325         }
326 }
327
328
329 /***********************************************************************/
330
331 RAbundVector ListVector::getRAbundVector(){
332         try {
333                 RAbundVector rav;
334         
335                 for(int i=0;i<data.size();i++){
336                         int binSize = m->getNumNames(data[i]);
337                         rav.push_back(binSize);
338                 }
339         
340         //  This was here before to output data in a nice format, but it screws up the name mapping steps
341         //      sort(rav.rbegin(), rav.rend());
342         //      
343         //      for(int i=data.size()-1;i>=0;i--){
344         //              if(rav.get(i) == 0){    rav.pop_back(); }
345         //              else{
346         //                      break;
347         //              }
348         //      }
349                 rav.setLabel(label);
350         
351                 return rav;
352         }
353         catch(exception& e) {
354                 m->errorOut(e, "ListVector", "getRAbundVector");
355                 exit(1);
356         }
357 }
358
359 /***********************************************************************/
360
361 SAbundVector ListVector::getSAbundVector(){
362         try {
363                 SAbundVector sav(maxRank+1);
364         
365                 for(int i=0;i<data.size();i++){
366                         int binSize = m->getNumNames(data[i]);  
367                         sav.set(binSize, sav.get(binSize) + 1); 
368                 }
369                 sav.set(0, 0);
370                 sav.setLabel(label);
371         
372                 return sav;
373         }
374         catch(exception& e) {
375                 m->errorOut(e, "ListVector", "getSAbundVector");
376                 exit(1);
377         }
378 }
379
380 /***********************************************************************/
381
382 OrderVector ListVector::getOrderVector(map<string,int>* orderMap = NULL){
383         
384         try {
385                 if(orderMap == NULL){
386                         OrderVector ov;
387                 
388                         for(int i=0;i<data.size();i++){
389                                 int binSize = m->getNumNames(data[i]);          
390                                 for(int j=0;j<binSize;j++){
391                                         ov.push_back(i);
392                                 }
393                         }
394                         random_shuffle(ov.begin(), ov.end());
395                         ov.setLabel(label);
396                         ov.getNumBins();
397                 
398                         return ov;
399                 
400                 }
401                 else{
402                         OrderVector ov(numSeqs);
403                 
404                         for(int i=0;i<data.size();i++){
405                                 string listOTU = data[i];
406                                 int length = listOTU.size();
407                                 
408                                 string seqName="";
409                         
410                                 for(int j=0;j<length;j++){
411                                 
412                                         if(listOTU[j] != ','){
413                                                 seqName += listOTU[j];
414                                         }
415                                         else{
416                                                 if(orderMap->count(seqName) == 0){
417                                                         m->mothurOut(seqName + " not found, check *.names file\n");
418                                                         exit(1);
419                                                 }
420                                         
421                                                 ov.set((*orderMap)[seqName], i);
422                                                 seqName = "";
423                                         }                                               
424                                 }
425                         
426                                 if(orderMap->count(seqName) == 0){
427                                         m->mothurOut(seqName + " not found, check *.names file\n");
428                                         exit(1);
429                                 }
430                                 ov.set((*orderMap)[seqName], i);        
431                         }
432                 
433                         ov.setLabel(label);
434                         ov.getNumBins();
435                 
436                         return ov;              
437                 }
438         }
439         catch(exception& e) {
440                 m->errorOut(e, "ListVector", "getOrderVector");
441                 exit(1);
442         }
443 }
444
445 /***********************************************************************/