]> git.donarmstrong.com Git - mothur.git/blob - sharedlistvector.cpp
working on pam
[mothur.git] / sharedlistvector.cpp
1 /*
 *  sharedlistvector.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "sabundvector.hpp"
11 #include "rabundvector.hpp"
12 #include "ordervector.hpp"
13 #include "sharedlistvector.h"
14 #include "sharedordervector.h"
15 #include "sharedutilities.h"
16
17 /***********************************************************************/
18
19 SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
20
21 /***********************************************************************/
22
23 SharedListVector::SharedListVector(int n):      DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
24
25 /***********************************************************************/
26 SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
27         try {
28         groupmap = NULL; countTable = NULL;
29                 //set up groupmap for later.
30         if (m->groupMode == "group") {
31             groupmap = new GroupMap(m->getGroupFile());
32             groupmap->readMap(); 
33         }else {
34             countTable = new CountTable();
35             countTable->readTable(m->getCountTableFile(), true, false);
36         }
37
38         int hold;
39         
40                 //are we at the beginning of the file??
41                 if (m->saveNextLabel == "") {
42                         f >> label;
43             
44                         //is this a shared file that has headers
45                         if (label == "label") {
46                                 
47                                 //gets "numOtus"
48                                 f >> label; m->gobble(f);
49                                 
50                                 //eat rest of line
51                                 label = m->getline(f); m->gobble(f);
52                                 
53                                 //parse labels to save
54                                 istringstream iStringStream(label);
55                                 m->listBinLabelsInFile.clear();
56                                 while(!iStringStream.eof()){
57                                         if (m->control_pressed) { break; }
58                                         string temp;
59                                         iStringStream >> temp;  m->gobble(iStringStream);
60                     
61                                         m->listBinLabelsInFile.push_back(temp);
62                                 }
63                                 
64                                 f >> label >> hold;
65                         }else {
66                 //read in first row
67                 f >> hold;
68                 
69                 //make binlabels because we don't have any
70                 string snumBins = toString(hold);
71                 m->listBinLabelsInFile.clear();
72                 for (int i = 0; i < hold; i++) {
73                     //if there is a bin label use it otherwise make one
74                     string binLabel = "Otu";
75                     string sbinNumber = toString(i+1);
76                     if (sbinNumber.length() < snumBins.length()) {
77                         int diff = snumBins.length() - sbinNumber.length();
78                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
79                     }
80                     binLabel += sbinNumber;
81                     m->listBinLabelsInFile.push_back(binLabel);
82                 }
83             }
84             m->saveNextLabel = label;
85                 }else {
86             f >> label >> hold;
87             m->saveNextLabel = label;
88         }
89         
90         binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
91                 
92                 data.assign(hold, "");
93                 string inputData = "";
94         
95                 for(int i=0;i<hold;i++){
96                         f >> inputData;
97                         set(i, inputData);
98                 }
99                 m->gobble(f);
100         
101         if (f.eof()) { m->saveNextLabel = ""; }
102                 
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "SharedListVector", "SharedListVector");
106                 exit(1);
107         }
108 }
109
110 /***********************************************************************/
111 void SharedListVector::set(int binNumber, string seqNames){
112         try {
113                 int nNames_old = m->getNumNames(data[binNumber]);
114                 data[binNumber] = seqNames;
115                 int nNames_new = m->getNumNames(seqNames);
116         
117                 if(nNames_old == 0)                     {       numBins++;                              }
118                 if(nNames_new == 0)                     {       numBins--;                              }
119                 if(nNames_new > maxRank)        {       maxRank = nNames_new;   }
120         
121                 numSeqs += (nNames_new - nNames_old);
122                 
123                          
124         }
125         catch(exception& e) {
126                 m->errorOut(e, "SharedListVector", "set");
127                 exit(1);
128         }
129 }
130
131 /***********************************************************************/
132
133 string SharedListVector::get(int index){
134         return data[index];
135 }
136 /***********************************************************************/
137
138 void SharedListVector::setLabels(vector<string> labels){
139         try {
140                 binLabels = labels;
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "SharedListVector", "setLabels");
144                 exit(1);
145         }
146 }
147
148 /***********************************************************************/
149 //could potentially end up with duplicate binlabel names with code below.
150 //we don't currently use them in a way that would do that.
151 //if you had a listfile that had been subsampled and then added to it, dup names would be possible.
152 vector<string> SharedListVector::getLabels(){
153     try {
154         string tagHeader = "Otu";
155         if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; }
156         
157         if (binLabels.size() < data.size()) {
158             string snumBins = toString(numBins);
159             
160             for (int i = 0; i < numBins; i++) {
161                 string binLabel = tagHeader;
162                 
163                 if (i < binLabels.size()) { //label exists, check leading zeros length
164                     string sbinNumber = m->getSimpleLabel(binLabels[i]);
165                     if (sbinNumber.length() < snumBins.length()) {
166                         int diff = snumBins.length() - sbinNumber.length();
167                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
168                     }
169                     binLabel += sbinNumber;
170                     binLabels[i] = binLabel;
171                 }else{
172                     string sbinNumber = toString(i+1);
173                     if (sbinNumber.length() < snumBins.length()) {
174                         int diff = snumBins.length() - sbinNumber.length();
175                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
176                     }
177                     binLabel += sbinNumber;
178                     binLabels.push_back(binLabel);
179                 }
180             }
181         }
182         return binLabels;
183     }
184         catch(exception& e) {
185                 m->errorOut(e, "SharedListVector", "getLabels");
186                 exit(1);
187         }
188 }
189 /***********************************************************************/
190
191 void SharedListVector::push_back(string seqNames){
192         try {
193                 data.push_back(seqNames);
194                 int nNames = m->getNumNames(seqNames);
195         
196                 numBins++;
197         
198                 if(nNames > maxRank)    {       maxRank = nNames;       }
199         
200                 numSeqs += nNames;
201         }
202         catch(exception& e) {
203                 m->errorOut(e, "SharedListVector", "push_back");
204                 exit(1);
205         }
206 }
207
208 /***********************************************************************/
209
210 void SharedListVector::resize(int size){
211         data.resize(size);              
212 }
213
214 /***********************************************************************/
215
216 int SharedListVector::size(){
217         return data.size();
218 }
219 /***********************************************************************/
220
221 void SharedListVector::clear(){
222         numBins = 0;
223         maxRank = 0;
224         numSeqs = 0;
225         return data.clear();
226         
227 }
228
229 /***********************************************************************/
230
231 void SharedListVector::print(ostream& output){
232         try {
233                 output << label << '\t' << numBins << '\t';
234         
235                 for(int i=0;i<data.size();i++){
236                         if(data[i] != ""){
237                                 output << data[i] << '\t';
238                         }
239                 }
240                 output << endl;
241         }
242         catch(exception& e) {
243                 m->errorOut(e, "SharedListVector", "print");
244                 exit(1);
245         }
246 }
247
248
249 /***********************************************************************/
250
251 RAbundVector SharedListVector::getRAbundVector(){
252         try {
253                 RAbundVector rav;
254         
255                 for(int i=0;i<data.size();i++){
256                         int binSize = m->getNumNames(data[i]);
257                         rav.push_back(binSize);
258                 }
259         
260         //  This was here before to output data in a nice format, but it screws up the name mapping steps
261         //      sort(rav.rbegin(), rav.rend());
262         //      
263         //      for(int i=data.size()-1;i>=0;i--){
264         //              if(rav.get(i) == 0){    rav.pop_back(); }
265         //              else{
266         //                      break;
267         //              }
268         //      }
269                 rav.setLabel(label);
270         
271                 return rav;
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "SharedListVector", "getRAbundVector");
275                 exit(1);
276         }
277 }
278
279 /***********************************************************************/
280
281 SAbundVector SharedListVector::getSAbundVector(){
282         try {
283                 SAbundVector sav(maxRank+1);
284         
285                 for(int i=0;i<data.size();i++){
286                         int binSize = m->getNumNames(data[i]);  
287                         sav.set(binSize, sav.get(binSize) + 1); 
288                 }
289                 sav.set(0, 0);
290                 sav.setLabel(label);
291         
292                 return sav;
293         }
294         catch(exception& e) {
295                 m->errorOut(e, "SharedListVector", "getSAbundVector");
296                 exit(1);
297         }
298 }
299
300 /***********************************************************************/
301 SharedOrderVector* SharedListVector::getSharedOrderVector(){
302         try {
303                 SharedOrderVector* order = new SharedOrderVector();
304                 order->setLabel(label);
305         
306                 for(int i=0;i<numBins;i++){
307                         int binSize = m->getNumNames(get(i));   //find number of individual in given bin        
308                         string names = get(i);
309             vector<string> binNames;
310             m->splitAtComma(names, binNames);
311             if (m->groupMode != "group") {
312                 binSize = 0;
313                 for (int j = 0; j < binNames.size(); j++) {  binSize += countTable->getNumSeqs(binNames[i]);  }
314             }
315                         for (int j = 0; j < binNames.size(); j++) { 
316                 if (m->control_pressed) { return order; }
317                 if (m->groupMode == "group") {
318                     string groupName = groupmap->getGroup(binNames[i]);
319                     if(groupName == "not found") {      m->mothurOut("Error: Sequence '" + binNames[i] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
320                                 
321                     order->push_back(i, binSize, groupName);  //i represents what bin you are in
322                 }else {
323                     vector<int> groupAbundances = countTable->getGroupCounts(binNames[i]);
324                     vector<string> groupNames = countTable->getNamesOfGroups();
325                     for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true.
326                         if (m->control_pressed) { return order; }
327                         for (int l = 0; l < groupAbundances[k]; l++) {  order->push_back(i, binSize, groupNames[k]);  }
328                     }
329                 }
330                         }
331                 }
332
333                 random_shuffle(order->begin(), order->end());
334                 order->updateStats();
335                 
336                 return order;
337         }
338         catch(exception& e) {
339                 m->errorOut(e, "SharedListVector", "getSharedOrderVector");
340                 exit(1);
341         }
342 }
343 /***********************************************************************/
344 SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
345         try {
346         m->currentSharedBinLabels = binLabels;
347         
348                 SharedRAbundVector rav(data.size());
349                 
350                 for(int i=0;i<numBins;i++){
351                         string names = get(i);
352             vector<string> binNames;
353             m->splitAtComma(names, binNames);
354             for (int j = 0; j < binNames.size(); j++) { 
355                                 if (m->control_pressed) { return rav; }
356                 if (m->groupMode == "group") {
357                     string group = groupmap->getGroup(binNames[j]);
358                     if(group == "not found") {  m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
359                     if (group == groupName) { //this name is in the group you want the vector for.
360                         rav.set(i, rav.getAbundance(i) + 1, group);  //i represents what bin you are in
361                     }
362                 }else {
363                     int count = countTable->getGroupCount(binNames[j], groupName);
364                     rav.set(i, rav.getAbundance(i) + count, groupName);
365                 }
366                         }
367                 }
368                 
369                 rav.setLabel(label);
370                 rav.setGroup(groupName);
371
372                 return rav;
373                 
374         }
375         catch(exception& e) {
376                 m->errorOut(e, "SharedListVector", "getSharedRAbundVector");
377                 exit(1);
378         }
379 }
380 /***********************************************************************/
381 vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
382         try {
383         m->currentSharedBinLabels = binLabels;
384         
385                 SharedUtil* util;
386                 util = new SharedUtil();
387                 vector<SharedRAbundVector*> lookup;  //contains just the groups the user selected
388         vector<SharedRAbundVector*> lookupDelete;
389                 map<string, SharedRAbundVector*> finder;  //contains all groups in groupmap
390                 
391                 vector<string> Groups = m->getGroups();
392         vector<string> allGroups;
393                 if (m->groupMode == "group") {  allGroups = groupmap->getNamesOfGroups();  }
394         else {  allGroups = countTable->getNamesOfGroups();  }
395                 util->setGroups(Groups, allGroups);
396                 m->setGroups(Groups);
397                 delete util;
398
399                 for (int i = 0; i < allGroups.size(); i++) {
400                         SharedRAbundVector* temp = new SharedRAbundVector(data.size());
401                         finder[allGroups[i]] = temp;
402                         finder[allGroups[i]]->setLabel(label);
403                         finder[allGroups[i]]->setGroup(allGroups[i]);
404                         if (m->inUsersGroups(allGroups[i], m->getGroups())) {  //if this group is in user groups
405                                 lookup.push_back(finder[allGroups[i]]);
406                         }else {
407                 lookupDelete.push_back(finder[allGroups[i]]);
408             }
409                 }
410         
411                 //fill vectors
412                 for(int i=0;i<numBins;i++){
413                         string names = get(i);  
414                         vector<string> binNames;
415             m->splitAtComma(names, binNames);
416             for (int j = 0; j < binNames.size(); j++) { 
417                 if (m->groupMode == "group") {
418                     string group = groupmap->getGroup(binNames[j]);
419                     if(group == "not found") {  m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine();  exit(1); }
420                     finder[group]->set(i, finder[group]->getAbundance(i) + 1, group);  //i represents what bin you are in       
421                 }else{
422                     vector<int> counts = countTable->getGroupCounts(binNames[j]);
423                     for (int k = 0; k < allGroups.size(); k++) {
424                         finder[allGroups[k]]->set(i, finder[allGroups[k]]->getAbundance(i) + counts[k], allGroups[k]);
425                     }
426                 }
427                         }
428                 }
429         
430         for (int j = 0; j < lookupDelete.size(); j++) {  delete lookupDelete[j];  }
431
432                 return lookup;
433         }
434         catch(exception& e) {
435                 m->errorOut(e, "SharedListVector", "getSharedRAbundVector");
436                 exit(1);
437         }
438 }
439
440 /***********************************************************************/
441 SharedSAbundVector SharedListVector::getSharedSAbundVector(string groupName) {
442         try { 
443                 SharedSAbundVector sav;
444                 SharedRAbundVector rav;
445                 
446                 rav = this->getSharedRAbundVector(groupName);
447                 sav = rav.getSharedSAbundVector();
448                 
449                 return sav;
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "SharedListVector", "getSharedSAbundVector");
453                 exit(1);
454         }
455 }
456 /***********************************************************************/
457
458 OrderVector SharedListVector::getOrderVector(map<string,int>* orderMap = NULL){
459         
460         try {
461                 if(orderMap == NULL){
462                         OrderVector ov;
463                 
464                         for(int i=0;i<data.size();i++){
465                 string names = data[i];
466                 vector<string> binNames;
467                 m->splitAtComma(names, binNames);
468                                 int binSize = binNames.size();  
469                 if (m->groupMode != "group") {
470                     binSize = 0;
471                     for (int j = 0; j < binNames.size(); j++) {  binSize += countTable->getNumSeqs(binNames[i]);  }
472                 }
473                                 for(int j=0;j<binSize;j++){
474                                         ov.push_back(i);
475                                 }
476                         }
477                         random_shuffle(ov.begin(), ov.end());
478                         ov.setLabel(label);
479                         ov.getNumBins();
480                 
481                         return ov;
482                 
483                 }
484                 else{
485                         OrderVector ov(numSeqs);
486                 
487                         for(int i=0;i<data.size();i++){
488                                 string listOTU = data[i];
489                                 vector<string> binNames;
490                 m->splitAtComma(listOTU, binNames);
491                 for (int j = 0; j < binNames.size(); j++) { 
492                     if(orderMap->count(binNames[j]) == 0){
493                         m->mothurOut(binNames[j] + " not found, check *.names file\n");
494                         exit(1);
495                     }
496                     ov.set((*orderMap)[binNames[j]], i);
497                                 }
498                         }
499                 
500                         ov.setLabel(label);
501                         ov.getNumBins();
502                 
503                         return ov;              
504                 }
505         }
506         catch(exception& e) {
507                 m->errorOut(e, "SharedListVector", "getOrderVector");
508                 exit(1);
509         }
510 }
511
512 /***********************************************************************/
513