5 * Created by Pat Schloss on 8/8/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
11 #include "sabundvector.hpp"
12 #include "rabundvector.hpp"
13 #include "ordervector.hpp"
14 #include "listvector.hpp"
16 //sorts highest to lowest
17 /***********************************************************************/
// Comparator for std::sort over comma-separated name lists: orders the lists
// from the one with the most separators to the one with the fewest
// ("highest to lowest").
//
//   left, right - the two name lists being compared.
//
// Returns true only when `left` contains strictly more ',' characters than
// `right`. Equal counts return false in both directions, which is what
// std::sort requires of a strict weak ordering.
//
// The arguments are taken by const reference because std::sort invokes the
// comparator O(n log n) times and by-value parameters would copy both strings
// on every call.
inline bool abundNamesSort(const std::string& left, const std::string& right){
    // std::count yields a signed difference_type, so no int-vs-size_t index
    // comparison is needed to walk the strings.
    const std::string::difference_type countLeft  = std::count(left.begin(),  left.end(),  ',');
    const std::string::difference_type countRight = std::count(right.begin(), right.end(), ',');

    return countLeft > countRight;
}
38 /***********************************************************************/
40 ListVector::ListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){}
42 /***********************************************************************/
44 ListVector::ListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){}
46 /***********************************************************************/
// Builds a list vector from an existing set of bins.
//   id - forwarded unchanged to the DataVector base-class constructor.
//   lv - one string per bin; copied into `data`.
// NOTE(review): unlike the other constructors, this initializer list does not
// set maxRank, numBins or numSeqs, yet maxRank is read in the loop below.
// Confirm the members are given a starting value elsewhere (e.g. in the class
// declaration); otherwise the comparison reads an indeterminate value.
48 ListVector::ListVector(string id, vector<string> lv) : DataVector(id), data(lv){
// Visit every bin that was copied in.
50 for(int i=0;i<data.size();i++){
// binSize is whatever m->getNumNames reports for this bin
// (presumably the number of names it holds -- confirm against the helper).
52 int binSize = m->getNumNames(data[i]);
// Keep maxRank at the largest bin size seen so far.
54 if(binSize > maxRank) { maxRank = binSize; }
// Hand `e` to m->errorOut, tagged with the class and function name.
60 m->errorOut(e, "ListVector", "ListVector");
65 /**********************************************************************/
// Builds a list vector by reading from the open input stream `f`.
// Counters start at zero. Bin labels are either parsed from a header row or
// generated ("Otu" + zero-padded index) and are cached on the shared `m`
// object in m->listBinLabelsInFile. m->saveNextLabel carries the pending label
// between successive constructions on the same stream.
67 ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
71 //are we at the beginning of the file??
// An empty m->saveNextLabel means no label is pending from an earlier read.
72 if (m->saveNextLabel == "") {
75 //is this a shared file that has headers
// The literal token "label" marks a header row.
76 if (label == "label") {
// Read the next whitespace-delimited token into `label`, then let m->gobble
// process the stream (presumably skipping trailing whitespace -- confirm).
79 f >> label; m->gobble(f);
// Read a line from the stream into `label` via m->getline.
82 label = m->getline(f); m->gobble(f);
84 //parse labels to save
// Split the line into whitespace-delimited tokens and store each one as a bin
// label, replacing whatever labels were cached before.
85 istringstream iStringStream(label);
86 m->listBinLabelsInFile.clear();
87 while(!iStringStream.eof()){
// Stop parsing early when m->control_pressed is set.
88 if (m->control_pressed) { break; }
90 iStringStream >> temp; m->gobble(iStringStream);
92 m->listBinLabelsInFile.push_back(temp);
100 //make binlabels because we don't have any
// `hold` (set earlier in this constructor) is used here as the bin count.
// snumBins is its decimal text and fixes the digit width of every label.
101 string snumBins = toString(hold);
102 m->listBinLabelsInFile.clear();
103 for (int i = 0; i < hold; i++) {
104 //if there is a bin label use it otherwise make one
105 string binLabel = "Otu";
// Labels are numbered from 1.
106 string sbinNumber = toString(i+1);
// Left-pad with '0' so the number has as many digits as `hold` itself.
107 if (sbinNumber.length() < snumBins.length()) {
108 int diff = snumBins.length() - sbinNumber.length();
109 for (int h = 0; h < diff; h++) { binLabel += "0"; }
111 binLabel += sbinNumber;
112 m->listBinLabelsInFile.push_back(binLabel);
// Remember the current label on the shared object.
115 m->saveNextLabel = label;
118 m->saveNextLabel = label;
// Take the first `hold` cached labels as this vector's bin labels.
// NOTE(review): begin()+hold assumes m->listBinLabelsInFile holds at least
// `hold` entries; a header with fewer labels would make this range invalid --
// confirm the input guarantees this.
121 binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
// Reset `data` to `hold` empty bins before the per-bin loop below.
123 data.assign(hold, "");
124 string inputData = "";
126 for(int i=0;i<hold;i++){
// At end of file, clear the pending label so the next construction takes the
// "beginning of the file" branch above.
132 if (f.eof()) { m->saveNextLabel = ""; }
134 catch(exception& e) {
// Report the exception tagged with the class and function name.
135 m->errorOut(e, "ListVector", "ListVector");
140 /***********************************************************************/
// Replaces the contents of bin `binNumber` with `seqNames` and adjusts the
// cached counters (numBins, maxRank, numSeqs) to match.
//   binNumber - index into `data`; used without a bounds check here.
//   seqNames  - the new contents of the bin.
142 void ListVector::set(int binNumber, string seqNames){
// Count the bin's names before and after the overwrite, as reported by
// m->getNumNames.
144 int nNames_old = m->getNumNames(data[binNumber]);
145 data[binNumber] = seqNames;
146 int nNames_new = m->getNumNames(seqNames);
// A bin that had no names becomes occupied, and a bin left with no names stops
// being counted. When both counts are zero the two adjustments cancel.
148 if(nNames_old == 0) { numBins++; }
149 if(nNames_new == 0) { numBins--; }
// NOTE(review): maxRank is only raised by this update; it is not lowered when
// the previously largest bin shrinks -- confirm that is intended.
150 if(nNames_new > maxRank) { maxRank = nNames_new; }
// Apply the net change in the number of names to the running total.
152 numSeqs += (nNames_new - nNames_old);
154 catch(exception& e) {
// Report the exception tagged with the class and function name.
155 m->errorOut(e, "ListVector", "set");
160 /***********************************************************************/
// Accessor taking a bin index and returning a string by value.
// Presumably returns the contents of data[index] -- confirm against the body.
162 string ListVector::get(int index){
165 /***********************************************************************/
// Takes a vector of label strings by value.
// Presumably stores them as this vector's bin labels (binLabels) -- confirm
// against the body.
167 void ListVector::setLabels(vector<string> labels){
171 catch(exception& e) {
// Report the exception tagged with the class and function name.
172 m->errorOut(e, "ListVector", "setLabels");
177 /***********************************************************************/
178 //NOTE: the code below could potentially produce duplicate bin label names.
179 //We don't currently use the labels in a way that would trigger that.
180 //If a list file had been subsampled and then had bins added to it, duplicate names would be possible.
// Returns a vector of bin-label strings (presumably binLabels -- confirm).
// When there are fewer labels than entries in `data`, the labels are first
// rebuilt: existing ones are rewritten with the current prefix and zero
// padding, and missing ones are generated from the 1-based bin number.
181 vector<string> ListVector::getLabels(){
// Label prefix: "Otu" by default, "PhyloType" when m->sharedHeaderMode is "tax".
184 string tagHeader = "Otu";
185 if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; }
// NOTE(review): the guard compares against data.size() while the loop below
// runs to numBins -- confirm the two are expected to agree here.
187 if (binLabels.size() < data.size()) {
// Decimal text of numBins; its length fixes the digit width of every label.
188 string snumBins = toString(numBins);
190 for (int i = 0; i < numBins; i++) {
191 string binLabel = tagHeader;
193 if (i < binLabels.size()) { //label exists, check leading zeros length
// Reduce the existing label with m->getSimpleLabel (presumably to its numeric
// part without prefix or leading zeros -- confirm), then re-pad it.
194 string sbinNumber = m->getSimpleLabel(binLabels[i]);
195 if (sbinNumber.length() < snumBins.length()) {
196 int diff = snumBins.length() - sbinNumber.length();
197 for (int h = 0; h < diff; h++) { binLabel += "0"; }
199 binLabel += sbinNumber;
// Overwrite the stored label with the rebuilt one.
200 binLabels[i] = binLabel;
// No stored label at this position: build one from the 1-based bin number,
// padded the same way, and append it.
202 string sbinNumber = toString(i+1);
203 if (sbinNumber.length() < snumBins.length()) {
204 int diff = snumBins.length() - sbinNumber.length();
205 for (int h = 0; h < diff; h++) { binLabel += "0"; }
207 binLabel += sbinNumber;
208 binLabels.push_back(binLabel);
214 catch(exception& e) {
// Report the exception tagged with the class and function name.
215 m->errorOut(e, "ListVector", "getLabels");
220 /***********************************************************************/
// Appends a new bin holding `seqNames` to the end of `data` and raises maxRank
// if the new bin is the largest seen so far.
222 void ListVector::push_back(string seqNames){
224 data.push_back(seqNames);
// Size of the new bin as reported by m->getNumNames.
225 int nNames = m->getNumNames(seqNames);
229 if(nNames > maxRank) { maxRank = nNames; }
233 catch(exception& e) {
// Report the exception tagged with the class and function name.
234 m->errorOut(e, "ListVector", "push_back");
239 /***********************************************************************/
// Takes the requested size as an int and returns nothing.
// Presumably resizes the underlying `data` storage -- confirm against the body.
241 void ListVector::resize(int size){
245 /***********************************************************************/
// Returns an int.
// Presumably the number of entries in `data` -- confirm against the body.
247 int ListVector::size(){
250 /***********************************************************************/
// Takes no arguments and returns nothing.
// Presumably empties `data` and resets the cached counters -- confirm against
// the body.
252 void ListVector::clear(){
260 /***********************************************************************/
// Writes the tab-separated header row to `output`: the fixed columns "label"
// and "numOtus", then one label per bin, and records on the shared `m` object
// that the headers have been printed.
// A stored label in binLabels is used when one exists for the position;
// otherwise a label is generated from a prefix ("PhyloType" when
// m->sharedHeaderMode is "tax", "Otu" otherwise) plus the zero-padded 1-based
// bin number.
261 void ListVector::printHeaders(ostream& output){
// Decimal text of numBins; its length fixes the digit width of generated labels.
263 string snumBins = toString(numBins);
264 output << "label\tnumOtus\t";
265 if (m->sharedHeaderMode == "tax") {
266 for (int i = 0; i < numBins; i++) {
268 //if there is a bin label use it otherwise make one
269 string binLabel = "PhyloType";
270 string sbinNumber = toString(i+1);
// Left-pad with '0' up to the width of numBins.
271 if (sbinNumber.length() < snumBins.length()) {
272 int diff = snumBins.length() - sbinNumber.length();
273 for (int h = 0; h < diff; h++) { binLabel += "0"; }
275 binLabel += sbinNumber;
// A stored label replaces the generated one.
276 if (i < binLabels.size()) { binLabel = binLabels[i]; }
278 output << binLabel << '\t';
// Same procedure with the "Otu" prefix.
282 for (int i = 0; i < numBins; i++) {
283 //if there is a bin label use it otherwise make one
284 string binLabel = "Otu";
285 string sbinNumber = toString(i+1);
286 if (sbinNumber.length() < snumBins.length()) {
287 int diff = snumBins.length() - sbinNumber.length();
288 for (int h = 0; h < diff; h++) { binLabel += "0"; }
290 binLabel += sbinNumber;
291 if (i < binLabels.size()) { binLabel = binLabels[i]; }
293 output << binLabel << '\t';
// Flag on the shared object that the header row has been written.
298 m->printedListHeaders = true;
300 catch(exception& e) {
// Report the exception tagged with the class and function name.
301 m->errorOut(e, "ListVector", "printHeaders");
306 /***********************************************************************/
// Writes this vector as one tab-separated row to `output`: the label, the bin
// count, then the bins ordered by abundNamesSort.
308 void ListVector::print(ostream& output){
310 output << label << '\t' << numBins << '\t';
// Sort a copy so the order of `data` itself is left untouched.
312 vector<string> hold = data;
313 sort(hold.begin(), hold.end(), abundNamesSort);
315 for(int i=0;i<hold.size();i++){
// Each bin written is followed by a tab.
317 output << hold[i] << '\t';
322 catch(exception& e) {
// Report the exception tagged with the class and function name.
323 m->errorOut(e, "ListVector", "print");
329 /***********************************************************************/
// Returns an RAbundVector derived from this list: for every entry of `data`,
// in order, the size reported by m->getNumNames is appended to `rav`
// (presumably the vector that is returned -- confirm).
331 RAbundVector ListVector::getRAbundVector(){
335 for(int i=0;i<data.size();i++){
336 int binSize = m->getNumNames(data[i]);
// Entries are appended in bin order; per the note below, no sorting is applied.
337 rav.push_back(binSize);
340 // This was here before to output data in a nice format, but it screws up the name mapping steps
341 // sort(rav.rbegin(), rav.rend());
343 // for(int i=data.size()-1;i>=0;i--){
344 // if(rav.get(i) == 0){ rav.pop_back(); }
353 catch(exception& e) {
// Report the exception tagged with the class and function name.
354 m->errorOut(e, "ListVector", "getRAbundVector");
359 /***********************************************************************/
// Returns an SAbundVector derived from this list: a tally, indexed by bin
// size, of how many bins have that size.
361 SAbundVector ListVector::getSAbundVector(){
// Constructed with maxRank+1 so that index maxRank (the largest bin size
// tracked) can be addressed -- assumes the argument is an element count; confirm.
363 SAbundVector sav(maxRank+1);
365 for(int i=0;i<data.size();i++){
366 int binSize = m->getNumNames(data[i]);
// Add one to the tally stored at index binSize.
367 sav.set(binSize, sav.get(binSize) + 1);
374 catch(exception& e) {
// Report the exception tagged with the class and function name.
375 m->errorOut(e, "ListVector", "getSAbundVector");
380 /***********************************************************************/
// Returns an OrderVector derived from this list.
//   orderMap - optional map from sequence name to a position in the result.
//              When NULL, entries are generated per bin and then shuffled.
//              Otherwise each name's position is looked up in the map and the
//              bin index is stored there.
// NOTE(review): the default argument is written on this out-of-class
// definition. A default may be given only once, so confirm the declaration in
// the header does not repeat it, and that callers relying on the default can
// see this definition.
382 OrderVector ListVector::getOrderVector(map<string,int>* orderMap = NULL){
// No map supplied.
385 if(orderMap == NULL){
388 for(int i=0;i<data.size();i++){
389 int binSize = m->getNumNames(data[i]);
// One iteration per name in bin i.
390 for(int j=0;j<binSize;j++){
// Randomise the order of the generated entries.
// NOTE(review): std::random_shuffle is deprecated since C++14 and removed in
// C++17; std::shuffle with an explicit engine is the replacement.
394 random_shuffle(ov.begin(), ov.end());
// Map supplied: the result has one slot per sequence (numSeqs).
402 OrderVector ov(numSeqs);
404 for(int i=0;i<data.size();i++){
405 string listOTU = data[i];
406 int length = listOTU.size();
// Scan the bin character by character, splitting on ','.
410 for(int j=0;j<length;j++){
// Not a separator: extend the name being accumulated.
412 if(listOTU[j] != ','){
413 seqName += listOTU[j];
// A name that is absent from the map is reported through m->mothurOut.
416 if(orderMap->count(seqName) == 0){
417 m->mothurOut(seqName + " not found, check *.names file\n");
// Store bin index i at the position the map gives for this name.
421 ov.set((*orderMap)[seqName], i);
// Same lookup and store for the name still pending after the scan
// (the last name in a bin has no trailing ',').
426 if(orderMap->count(seqName) == 0){
427 m->mothurOut(seqName + " not found, check *.names file\n");
430 ov.set((*orderMap)[seqName], i);
439 catch(exception& e) {
// Report the exception tagged with the class and function name.
440 m->errorOut(e, "ListVector", "getOrderVector");
445 /***********************************************************************/