]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
broke apart read matrix file
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
6  *  Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12 //**********************************************************************************************************************
13 BinSeqCommand::BinSeqCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 fastafile = globaldata->getFastaFile();
17                 namesfile = globaldata->getNameFile();
18                 openInputFile(fastafile, in);
19                 
20                 fasta = new FastaMap();
21         }
22         catch(exception& e) {
23                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
24                 exit(1);
25         }
26         catch(...) {
27                 cout << "An unknown error has occurred in the BinSeqCommand class function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
28                 exit(1);
29         }       
30 }
31
32 //**********************************************************************************************************************
33
// Releases the objects this command holds pointers to.
// input, read and list are assigned by execute(); deleting them here relies on
// each being either NULL or a valid pointer by the time the command is destroyed.
BinSeqCommand::~BinSeqCommand(){
        delete input;   // taken from globaldata->ginput in execute()
        delete read;    // the ReadOTUFile allocated in execute()
        delete fasta;   // the FastaMap allocated in the constructor
        delete list;    // last list vector fetched in execute(); NULL once every list was consumed
}
40
41 //**********************************************************************************************************************
42
43 int BinSeqCommand::execute(){
44         try {
45                 int count = 1;
46                 string binnames, name, sequence;
47                 
48                 //read fastafile
49                 fasta->readFastaFile(in);
50                 
51                 //set format to list so input can get listvector
52                 globaldata->setFormat("list");
53                 
54                 //if user gave a namesfile then use it
55                 if (namesfile != "") {
56                         readNamesFile();
57                 }
58                 
59                 //read list file
60                 read = new ReadOTUFile(globaldata->getListFile());      
61                 read->read(&*globaldata); 
62                 
63                 input = globaldata->ginput;
64                 list = globaldata->gListVector;
65                                 
66                 while(list != NULL){
67                         
68                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
69                                 
70                                 //make new folder for bin info
71                                 //string foldername = "/" + getRootName(globaldata->getListFile()) + list->getLabel() + ".bins/";
72                         //      mkdir(foldername.c_str()); 
73                         
74                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
75                                 openOutputFile(outputFileName, out);
76
77                                 cout << list->getLabel() << '\t' << count << endl;
78                                 
79                                 //for each bin in the list vector
80                                 for (int i = 0; i < list->size(); i++) {
81                                 
82                                         //create output file
83                                         //string outputFileName = foldername + getRootName(globaldata->getListFile()) + "bin" + toString(i+1) + ".fasta";
84                                         //openOutputFile(outputFileName, out);
85
86                                         binnames = list->get(i);
87                                         while (binnames.find_first_of(',') != -1) { 
88                                                 name = binnames.substr(0,binnames.find_first_of(','));
89                                                 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
90                                                 
91                                                 //do work for that name
92                                                 sequence = fasta->getSequence(name);
93                                                 if (sequence != "not found") {
94                                                         name = name + "|" + toString(i+1);
95                                                         out << ">" << name << endl;
96                                                         out << sequence << endl;
97                                                 }else { 
98                                                         cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
99                                                         remove(outputFileName.c_str());
100                                                         return 0;
101                                                 }
102                                                 
103                                         }
104                                         
105                                         //get last name
106                                         sequence = fasta->getSequence(binnames);
107                                         if (sequence != "not found") {
108                                                 name = binnames + '|' + toString(i+1);
109                                                 out << ">" << name << endl;
110                                                 out << sequence << endl;
111                                         }else { 
112                                                 cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
113                                                 remove(outputFileName.c_str());
114                                                 return 0;
115                                         }
116                                         //out.close();
117                                 }
118                                 out.close();
119                         }
120                         
121                         list = input->getListVector();
122                         count++;
123                 }
124                 
125                 return 0;
126         }
127         catch(exception& e) {
128                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
129                 exit(1);
130         }
131         catch(...) {
132                 cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
133                 exit(1);
134         }       
135 }
136
137 //**********************************************************************************************************************
138 void BinSeqCommand::readNamesFile() {
139         try {
140                 vector<string> dupNames;
141                 openInputFile(namesfile, inNames);
142                 
143                 string name, names, sequence;
144         
145                 while(inNames){
146                         inNames >> name;                        //read from first column  A
147                         inNames >> names;               //read from second column  A,B,C,D
148                         
149                         dupNames.clear();
150                         
151                         //parse names into vector
152                         splitAtComma(names, dupNames);
153                         
154                         //store names in fasta map
155                         sequence = fasta->getSequence(name);
156                         for (int i = 0; i < dupNames.size(); i++) {
157                                 fasta->push_back(dupNames[i], sequence);
158                         }
159                 
160                         gobble(inNames);
161                 }
162                 inNames.close();
163
164         }
165         catch(exception& e) {
166                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167                 exit(1);
168         }
169         catch(...) {
170                 cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
171                 exit(1);
172         }       
173 }
174 //**********************************************************************************************************************
175
176
177