]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
6  *  Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12 //**********************************************************************************************************************
13 BinSeqCommand::BinSeqCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 fastafile = globaldata->getFastaFile();
17                 namesfile = globaldata->getNameFile();
18                 groupfile = globaldata->getGroupFile();
19                 openInputFile(fastafile, in);
20                 
21                 if (groupfile != "") {
22                         //read in group map info.
23                         groupMap = new GroupMap(groupfile);
24                         groupMap->readMap();
25                 }
26                 
27                 fasta = new FastaMap();
28         }
29         catch(exception& e) {
30                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
31                 exit(1);
32         }
33         catch(...) {
34                 cout << "An unknown error has occurred in the BinSeqCommand class function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
35                 exit(1);
36         }       
37 }
38
39 //**********************************************************************************************************************
40
41 BinSeqCommand::~BinSeqCommand(){
42         delete input;
43         delete read;
44         delete fasta;
45         delete list;
46         if (groupfile != "") {
47                 delete groupMap;
48         }
49 }
50
51 //**********************************************************************************************************************
52
53 int BinSeqCommand::execute(){
54         try {
55                 int count = 1;
56                 int error = 0;
57                 
58                 //read fastafile
59                 fasta->readFastaFile(in);
60                 
61                 //set format to list so input can get listvector
62                 globaldata->setFormat("list");
63                 
64                 //if user gave a namesfile then use it
65                 if (namesfile != "") {
66                         readNamesFile();
67                 }
68                 
69                 //read list file
70                 read = new ReadOTUFile(globaldata->getListFile());      
71                 read->read(&*globaldata); 
72                 
73                 input = globaldata->ginput;
74                 list = globaldata->gListVector;
75                 ListVector* lastList = list;
76                 
77                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
78                 set<string> processedLabels;
79                 set<string> userLabels = globaldata->labels;
80
81                                 
82                 while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
83                         
84                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
85                                 
86                                 error = process(list, count);   
87                                 if (error == 1) { return 0; }   
88                                                         
89                                 processedLabels.insert(list->getLabel());
90                                 userLabels.erase(list->getLabel());
91
92                         }
93                         
94                         if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
95                                 
96                                 error = process(lastList, count);       
97                                 if (error == 1) { return 0; }
98                                                                                                         
99                                 processedLabels.insert(lastList->getLabel());
100                                 userLabels.erase(lastList->getLabel());
101                                 
102                         }
103                         
104                         if (count != 1) { delete lastList; }
105                         lastList = list;                        
106
107                         list = input->getListVector();
108                         count++;
109                 }
110                 
111                 
112                 //output error messages about any remaining user labels
113                 set<string>::iterator it;
114                 bool needToRun = false;
115                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
116                         cout << "Your file does not include the label "<< *it; 
117                         if (processedLabels.count(lastList->getLabel()) != 1) {
118                                 cout << ". I will use " << lastList->getLabel() << "." << endl;
119                                 needToRun = true;
120                         }else {
121                                 cout << ". Please refer to " << lastList->getLabel() << "." << endl;
122                         }
123                 }
124                 
125                 //run last line if you need to
126                 if (needToRun == true)  {
127                         error = process(lastList, count);       
128                         if (error == 1) { return 0; }                   
129                 }
130                 
131                 delete lastList;
132                 return 0;
133         }
134         catch(exception& e) {
135                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
136                 exit(1);
137         }
138         catch(...) {
139                 cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
140                 exit(1);
141         }       
142 }
143
144 //**********************************************************************************************************************
145 void BinSeqCommand::readNamesFile() {
146         try {
147                 vector<string> dupNames;
148                 openInputFile(namesfile, inNames);
149                 
150                 string name, names, sequence;
151         
152                 while(inNames){
153                         inNames >> name;                        //read from first column  A
154                         inNames >> names;               //read from second column  A,B,C,D
155                         
156                         dupNames.clear();
157                         
158                         //parse names into vector
159                         splitAtComma(names, dupNames);
160                         
161                         //store names in fasta map
162                         sequence = fasta->getSequence(name);
163                         for (int i = 0; i < dupNames.size(); i++) {
164                                 fasta->push_back(dupNames[i], sequence);
165                         }
166                 
167                         gobble(inNames);
168                 }
169                 inNames.close();
170
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }       
180 }
181 //**********************************************************************************************************************
182 //return 1 if error, 0 otherwise
183 int BinSeqCommand::process(ListVector* list, int count) {
184         try {
185                                 string binnames, name, sequence;
186                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
187                                 openOutputFile(outputFileName, out);
188
189                                 cout << list->getLabel() << '\t' << count << endl;
190                                 
191                                 //for each bin in the list vector
192                                 for (int i = 0; i < list->size(); i++) {
193
194                                         binnames = list->get(i);
195                                         while (binnames.find_first_of(',') != -1) { 
196                                                 name = binnames.substr(0,binnames.find_first_of(','));
197                                                 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
198                                                 
199                                                 //do work for that name
200                                                 sequence = fasta->getSequence(name);
201                                                 if (sequence != "not found") {
202                                                         //if you don't have groups
203                                                         if (groupfile == "") {
204                                                                 name = name + "|" + toString(i+1);
205                                                                 out << ">" << name << endl;
206                                                                 out << sequence << endl;
207                                                         }else {//if you do have groups
208                                                                 string group = groupMap->getGroup(name);
209                                                                 if (group == "not found") {  
210                                                                         cout << name << " is missing from your group file. Please correct. " << endl;
211                                                                         remove(outputFileName.c_str());
212                                                                         return 1;
213                                                                 }else{
214                                                                         name = name + "|" + group + "|" + toString(i+1);
215                                                                         out << ">" << name << endl;
216                                                                         out << sequence << endl;
217                                                                 }
218                                                         }
219                                                 }else { 
220                                                         cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
221                                                         remove(outputFileName.c_str());
222                                                         return 1;
223                                                 }
224                                                 
225                                         }
226                                         
227                                         //get last name
228                                         sequence = fasta->getSequence(binnames);
229                                         if (sequence != "not found") {
230                                                 //if you don't have groups
231                                                 if (groupfile == "") {
232                                                         binnames = binnames + "|" + toString(i+1);
233                                                         out << ">" << binnames << endl;
234                                                         out << sequence << endl;
235                                                 }else {//if you do have groups
236                                                         string group = groupMap->getGroup(binnames);
237                                                         if (group == "not found") {  
238                                                                 cout << binnames << " is missing from your group file. Please correct. " << endl;
239                                                                 remove(outputFileName.c_str());
240                                                                 return 1;
241                                                         }else{
242                                                                 binnames = binnames + "|" + group + "|" + toString(i+1);
243                                                                 out << ">" << binnames << endl;
244                                                                 out << sequence << endl;
245                                                         }
246                                                 }
247                                         }else { 
248                                                 cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
249                                                 remove(outputFileName.c_str());
250                                                 return 1;
251                                         }
252                                 }
253                                         
254                                 out.close();
255                                 return 0;
256
257         }
258         catch(exception& e) {
259                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
260                 exit(1);
261         }
262         catch(...) {
263                 cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
264                 exit(1);
265         }       
266 }
267 //**********************************************************************************************************************
268
269