]> git.donarmstrong.com Git - mothur.git/blob - binsequencecommand.cpp
added get.rabund and get.sabund command and fixed bug introduced by line by line...
[mothur.git] / binsequencecommand.cpp
1 /*
2  *  binsequencecommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/3/09.
6  *  Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved.
7  *
8  */
9
10 #include "binsequencecommand.h"
11
12 //**********************************************************************************************************************
13 BinSeqCommand::BinSeqCommand(){
14         try {
15                 globaldata = GlobalData::getInstance();
16                 fastafile = globaldata->getFastaFile();
17                 namesfile = globaldata->getNameFile();
18                 groupfile = globaldata->getGroupFile();
19                 openInputFile(fastafile, in);
20                 
21                 if (groupfile != "") {
22                         //read in group map info.
23                         groupMap = new GroupMap(groupfile);
24                         groupMap->readMap();
25                 }
26                 
27                 fasta = new FastaMap();
28         }
29         catch(exception& e) {
30                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
31                 exit(1);
32         }
33         catch(...) {
34                 cout << "An unknown error has occurred in the BinSeqCommand class function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
35                 exit(1);
36         }       
37 }
38
39 //**********************************************************************************************************************
40
41 BinSeqCommand::~BinSeqCommand(){
42         delete input;
43         delete read;
44         delete fasta;
45         delete list;
46         if (groupfile != "") {
47                 delete groupMap;
48         }
49 }
50
51 //**********************************************************************************************************************
52
53 int BinSeqCommand::execute(){
54         try {
55                 int count = 1;
56                 int error = 0;
57                 
58                 //read fastafile
59                 fasta->readFastaFile(in);
60                 
61                 //set format to list so input can get listvector
62                 globaldata->setFormat("list");
63                 
64                 //if user gave a namesfile then use it
65                 if (namesfile != "") {
66                         readNamesFile();
67                 }
68                 
69                 //read list file
70                 read = new ReadOTUFile(globaldata->getListFile());      
71                 read->read(&*globaldata); 
72                 
73                 input = globaldata->ginput;
74                 list = globaldata->gListVector;
75                 ListVector* lastList = list;
76                 
77                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
78                 set<string> processedLabels;
79                 set<string> userLabels = globaldata->labels;
80                 set<int> userLines = globaldata->lines;
81
82                                 
83                 while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
84                         
85                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
86                                 
87                                 error = process(list, count);   
88                                 if (error == 1) { return 0; }   
89                                                         
90                                 processedLabels.insert(list->getLabel());
91                                 userLabels.erase(list->getLabel());
92                                 userLines.erase(count);
93                         }
94                         
95                         if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
96                                 
97                                 error = process(lastList, count);       
98                                 if (error == 1) { return 0; }
99                                                                                                         
100                                 processedLabels.insert(lastList->getLabel());
101                                 userLabels.erase(lastList->getLabel());
102                                 
103                         }
104                         
105                         if (count != 1) { delete lastList; }
106                         lastList = list;                        
107
108                         list = input->getListVector();
109                         count++;
110                 }
111                 
112                 
113                 //output error messages about any remaining user labels
114                 set<string>::iterator it;
115                 bool needToRun = false;
116                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
117                         cout << "Your file does not include the label "<< *it; 
118                         if (processedLabels.count(lastList->getLabel()) != 1) {
119                                 cout << ". I will use " << lastList->getLabel() << "." << endl;
120                                 needToRun = true;
121                         }else {
122                                 cout << ". Please refer to " << lastList->getLabel() << "." << endl;
123                         }
124                 }
125                 
126                 //run last line if you need to
127                 if (needToRun == true)  {
128                         error = process(lastList, count);       
129                         if (error == 1) { return 0; }                   
130                 }
131                 
132                 delete lastList;
133                 return 0;
134         }
135         catch(exception& e) {
136                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
137                 exit(1);
138         }
139         catch(...) {
140                 cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
141                 exit(1);
142         }       
143 }
144
145 //**********************************************************************************************************************
146 void BinSeqCommand::readNamesFile() {
147         try {
148                 vector<string> dupNames;
149                 openInputFile(namesfile, inNames);
150                 
151                 string name, names, sequence;
152         
153                 while(inNames){
154                         inNames >> name;                        //read from first column  A
155                         inNames >> names;               //read from second column  A,B,C,D
156                         
157                         dupNames.clear();
158                         
159                         //parse names into vector
160                         splitAtComma(names, dupNames);
161                         
162                         //store names in fasta map
163                         sequence = fasta->getSequence(name);
164                         for (int i = 0; i < dupNames.size(); i++) {
165                                 fasta->push_back(dupNames[i], sequence);
166                         }
167                 
168                         gobble(inNames);
169                 }
170                 inNames.close();
171
172         }
173         catch(exception& e) {
174                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175                 exit(1);
176         }
177         catch(...) {
178                 cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }       
181 }
182 //**********************************************************************************************************************
183 //return 1 if error, 0 otherwise
184 int BinSeqCommand::process(ListVector* list, int count) {
185         try {
186                                 string binnames, name, sequence;
187                                 string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
188                                 openOutputFile(outputFileName, out);
189
190                                 cout << list->getLabel() << '\t' << count << endl;
191                                 
192                                 //for each bin in the list vector
193                                 for (int i = 0; i < list->size(); i++) {
194
195                                         binnames = list->get(i);
196                                         while (binnames.find_first_of(',') != -1) { 
197                                                 name = binnames.substr(0,binnames.find_first_of(','));
198                                                 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
199                                                 
200                                                 //do work for that name
201                                                 sequence = fasta->getSequence(name);
202                                                 if (sequence != "not found") {
203                                                         //if you don't have groups
204                                                         if (groupfile == "") {
205                                                                 name = name + "|" + toString(i+1);
206                                                                 out << ">" << name << endl;
207                                                                 out << sequence << endl;
208                                                         }else {//if you do have groups
209                                                                 string group = groupMap->getGroup(name);
210                                                                 if (group == "not found") {  
211                                                                         cout << name << " is missing from your group file. Please correct. " << endl;
212                                                                         remove(outputFileName.c_str());
213                                                                         return 1;
214                                                                 }else{
215                                                                         name = name + "|" + group + "|" + toString(i+1);
216                                                                         out << ">" << name << endl;
217                                                                         out << sequence << endl;
218                                                                 }
219                                                         }
220                                                 }else { 
221                                                         cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
222                                                         remove(outputFileName.c_str());
223                                                         return 1;
224                                                 }
225                                                 
226                                         }
227                                         
228                                         //get last name
229                                         sequence = fasta->getSequence(binnames);
230                                         if (sequence != "not found") {
231                                                 //if you don't have groups
232                                                 if (groupfile == "") {
233                                                         binnames = binnames + "|" + toString(i+1);
234                                                         out << ">" << binnames << endl;
235                                                         out << sequence << endl;
236                                                 }else {//if you do have groups
237                                                         string group = groupMap->getGroup(binnames);
238                                                         if (group == "not found") {  
239                                                                 cout << binnames << " is missing from your group file. Please correct. " << endl;
240                                                                 remove(outputFileName.c_str());
241                                                                 return 1;
242                                                         }else{
243                                                                 binnames = binnames + "|" + group + "|" + toString(i+1);
244                                                                 out << ">" << binnames << endl;
245                                                                 out << sequence << endl;
246                                                         }
247                                                 }
248                                         }else { 
249                                                 cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
250                                                 remove(outputFileName.c_str());
251                                                 return 1;
252                                         }
253                                 }
254                                         
255                                 out.close();
256                                 return 0;
257
258         }
259         catch(exception& e) {
260                 cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
261                 exit(1);
262         }
263         catch(...) {
264                 cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }       
267 }
268 //**********************************************************************************************************************
269
270