]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
added checks for ^C to quit command instead of program
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include "sequence.hpp"
12
13 /**************************************************************************************/
14
15 FilterSeqsCommand::FilterSeqsCommand(string option)  {
16         try {
17                 abort = false;
18                 filterFileName = "";
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string,string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string,string>::iterator it;
33                         
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("fasta");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
50                                 }
51                                 
52                                 it = parameters.find("hard");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["hard"] = inputDir + it->second;             }
58                                 }
59                         }
60                         
61                         //check for required parameters
62                         fasta = validParameter.validFile(parameters, "fasta", false);
63                         if (fasta == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true;  }
64                         else { 
65                                 splitAtDash(fasta, fastafileNames);
66                                 
67                                 //go through files and make sure they are good, if not, then disregard them
68                                 for (int i = 0; i < fastafileNames.size(); i++) {
69                                         if (inputDir != "") {
70                                                 string path = hasPath(fastafileNames[i]);
71                                                 //if the user has not given a path then, add inputdir. else leave path alone.
72                                                 if (path == "") {       fastafileNames[i] = inputDir + fastafileNames[i];               }
73                                         }
74
75                                         int ableToOpen;
76                                         ifstream in;
77                                         ableToOpen = openInputFile(fastafileNames[i], in);
78                                         if (ableToOpen == 1) { 
79                                                 m->mothurOut(fastafileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
80                                                 //erase from file list
81                                                 fastafileNames.erase(fastafileNames.begin()+i);
82                                                 i--;
83                                         }else{  
84                                                 string simpleName = getSimpleName(fastafileNames[i]);
85                                                 filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
86                                         }
87                                         in.close();
88                                 }
89                                 
90                                 //make sure there is at least one valid file left
91                                 if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
92                         }
93                         
94                         if (!abort) {
95                                 //if the user changes the output directory command factory will send this info to us in the output parameter 
96                                 outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                         outputDir = ""; 
98                                         outputDir += hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it  
99                                 }
100                         }
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         
104                         string temp;
105                         temp = validParameter.validFile(parameters, "trump", false);                    if (temp == "not found") { temp = "*"; }
106                         trump = temp[0];
107                         
108                         temp = validParameter.validFile(parameters, "soft", false);                             if (temp == "not found") { soft = 0; }
109                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
110                         
111                         hard = validParameter.validFile(parameters, "hard", true);                              if (hard == "not found") { hard = ""; }
112                         else if (hard == "not open") { abort = true; }  
113                         
114                         vertical = validParameter.validFile(parameters, "vertical", false);             if (vertical == "not found") { vertical = "T"; }
115                         
116                         numSeqs = 0;
117                         
118                         if (abort == false) {
119                         
120                                 if (soft != 0)                  {  F.setSoft(soft);             }
121                                 if (trump != '*')               {  F.setTrump(trump);   }
122                         
123                         }
124                                                 
125                 }
126                 
127         }
128         catch(exception& e) {
129                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
130                 exit(1);
131         }
132 }
133
134 //**********************************************************************************************************************
135
136 void FilterSeqsCommand::help(){
137         try {
138                 m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
139                 m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
140                 m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
141                 m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n");
142                 m->mothurOut("The trump parameter .... The default is ...\n");
143                 m->mothurOut("The soft parameter .... The default is ....\n");
144                 m->mothurOut("The hard parameter .... The default is ....\n");
145                 m->mothurOut("The vertical parameter .... The default is T.\n");
146                 m->mothurOut("The filter.seqs command should be in the following format: \n");
147                 m->mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) \n");
148                 m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
149                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
150                 
151         }
152         catch(exception& e) {
153                 m->errorOut(e, "FilterSeqsCommand", "help");
154                 exit(1);
155         }
156 }
157
158 /**************************************************************************************/
159
160 int FilterSeqsCommand::execute() {      
161         try {
162         
163                 if (abort == true) { return 0; }
164                 vector<string> outputNames;
165                 
166                 ifstream inFASTA;
167                 openInputFile(fastafileNames[0], inFASTA);
168                 
169                 Sequence testSeq(inFASTA);
170                 alignmentLength = testSeq.getAlignLength();
171                 inFASTA.close();
172                 
173                 F.setLength(alignmentLength);
174                 
175                 if(soft != 0 || isTrue(vertical)){
176                         F.initialize();
177                 }
178                 
179                 if(hard.compare("") != 0)       {       F.doHard(hard);         }
180                 else                                            {       F.setFilter(string(alignmentLength, '1'));      }
181
182                 if(trump != '*' || isTrue(vertical) || soft != 0){
183                         for (int i = 0; i < fastafileNames.size(); i++) {
184                                 ifstream in;
185                                 openInputFile(fastafileNames[i], in);
186                                 
187                                 while(!in.eof()){       //read through and create the filter...
188                                 
189                                         if (m->control_pressed) { in.close(); return 0; }
190                                         
191                                         Sequence seq(in);
192                                         if (seq.getName() != "") {
193                                                 if(trump != '*'){       F.doTrump(seq); }
194                                                 if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
195                                                 numSeqs++;
196                                                 cout.flush();
197                                         }
198                                 }
199                                 in.close();
200                         }
201                 
202                 }
203                 F.setNumSeqs(numSeqs);
204                 
205                 
206                 if(isTrue(vertical) == 1)       {       F.doVertical(); }
207                 if(soft != 0)                           {       F.doSoft();             }
208                 
209                 filter = F.getFilter();
210
211                 ofstream outFilter;
212                 
213                 string filterFile = outputDir + filterFileName + ".filter";
214                 openOutputFile(filterFile, outFilter);
215                 outFilter << filter << endl;
216                 outFilter.close();
217                 outputNames.push_back(filterFile);
218                 
219                 numSeqs = 0;
220                 for (int i = 0; i < fastafileNames.size(); i++) {
221                         ifstream in;
222                         openInputFile(fastafileNames[i], in);
223                         string filteredFasta = outputDir + getRootName(getSimpleName(fastafileNames[i])) + "filter.fasta";
224                         ofstream outFASTA;
225                         openOutputFile(filteredFasta, outFASTA);
226                         outputNames.push_back(filteredFasta);
227                         
228                         
229                         while(!in.eof()){
230                                 if (m->control_pressed) { in.close(); outFASTA.close(); for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
231                                 
232                                 Sequence seq(in);
233                                 if (seq.getName() != "") {
234                                         string align = seq.getAligned();
235                                         string filterSeq = "";
236                                         
237                                         for(int j=0;j<alignmentLength;j++){
238                                                 if(filter[j] == '1'){
239                                                         filterSeq += align[j];
240                                                 }
241                                         }
242                                         
243                                         outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
244                                         numSeqs++;
245                                 }
246                                 gobble(in);
247                         }
248                         outFASTA.close();
249                         in.close();
250                 }
251                 
252                 int filteredLength = 0;
253                 for(int i=0;i<alignmentLength;i++){
254                         if(filter[i] == '1'){   filteredLength++;       }
255                 }
256                 
257                 if (m->control_pressed) {  for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
258
259                 
260                 m->mothurOutEndLine();
261                 m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
262                 m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
263                 m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine();
264                 m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine();
265                 
266                 
267                 m->mothurOutEndLine();
268                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
269                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
270                 m->mothurOutEndLine();
271
272                 return 0;
273                 
274         }
275         catch(exception& e) {
276                 m->errorOut(e, "FilterSeqsCommand", "execute");
277                 exit(1);
278         }
279 }
280
281 /**************************************************************************************/