]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
91d1181f3770b5010fcabd7e5c78e61b18186904
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include "sequence.hpp"
12
13 /**************************************************************************************/
14
15 FilterSeqsCommand::FilterSeqsCommand(string option)  {
16         try {
17                 abort = false;
18                 filterFileName = "";
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string,string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string,string>::iterator it;
33                         
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("fasta");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
50                                 }
51                                 
52                                 it = parameters.find("hard");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["hard"] = inputDir + it->second;             }
58                                 }
59                         }
60                         
61                         //check for required parameters
62                         fasta = validParameter.validFile(parameters, "fasta", false);
63                         if (fasta == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true;  }
64                         else { 
65                                 splitAtDash(fasta, fastafileNames);
66                                 
67                                 //go through files and make sure they are good, if not, then disregard them
68                                 for (int i = 0; i < fastafileNames.size(); i++) {
69                                         if (inputDir != "") {
70                                                 string path = hasPath(fastafileNames[i]);
71                                                 //if the user has not given a path then, add inputdir. else leave path alone.
72                                                 if (path == "") {       fastafileNames[i] = inputDir + fastafileNames[i];               }
73                                         }
74
75                                         int ableToOpen;
76                                         ifstream in;
77                                         ableToOpen = openInputFile(fastafileNames[i], in);
78                                         if (ableToOpen == 1) { 
79                                                 m->mothurOut(fastafileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
80                                                 //erase from file list
81                                                 fastafileNames.erase(fastafileNames.begin()+i);
82                                                 i--;
83                                         }else{  
84                                                 string simpleName = getSimpleName(fastafileNames[i]);
85                                                 filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
86                                         }
87                                         in.close();
88                                 }
89                                 
90                                 //make sure there is at least one valid file left
91                                 if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
92                         }
93                         
94                         if (!abort) {
95                                 //if the user changes the output directory command factory will send this info to us in the output parameter 
96                                 outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                         outputDir = ""; 
98                                         outputDir += hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it  
99                                 }
100                         }
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         
104                         string temp;
105                         temp = validParameter.validFile(parameters, "trump", false);                    if (temp == "not found") { temp = "*"; }
106                         trump = temp[0];
107                         
108                         temp = validParameter.validFile(parameters, "soft", false);                             if (temp == "not found") { soft = 0; }
109                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
110                         
111                         hard = validParameter.validFile(parameters, "hard", true);                              if (hard == "not found") { hard = ""; }
112                         else if (hard == "not open") { abort = true; }  
113                         
114                         vertical = validParameter.validFile(parameters, "vertical", false);             if (vertical == "not found") { vertical = "T"; }
115                         
116                         numSeqs = 0;
117                         
118                         if (abort == false) {
119                         
120                                 if (soft != 0)                  {  F.setSoft(soft);             }
121                                 if (trump != '*')               {  F.setTrump(trump);   }
122                         
123                         }
124                                                 
125                 }
126                 
127         }
128         catch(exception& e) {
129                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
130                 exit(1);
131         }
132 }
133
134 //**********************************************************************************************************************
135
136 void FilterSeqsCommand::help(){
137         try {
138                 m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
139                 m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
140                 m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
141                 m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n");
142                 m->mothurOut("The trump parameter .... The default is ...\n");
143                 m->mothurOut("The soft parameter .... The default is ....\n");
144                 m->mothurOut("The hard parameter .... The default is ....\n");
145                 m->mothurOut("The vertical parameter .... The default is T.\n");
146                 m->mothurOut("The filter.seqs command should be in the following format: \n");
147                 m->mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) \n");
148                 m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
149                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
150                 
151         }
152         catch(exception& e) {
153                 m->errorOut(e, "FilterSeqsCommand", "help");
154                 exit(1);
155         }
156 }
157
158 /**************************************************************************************/
159
160 int FilterSeqsCommand::execute() {      
161         try {
162         
163                 if (abort == true) { return 0; }
164                 vector<string> outputNames;
165                 
166                 ifstream inFASTA;
167                 openInputFile(fastafileNames[0], inFASTA);
168                 
169                 Sequence testSeq(inFASTA);
170                 alignmentLength = testSeq.getAlignLength();
171                 inFASTA.close();
172                 
173                 F.setLength(alignmentLength);
174                 
175                 if(soft != 0 || isTrue(vertical)){
176                         F.initialize();
177                 }
178                 
179                 if(hard.compare("") != 0)       {       F.doHard(hard);         }
180                 else                                            {       F.setFilter(string(alignmentLength, '1'));      }
181
182                 if(trump != '*' || isTrue(vertical) || soft != 0){
183                         for (int i = 0; i < fastafileNames.size(); i++) {
184                                 ifstream in;
185                                 openInputFile(fastafileNames[i], in);
186                                 
187                                 while(!in.eof()){       //read through and create the filter...
188                                         Sequence seq(in);
189                                         if (seq.getName() != "") {
190                                                 if(trump != '*'){       F.doTrump(seq); }
191                                                 if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
192                                                 numSeqs++;
193                                                 cout.flush();
194                                         }
195                                 }
196                                 in.close();
197                         }
198                 
199                 }
200                 F.setNumSeqs(numSeqs);
201                 
202                 
203                 if(isTrue(vertical) == 1)       {       F.doVertical(); }
204                 if(soft != 0)                           {       F.doSoft();             }
205                 
206                 filter = F.getFilter();
207
208                 ofstream outFilter;
209                 
210                 string filterFile = outputDir + filterFileName + ".filter";
211                 openOutputFile(filterFile, outFilter);
212                 outFilter << filter << endl;
213                 outFilter.close();
214                 outputNames.push_back(filterFile);
215                 
216                 numSeqs = 0;
217                 for (int i = 0; i < fastafileNames.size(); i++) {
218                         ifstream in;
219                         openInputFile(fastafileNames[i], in);
220                         string filteredFasta = outputDir + getRootName(getSimpleName(fastafileNames[i])) + "filter.fasta";
221                         ofstream outFASTA;
222                         openOutputFile(filteredFasta, outFASTA);
223                         outputNames.push_back(filteredFasta);
224                         
225                         
226                         while(!in.eof()){
227                                 Sequence seq(in);
228                                 if (seq.getName() != "") {
229                                         string align = seq.getAligned();
230                                         string filterSeq = "";
231                                         
232                                         for(int j=0;j<alignmentLength;j++){
233                                                 if(filter[j] == '1'){
234                                                         filterSeq += align[j];
235                                                 }
236                                         }
237                                         
238                                         outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
239                                         numSeqs++;
240                                 }
241                                 gobble(in);
242                         }
243                         outFASTA.close();
244                         in.close();
245                 }
246                 
247                 int filteredLength = 0;
248                 for(int i=0;i<alignmentLength;i++){
249                         if(filter[i] == '1'){   filteredLength++;       }
250                 }
251                 
252                 m->mothurOutEndLine();
253                 m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
254                 m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
255                 m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine();
256                 m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine();
257                 
258                 
259                 m->mothurOutEndLine();
260                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
261                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
262                 m->mothurOutEndLine();
263
264                 return 0;
265                 
266         }
267         catch(exception& e) {
268                 m->errorOut(e, "FilterSeqsCommand", "execute");
269                 exit(1);
270         }
271 }
272
273 /**************************************************************************************/