]> git.donarmstrong.com Git - mothur.git/blob - chimeraseqscommand.cpp
added set.dir command and modified commands to redirect input and output, removed...
[mothur.git] / chimeraseqscommand.cpp
1 /*
2  *  chimeraseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 6/29/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "chimeraseqscommand.h"
11 #include "bellerophon.h"
12 #include "pintail.h"
13 #include "ccode.h"
14 #include "chimeracheckrdp.h"
15 #include "chimeraslayer.h"
16
17
18 //***************************************************************************************************************
19
20 ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
21         try {
22                 abort = false;
23                 
24                 //allow user to run help
25                 if(option == "help") { help(); abort = true; }
26                 
27                 else {
28                         //valid paramters for this command
29                         string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters","outputdir","inputdir" };
30                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
31                         
32                         OptionParser parser(option);
33                         map<string,string> parameters = parser.getParameters();
34                         
35                         ValidParameters validParameter;
36                         map<string,string>::iterator it;
37                         
38                         //check to make sure all parameters are valid for command
39                         for (it = parameters.begin(); it != parameters.end(); it++) { 
40                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
41                         }
42                         
43                         //if the user changes the input directory command factory will send this info to us in the output parameter 
44                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
45                         if (inputDir == "not found"){   inputDir = "";          }
46                         else {
47                                 string path;
48                                 it = parameters.find("fasta");
49                                 //user has given a template file
50                                 if(it != parameters.end()){ 
51                                         path = hasPath(it->second);
52                                         //if the user has not given a path then, add inputdir. else leave path alone.
53                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
54                                 }
55                                 
56                                 it = parameters.find("template");
57                                 //user has given a template file
58                                 if(it != parameters.end()){ 
59                                         path = hasPath(it->second);
60                                         //if the user has not given a path then, add inputdir. else leave path alone.
61                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
62                                 }
63                                 
64                                 it = parameters.find("conservation");
65                                 //user has given a template file
66                                 if(it != parameters.end()){ 
67                                         path = hasPath(it->second);
68                                         //if the user has not given a path then, add inputdir. else leave path alone.
69                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
70                                 }
71                                 
72                                 it = parameters.find("quantile");
73                                 //user has given a template file
74                                 if(it != parameters.end()){ 
75                                         path = hasPath(it->second);
76                                         //if the user has not given a path then, add inputdir. else leave path alone.
77                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
78                                 }
79                                 
80                                 it = parameters.find("name");
81                                 //user has given a template file
82                                 if(it != parameters.end()){ 
83                                         path = hasPath(it->second);
84                                         //if the user has not given a path then, add inputdir. else leave path alone.
85                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
86                                 }
87                         }
88
89                         
90                         //check for required parameters
91                         fastafile = validParameter.validFile(parameters, "fasta", true);
92                         if (fastafile == "not open") { abort = true; }
93                         else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
94                         
95                         //if the user changes the output directory command factory will send this info to us in the output parameter 
96                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                 outputDir = ""; 
98                                 outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
99                         }
100
101                         templatefile = validParameter.validFile(parameters, "template", true);
102                         if (templatefile == "not open") { abort = true; }
103                         else if (templatefile == "not found") { templatefile = "";  }   
104                         
105                         consfile = validParameter.validFile(parameters, "conservation", true);
106                         if (consfile == "not open") { abort = true; }
107                         else if (consfile == "not found") { consfile = "";  }   
108                         
109                         quanfile = validParameter.validFile(parameters, "quantile", true);
110                         if (quanfile == "not open") { abort = true; }
111                         else if (quanfile == "not found") { quanfile = "";  }
112                         
113                         namefile = validParameter.validFile(parameters, "name", true);
114                         if (namefile == "not open") { abort = true; }
115                         else if (namefile == "not found") { namefile = "";  }
116
117                         maskfile = validParameter.validFile(parameters, "mask", false);
118                         if (maskfile == "not found") { maskfile = "";  }        
119                         else if (maskfile != "default")  { 
120                                 if (inputDir != "") {
121                                         string path = hasPath(maskfile);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       maskfile = inputDir + maskfile;         }
124                                 }
125
126                                 ifstream in;
127                                 int     ableToOpen = openInputFile(maskfile, in);
128                                 if (ableToOpen == 1) { abort = true; }
129                                 in.close();
130                         }
131                         
132                         method = validParameter.validFile(parameters, "method", false);                 if (method == "not found") { method = "pintail"; }
133                         
134                         string temp;
135                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
136                         filter = isTrue(temp);
137                         
138                         temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
139                         correction = isTrue(temp);
140                         
141                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
142                         convert(temp, processors);
143                         
144                         temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
145                         convert(temp, ksize);
146                         
147                         temp = validParameter.validFile(parameters, "svg", false);                              if (temp == "not found") { temp = "F"; }
148                         svg = isTrue(temp);
149                         
150                         temp = validParameter.validFile(parameters, "window", false);   
151                         if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }                     
152                         else if (temp == "not found") { temp = "0"; }
153                         convert(temp, window);
154                         
155                         temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
156                         convert(temp, match);
157                         
158                         temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
159                         convert(temp, mismatch);
160                         
161                         temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.0"; }
162                         convert(temp, divR);
163                         
164                         temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
165                         convert(temp, minSimilarity);
166                         
167                         temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "5"; }
168                         convert(temp, parents); 
169                         
170                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
171                         convert(temp, iters); 
172                          
173                         temp = validParameter.validFile(parameters, "increment", false);                
174                         if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
175                         else if (temp == "not found") { temp = "25"; }
176                         convert(temp, increment);
177                         
178                         temp = validParameter.validFile(parameters, "numwanted", false);
179                         if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }              
180                         else if (temp == "not found") { temp = "20"; }
181                         convert(temp, numwanted);
182
183                         
184                         
185                         if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode, chimeraslayer or chimeracheck methods."); mothurOutEndLine(); abort = true;  }
186                         
187
188                 }
189         }
190         catch(exception& e) {
191                 errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
192                 exit(1);
193         }
194 }
195 //**********************************************************************************************************************
196
197 void ChimeraSeqsCommand::help(){
198         try {
199         
200                 //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
201                 //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
202                 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
203                 mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
204                 mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
205                 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
206                 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
207                 mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
208                 mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is pintail. Options include bellerophon, ccode and chimeracheck \n");
209                 mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
210                 mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
211                 mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
212                 mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
213                 mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
214                 mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
215                 mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
216                 mothurOut("The ksize parameter allows you to input kmersize. \n");
217                 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
218                 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
219                 //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
220                 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
221                 mothurOut("Details for each method: \n"); 
222                 mothurOut("\tpintail: \n"); 
223                 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n"); 
224                 mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
225                 mothurOut("\tbellerophon: \n"); 
226                 mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n"); 
227                 mothurOut("\tccode: \n"); 
228                 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n"); 
229                 mothurOut("\tchimeracheck: \n"); 
230                 mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n"); 
231                 //mothurOut("\tchimeraslayer: \n"); 
232                 //mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); 
233                 mothurOut("The chimera.seqs command should be in the following format: \n");
234                 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
235                 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
236                 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");        
237         }
238         catch(exception& e) {
239                 errorOut(e, "ChimeraSeqsCommand", "help");
240                 exit(1);
241         }
242 }
243
244 //***************************************************************************************************************
245
246 ChimeraSeqsCommand::~ChimeraSeqsCommand(){      /*      do nothing      */      }
247
248 //***************************************************************************************************************
249
250 int ChimeraSeqsCommand::execute(){
251         try{
252                 
253                 if (abort == true) { return 0; }
254                 
255                 if (method == "bellerophon")                    {               chimera = new Bellerophon(fastafile, outputDir);                        }
256                 else if (method == "pintail")                   {               chimera = new Pintail(fastafile, templatefile, outputDir);      }
257                 else if (method == "ccode")                             {               chimera = new Ccode(fastafile, templatefile);                           }
258                 else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, templatefile, outputDir);      }
259                 else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer(fastafile, templatefile);           }
260                 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
261                 
262                 //set user options
263                 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine();  }
264                 
265                 //saves time to avoid generating it
266                 chimera->setCons(consfile);     
267                 
268                 //saves time to avoid generating it
269                 chimera->setQuantiles(quanfile);                                
270                 
271                 chimera->setMask(maskfile);
272                 chimera->setFilter(filter);
273                 chimera->setCorrection(correction);
274                 chimera->setProcessors(processors);
275                 chimera->setWindow(window);
276                 chimera->setIncrement(increment);
277                 chimera->setNumWanted(numwanted);
278                 chimera->setKmerSize(ksize);
279                 chimera->setSVG(svg);
280                 chimera->setName(namefile);
281                 chimera->setMatch(match);
282                 chimera->setMisMatch(mismatch);
283                 chimera->setDivR(divR);
284                 chimera->setParents(parents);
285                 chimera->setMinSim(minSimilarity);
286                 chimera->setIters(iters);
287                 
288                                 
289                 //find chimeras
290                 int error = chimera->getChimeras();
291                 
292                 //there was a problem
293                 if (error == 1) {  return 0;  }
294
295                 string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
296                 ofstream out;
297                 openOutputFile(outputFileName, out);
298                 
299                 //print results
300                 chimera->print(out);
301                 
302                 out.close();
303                 
304                 delete chimera;
305                 
306                 return 0;
307                 
308         }
309         catch(exception& e) {
310                 errorOut(e, "ChimeraSeqsCommand", "execute");
311                 exit(1);
312         }
313 }
314 /**************************************************************************************************/
315