2 * chimeraseqscommand.cpp
5 * Created by Sarah Westcott on 6/29/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "chimeraseqscommand.h"
11 #include "bellerophon.h"
14 #include "chimeracheckrdp.h"
15 #include "chimeraslayer.h"
18 //***************************************************************************************************************
20 ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
24 //allow user to run help
25 if(option == "help") { help(); abort = true; }
28 //valid paramters for this command
29 string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters","outputdir","inputdir" };
30 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
32 OptionParser parser(option);
33 map<string,string> parameters = parser.getParameters();
35 ValidParameters validParameter;
36 map<string,string>::iterator it;
38 //check to make sure all parameters are valid for command
39 for (it = parameters.begin(); it != parameters.end(); it++) {
40 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
43 //if the user changes the input directory command factory will send this info to us in the output parameter
44 string inputDir = validParameter.validFile(parameters, "inputdir", false);
45 if (inputDir == "not found"){ inputDir = ""; }
48 it = parameters.find("fasta");
49 //user has given a template file
50 if(it != parameters.end()){
51 path = hasPath(it->second);
52 //if the user has not given a path then, add inputdir. else leave path alone.
53 if (path == "") { parameters["fasta"] = inputDir + it->second; }
56 it = parameters.find("template");
57 //user has given a template file
58 if(it != parameters.end()){
59 path = hasPath(it->second);
60 //if the user has not given a path then, add inputdir. else leave path alone.
61 if (path == "") { parameters["template"] = inputDir + it->second; }
64 it = parameters.find("conservation");
65 //user has given a template file
66 if(it != parameters.end()){
67 path = hasPath(it->second);
68 //if the user has not given a path then, add inputdir. else leave path alone.
69 if (path == "") { parameters["conservation"] = inputDir + it->second; }
72 it = parameters.find("quantile");
73 //user has given a template file
74 if(it != parameters.end()){
75 path = hasPath(it->second);
76 //if the user has not given a path then, add inputdir. else leave path alone.
77 if (path == "") { parameters["quantile"] = inputDir + it->second; }
80 it = parameters.find("name");
81 //user has given a template file
82 if(it != parameters.end()){
83 path = hasPath(it->second);
84 //if the user has not given a path then, add inputdir. else leave path alone.
85 if (path == "") { parameters["name"] = inputDir + it->second; }
90 //check for required parameters
91 fastafile = validParameter.validFile(parameters, "fasta", true);
92 if (fastafile == "not open") { abort = true; }
93 else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; }
95 //if the user changes the output directory command factory will send this info to us in the output parameter
96 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
98 outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
101 templatefile = validParameter.validFile(parameters, "template", true);
102 if (templatefile == "not open") { abort = true; }
103 else if (templatefile == "not found") { templatefile = ""; }
105 consfile = validParameter.validFile(parameters, "conservation", true);
106 if (consfile == "not open") { abort = true; }
107 else if (consfile == "not found") { consfile = ""; }
109 quanfile = validParameter.validFile(parameters, "quantile", true);
110 if (quanfile == "not open") { abort = true; }
111 else if (quanfile == "not found") { quanfile = ""; }
113 namefile = validParameter.validFile(parameters, "name", true);
114 if (namefile == "not open") { abort = true; }
115 else if (namefile == "not found") { namefile = ""; }
117 maskfile = validParameter.validFile(parameters, "mask", false);
118 if (maskfile == "not found") { maskfile = ""; }
119 else if (maskfile != "default") {
120 if (inputDir != "") {
121 string path = hasPath(maskfile);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { maskfile = inputDir + maskfile; }
127 int ableToOpen = openInputFile(maskfile, in);
128 if (ableToOpen == 1) { abort = true; }
132 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; }
135 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
136 filter = isTrue(temp);
138 temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
139 correction = isTrue(temp);
141 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
142 convert(temp, processors);
144 temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
145 convert(temp, ksize);
147 temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; }
150 temp = validParameter.validFile(parameters, "window", false);
151 if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }
152 else if (temp == "not found") { temp = "0"; }
153 convert(temp, window);
155 temp = validParameter.validFile(parameters, "match", false); if (temp == "not found") { temp = "5"; }
156 convert(temp, match);
158 temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-4"; }
159 convert(temp, mismatch);
161 temp = validParameter.validFile(parameters, "divergence", false); if (temp == "not found") { temp = "1.0"; }
164 temp = validParameter.validFile(parameters, "minsim", false); if (temp == "not found") { temp = "90"; }
165 convert(temp, minSimilarity);
167 temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "5"; }
168 convert(temp, parents);
170 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
171 convert(temp, iters);
173 temp = validParameter.validFile(parameters, "increment", false);
174 if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
175 else if (temp == "not found") { temp = "25"; }
176 convert(temp, increment);
178 temp = validParameter.validFile(parameters, "numwanted", false);
179 if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }
180 else if (temp == "not found") { temp = "20"; }
181 convert(temp, numwanted);
185 if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode, chimeraslayer or chimeracheck methods."); mothurOutEndLine(); abort = true; }
190 catch(exception& e) {
191 errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
195 //**********************************************************************************************************************
197 void ChimeraSeqsCommand::help(){
200 //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
201 //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
202 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
203 mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
204 mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
205 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
206 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
207 mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
208 mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is pintail. Options include bellerophon, ccode and chimeracheck \n");
209 mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
210 mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
211 mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
212 mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
213 mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
214 mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
215 mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
216 mothurOut("The ksize parameter allows you to input kmersize. \n");
217 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
218 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
219 //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
220 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
221 mothurOut("Details for each method: \n");
222 mothurOut("\tpintail: \n");
223 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n");
224 mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
225 mothurOut("\tbellerophon: \n");
226 mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n");
227 mothurOut("\tccode: \n");
228 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n");
229 mothurOut("\tchimeracheck: \n");
230 mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n");
231 //mothurOut("\tchimeraslayer: \n");
232 //mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n");
233 mothurOut("The chimera.seqs command should be in the following format: \n");
234 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
235 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
236 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
238 catch(exception& e) {
239 errorOut(e, "ChimeraSeqsCommand", "help");
244 //***************************************************************************************************************
246 ChimeraSeqsCommand::~ChimeraSeqsCommand(){ /* do nothing */ }
248 //***************************************************************************************************************
250 int ChimeraSeqsCommand::execute(){
253 if (abort == true) { return 0; }
255 if (method == "bellerophon") { chimera = new Bellerophon(fastafile, outputDir); }
256 else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile, outputDir); }
257 else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile, outputDir); }
258 else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile, outputDir); }
259 else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); }
260 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
263 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine(); }
265 //saves time to avoid generating it
266 chimera->setCons(consfile);
268 //saves time to avoid generating it
269 chimera->setQuantiles(quanfile);
271 chimera->setMask(maskfile);
272 chimera->setFilter(filter);
273 chimera->setCorrection(correction);
274 chimera->setProcessors(processors);
275 chimera->setWindow(window);
276 chimera->setIncrement(increment);
277 chimera->setNumWanted(numwanted);
278 chimera->setKmerSize(ksize);
279 chimera->setSVG(svg);
280 chimera->setName(namefile);
281 chimera->setMatch(match);
282 chimera->setMisMatch(mismatch);
283 chimera->setDivR(divR);
284 chimera->setParents(parents);
285 chimera->setMinSim(minSimilarity);
286 chimera->setIters(iters);
290 int error = chimera->getChimeras();
292 //there was a problem
293 if (error == 1) { return 0; }
295 string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
297 openOutputFile(outputFileName, out);
309 catch(exception& e) {
310 errorOut(e, "ChimeraSeqsCommand", "execute");
314 /**************************************************************************************************/