2 * chimeraseqscommand.cpp
5 * Created by Sarah Westcott on 6/29/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "chimeraseqscommand.h"
11 #include "bellerophon.h"
14 #include "chimeracheckrdp.h"
15 #include "chimeraslayer.h"
18 //***************************************************************************************************************
// Constructor: parses the option string given to the chimera.seqs command.
// It checks every supplied parameter name against the allowed list, resolves the
// file parameters (fasta, template, conservation, quantile, name, mask) and then
// reads the remaining settings, substituting defaults (some of them dependent on
// the chosen method) when a parameter was not supplied.
// Any failure sets the member `abort` to true; execute() returns early when it is set.
// NOTE(review): the embedded line numbers in this listing skip values, so some
// original lines (e.g. the `try {` opener, closing braces and the declarations of
// `temp` and `in`) are not visible here.
20 ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
24 //allow user to run help
25 if(option == "help") { help(); abort = true; }
28 //valid parameters for this command
29 string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters" };
// copy the array into a vector; sizeof(Array)/sizeof(string) is the element count
30 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
32 OptionParser parser(option);
33 map<string,string> parameters = parser.getParameters();
35 ValidParameters validParameter;
37 //check to make sure all parameters are valid for command
38 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
39 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
42 //check for required parameters
// fasta is the only file that is mandatory for every method: both "not open" and "not found" abort
43 fastafile = validParameter.validFile(parameters, "fasta", true);
44 if (fastafile == "not open") { abort = true; }
45 else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; }
// the remaining file parameters are optional here: "not found" becomes an empty string,
// while "not open" (a file was named but could not be opened) aborts
47 templatefile = validParameter.validFile(parameters, "template", true);
48 if (templatefile == "not open") { abort = true; }
49 else if (templatefile == "not found") { templatefile = ""; }
51 consfile = validParameter.validFile(parameters, "conservation", true);
52 if (consfile == "not open") { abort = true; }
53 else if (consfile == "not found") { consfile = ""; }
55 quanfile = validParameter.validFile(parameters, "quantile", true);
56 if (quanfile == "not open") { abort = true; }
57 else if (quanfile == "not found") { quanfile = ""; }
59 namefile = validParameter.validFile(parameters, "name", true);
60 if (namefile == "not open") { abort = true; }
61 else if (namefile == "not found") { namefile = ""; }
// mask is fetched with the third argument false because it may be the literal "default"
// instead of a file name; only a non-default value is opened to check that it is readable
63 maskfile = validParameter.validFile(parameters, "mask", false);
64 if (maskfile == "not found") { maskfile = ""; }
65 else if (maskfile != "default") {
67 int ableToOpen = openInputFile(maskfile, in);
// a return value of 1 from openInputFile is treated as failure
68 if (ableToOpen == 1) { abort = true; }
// non-file settings: each one falls back to a default when the parameter was not supplied
72 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; }
75 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
76 filter = isTrue(temp);
78 temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
79 correction = isTrue(temp);
81 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
82 convert(temp, processors);
// NOTE(review): for ksize, svg, match and divergence no statement storing temp into the
// corresponding member (ksize, svg, match, divR) is visible in this listing; the embedded
// numbering skips a line after each of them, so the assignment is presumably on a line
// not shown -- confirm against the full file, since execute() passes those members on.
84 temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
87 temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; }
// window default depends on the method: 100 for chimeraslayer, otherwise 0
90 temp = validParameter.validFile(parameters, "window", false);
91 if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }
92 else if (temp == "not found") { temp = "0"; }
93 convert(temp, window);
95 temp = validParameter.validFile(parameters, "match", false); if (temp == "not found") { temp = "5"; }
98 temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-4"; }
99 convert(temp, mismatch);
101 temp = validParameter.validFile(parameters, "divergence", false); if (temp == "not found") { temp = "1.0"; }
104 temp = validParameter.validFile(parameters, "minsim", false); if (temp == "not found") { temp = "90"; }
105 convert(temp, minSimilarity);
107 temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "5"; }
108 convert(temp, parents);
110 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
111 convert(temp, iters);
// increment default depends on the method: 10 for chimeracheck and chimeraslayer, otherwise 25
113 temp = validParameter.validFile(parameters, "increment", false);
114 if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
115 else if (temp == "not found") { temp = "25"; }
116 convert(temp, increment);
// numwanted default depends on the method: 10 for chimeraslayer, otherwise 20
118 temp = validParameter.validFile(parameters, "numwanted", false);
119 if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }
120 else if (temp == "not found") { temp = "20"; }
121 convert(temp, numwanted);
// every method except bellerophon needs a template file
// NOTE(review): the condition also covers chimeraslayer, but the message below names only
// pintail, ccode and chimeracheck.
125 if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode or chimeracheck methods."); mothurOutEndLine(); abort = true; }
// exceptions are reported through errorOut together with the class and function name
130 catch(exception& e) {
131 errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
135 //**********************************************************************************************************************
// Prints the usage text for the chimera.seqs command through mothurOut: the list of
// parameters, a one-line description of each, the per-method parameter defaults and an
// example invocation. The constructor calls this when the option string is "help".
// NOTE(review): the constructor also accepts match, mismatch, divergence, minsim and
// parents, and handles method=chimeraslayer, none of which is described in the text
// below -- consider extending the help output.
137 void ChimeraSeqsCommand::help(){
140 //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
141 //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
// general description and the list of parameters
142 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
143 mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
144 mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
// one line per parameter
145 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
146 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
147 mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
148 mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is pintail. Options include bellerophon, ccode and chimeracheck \n");
149 mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
150 mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
151 mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
152 mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
153 mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
154 mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
155 mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
156 mothurOut("The ksize parameter allows you to input kmersize. \n");
157 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
158 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
159 mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
160 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
// per-method parameter summary
161 mothurOut("Details for each method: \n");
162 mothurOut("\tpintail: \n");
163 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n");
164 mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
165 mothurOut("\tbellerophon: \n");
166 mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n");
167 mothurOut("\tccode: \n");
168 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n");
169 mothurOut("\tchimeracheck: \n");
170 mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n");
// command format and example
171 mothurOut("The chimera.seqs command should be in the following format: \n");
172 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
173 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
174 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
// exceptions are reported through errorOut together with the class and function name
176 catch(exception& e) {
177 errorOut(e, "ChimeraSeqsCommand", "help");
182 //***************************************************************************************************************
// Destructor: performs no work.
// NOTE(review): execute() allocates `chimera` with new; it is not released here, so
// confirm that it is deleted elsewhere.
184 ChimeraSeqsCommand::~ChimeraSeqsCommand(){ /* do nothing */ }
186 //***************************************************************************************************************
// Runs the chimera.seqs command: creates the detector object for the selected method,
// hands it every setting collected by the constructor, calls getChimeras() and opens
// the output file.
// Returns 0 immediately when `abort` is set or when `method` is not one of the five
// recognised names.
// NOTE(review): the embedded line numbers skip values (e.g. 233-243), so part of the
// body, including the declaration of `out` and whatever follows the file being opened,
// is not visible in this listing.
188 int ChimeraSeqsCommand::execute(){
191 if (abort == true) { return 0; }
// pick the implementation by method name; bellerophon is the only one built without a template file
// NOTE(review): the object is allocated with new and no matching delete is visible here -- confirm cleanup.
193 if (method == "bellerophon") { chimera = new Bellerophon(fastafile); }
194 else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile); }
195 else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile); }
196 else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile); }
197 else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); }
198 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
// tell the user which sequence is used when the built-in default mask was requested
201 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine(); }
203 //saves time to avoid generating it
204 chimera->setCons(consfile);
206 //saves time to avoid generating it
207 chimera->setQuantiles(quanfile);
// all settings are forwarded regardless of method
209 chimera->setMask(maskfile);
210 chimera->setFilter(filter);
211 chimera->setCorrection(correction);
212 chimera->setProcessors(processors);
213 chimera->setWindow(window);
214 chimera->setIncrement(increment);
215 chimera->setNumWanted(numwanted);
216 chimera->setKmerSize(ksize);
217 chimera->setSVG(svg);
218 chimera->setName(namefile);
219 chimera->setMatch(match);
220 chimera->setMisMatch(mismatch);
221 chimera->setDivR(divR);
222 chimera->setParents(parents);
223 chimera->setMinSim(minSimilarity);
224 chimera->setIters(iters);
// run the detection
228 chimera->getChimeras();
// output name = getRootName(fastafile) + method + maskfile + ".chimeras"
// NOTE(review): maskfile is appended verbatim, so a mask given with a directory component
// would presumably end up inside the file name -- confirm this is intended.
230 string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras";
232 openOutputFile(outputFileName, out);
// exceptions are reported through errorOut together with the class and function name
244 catch(exception& e) {
245 errorOut(e, "ChimeraSeqsCommand", "execute");
249 /**************************************************************************************************/