2 * chimeraseqscommand.cpp
5 * Created by Sarah Westcott on 6/29/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "chimeraseqscommand.h"
11 #include "bellerophon.h"
14 #include "chimeracheckrdp.h"
17 //***************************************************************************************************************
19 ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
23 //allow user to run help
24 if(option == "help") { help(); abort = true; }
27 //valid paramters for this command
28 string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize" };
29 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
31 OptionParser parser(option);
32 map<string,string> parameters = parser.getParameters();
34 ValidParameters validParameter;
36 //check to make sure all parameters are valid for command
37 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
38 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
41 //check for required parameters
42 fastafile = validParameter.validFile(parameters, "fasta", true);
43 if (fastafile == "not open") { abort = true; }
44 else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; }
46 templatefile = validParameter.validFile(parameters, "template", true);
47 if (templatefile == "not open") { abort = true; }
48 else if (templatefile == "not found") { templatefile = ""; }
50 consfile = validParameter.validFile(parameters, "conservation", true);
51 if (consfile == "not open") { abort = true; }
52 else if (consfile == "not found") { consfile = ""; }
54 quanfile = validParameter.validFile(parameters, "quantile", true);
55 if (quanfile == "not open") { abort = true; }
56 else if (quanfile == "not found") { quanfile = ""; }
58 maskfile = validParameter.validFile(parameters, "mask", false);
59 if (maskfile == "not found") { maskfile = ""; }
60 else if (maskfile != "default") {
62 int ableToOpen = openInputFile(maskfile, in);
63 if (ableToOpen == 1) { abort = true; }
67 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; }
70 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
71 filter = isTrue(temp);
73 temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
74 correction = isTrue(temp);
76 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
77 convert(temp, processors);
79 temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
82 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
83 convert(temp, window);
85 temp = validParameter.validFile(parameters, "increment", false);
86 if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
87 else if (temp == "not found") { temp = "25"; }
88 convert(temp, increment);
90 temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "20"; }
91 convert(temp, numwanted);
95 if (((method == "pintail") || (method == "alignsim")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail and alignsim methods."); mothurOutEndLine(); abort = true; }
100 catch(exception& e) {
101 errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
105 //**********************************************************************************************************************
107 void ChimeraSeqsCommand::help(){
109 mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
110 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
111 mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation and quantile.\n");
112 mothurOut("The fasta parameter is always required and template is required if using pintail.\n");
113 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. The default is false. \n");
114 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs. The default is true. This only applies when the method is bellerphon.\n");
115 mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
116 mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is pintail. Options include..... \n");
117 mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the pintail. The default is no mask. If you enter mask=default, then the mask is 236627 EU009184.1 Shigella dysenteriae str. FBD013. \n");
118 mothurOut("The window parameter allows you to specify the window size for searching for chimeras. The default is 300 is method is pintail unless the sequence length is less than 300, and 1/4 sequence length for bellerphon.\n");
119 mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences. The default is 25.\n");
120 mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences for use by the pintail algorythm. It is a required parameter if using pintail.\n");
121 mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment for use by the pintail algorythm. It is not required, but will speed up the pintail method.\n");
122 mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences for use by the pintail algorythm. It is not required, but will speed up the pintail method.\n");
123 mothurOut("If you have run chimera.seqs using pintail a .quan and .freq file will be created if you have not provided them for use in future command executions.");
124 mothurOut("The chimera.seqs command should be in the following format: \n");
125 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
126 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
127 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
129 catch(exception& e) {
130 errorOut(e, "ChimeraSeqsCommand", "help");
135 //***************************************************************************************************************
137 ChimeraSeqsCommand::~ChimeraSeqsCommand(){ /* do nothing */ }
139 //***************************************************************************************************************
141 int ChimeraSeqsCommand::execute(){
144 if (abort == true) { return 0; }
146 if (method == "bellerophon") { chimera = new Bellerophon(fastafile); }
147 else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile); }
148 else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile); }
149 else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile); }
150 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
153 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine(); }
155 //saves time to avoid generating it
156 if (consfile != "") { chimera->setCons(consfile); }
157 else { chimera->setCons(""); }
159 //saves time to avoid generating it
160 if (quanfile != "") { chimera->setQuantiles(quanfile); }
161 else { chimera->setQuantiles(""); }
163 chimera->setMask(maskfile);
164 chimera->setFilter(filter);
165 chimera->setCorrection(correction);
166 chimera->setProcessors(processors);
167 chimera->setWindow(window);
168 chimera->setIncrement(increment);
169 chimera->setNumWanted(numwanted);
170 chimera->setKmerSize(ksize);
173 chimera->getChimeras();
175 string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras";
177 openOutputFile(outputFileName, out);
189 catch(exception& e) {
190 errorOut(e, "ChimeraSeqsCommand", "execute");
194 /**************************************************************************************************/