]> git.donarmstrong.com Git - mothur.git/blob - chimeraseqscommand.cpp
bd0ad956aababd99e7e926cfd7a305c20890f428
[mothur.git] / chimeraseqscommand.cpp
1 /*
2  *  chimeraseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 6/29/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "chimeraseqscommand.h"
11 #include "bellerophon.h"
12 #include "pintail.h"
13 #include "ccode.h"
14 #include "chimeracheckrdp.h"
15
16
17 //***************************************************************************************************************
18
19 ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
20         try {
21                 abort = false;
22                 
23                 //allow user to run help
24                 if(option == "help") { help(); abort = true; }
25                 
26                 else {
27                         //valid paramters for this command
28                         string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name" };
29                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         
36                         //check to make sure all parameters are valid for command
37                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
38                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
39                         }
40                         
41                         //check for required parameters
42                         fastafile = validParameter.validFile(parameters, "fasta", true);
43                         if (fastafile == "not open") { abort = true; }
44                         else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
45                         
46                         templatefile = validParameter.validFile(parameters, "template", true);
47                         if (templatefile == "not open") { abort = true; }
48                         else if (templatefile == "not found") { templatefile = "";  }   
49                         
50                         consfile = validParameter.validFile(parameters, "conservation", true);
51                         if (consfile == "not open") { abort = true; }
52                         else if (consfile == "not found") { consfile = "";  }   
53                         
54                         quanfile = validParameter.validFile(parameters, "quantile", true);
55                         if (quanfile == "not open") { abort = true; }
56                         else if (quanfile == "not found") { quanfile = "";  }
57                         
58                         namefile = validParameter.validFile(parameters, "name", true);
59                         if (namefile == "not open") { abort = true; }
60                         else if (namefile == "not found") { namefile = "";  }
61
62                         maskfile = validParameter.validFile(parameters, "mask", false);
63                         if (maskfile == "not found") { maskfile = "";  }        
64                         else if (maskfile != "default")  { 
65                                 ifstream in;
66                                 int     ableToOpen = openInputFile(maskfile, in);
67                                 if (ableToOpen == 1) { abort = true; }
68                                 in.close();
69                         }
70                         
71                         method = validParameter.validFile(parameters, "method", false);                 if (method == "not found") { method = "pintail"; }
72                         
73                         string temp;
74                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
75                         filter = isTrue(temp);
76                         
77                         temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
78                         correction = isTrue(temp);
79                         
80                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
81                         convert(temp, processors);
82                         
83                         temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
84                         convert(temp, ksize);
85                         
86                         temp = validParameter.validFile(parameters, "svg", false);                              if (temp == "not found") { temp = "F"; }
87                         svg = isTrue(temp);
88                         
89                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
90                         convert(temp, window);
91                                         
92                         temp = validParameter.validFile(parameters, "increment", false);                
93                         if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
94                         else if (temp == "not found") { temp = "25"; }
95                         convert(temp, increment);
96                         
97                         temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "20"; }
98                         convert(temp, numwanted);
99
100                         
101                         
102                         if (((method != "bellerophon")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail, ccode or chimeracheck methods."); mothurOutEndLine(); abort = true;  }
103                         
104
105                 }
106         }
107         catch(exception& e) {
108                 errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
109                 exit(1);
110         }
111 }
112 //**********************************************************************************************************************
113
114 void ChimeraSeqsCommand::help(){
115         try {
116         
117                 //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
118                 //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
119                 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
120                 mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name.\n");
121                 mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
122                 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
123                 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
124                 mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
125                 mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is pintail. Options include bellerophon, ccode and chimeracheck \n");
126                 mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n");
127                 mothurOut("The window parameter allows you to specify the window size for searching for chimeras. \n");
128                 mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.\n");
129                 mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences. \n");
130                 mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
131                 mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences.\n");
132                 mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n");
133                 mothurOut("The ksize parameter allows you to input kmersize. \n");
134                 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
135                 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
136                 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
137                 mothurOut("Details for each method: \n"); 
138                 mothurOut("\tpintail: \n"); 
139                 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=300, increment=25, conservation=not required, but will improve speed, quantile=not required, but will greatly improve speed. \n"); 
140                 mothurOut("\t\tIf you have run chimera.seqs using pintail a .quan and .freq file will be created for your template, if you have not provided them for use in future command executions.\n");
141                 mothurOut("\tbellerophon: \n"); 
142                 mothurOut("\t\tparameters: fasta=required, filter=F, processors=1, window=1/4 length of seq, increment=25, correction=T. \n"); 
143                 mothurOut("\tccode: \n"); 
144                 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n"); 
145                 mothurOut("\tchimeracheck: \n"); 
146                 mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n"); 
147                 mothurOut("The chimera.seqs command should be in the following format: \n");
148                 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
149                 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
150                 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");        
151         }
152         catch(exception& e) {
153                 errorOut(e, "ChimeraSeqsCommand", "help");
154                 exit(1);
155         }
156 }
157
158 //***************************************************************************************************************
159
160 ChimeraSeqsCommand::~ChimeraSeqsCommand(){      /*      do nothing      */      }
161
162 //***************************************************************************************************************
163
164 int ChimeraSeqsCommand::execute(){
165         try{
166                 
167                 if (abort == true) { return 0; }
168                 
169                 if (method == "bellerophon")                    {               chimera = new Bellerophon(fastafile);                                           }
170                 else if (method == "pintail")                   {               chimera = new Pintail(fastafile, templatefile);                         }
171                 else if (method == "ccode")                             {               chimera = new Ccode(fastafile, templatefile);                           }
172                 else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, templatefile);         }
173                 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
174                 
175                 //set user options
176                 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine();  }
177                 
178                 //saves time to avoid generating it
179                 chimera->setCons(consfile);     
180                 
181                 //saves time to avoid generating it
182                 chimera->setQuantiles(quanfile);                                
183                 
184                 chimera->setMask(maskfile);
185                 chimera->setFilter(filter);
186                 chimera->setCorrection(correction);
187                 chimera->setProcessors(processors);
188                 chimera->setWindow(window);
189                 chimera->setIncrement(increment);
190                 chimera->setNumWanted(numwanted);
191                 chimera->setKmerSize(ksize);
192                 chimera->setSVG(svg);
193                 chimera->setName(namefile);
194                                 
195                 //find chimeras
196                 chimera->getChimeras();
197                 
198                 string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras";
199                 ofstream out;
200                 openOutputFile(outputFileName, out);
201                 
202                 //print results
203                 chimera->print(out);
204                 
205                 out.close();
206                 
207                 delete chimera;
208                 
209                 return 0;
210                 
211         }
212         catch(exception& e) {
213                 errorOut(e, "ChimeraSeqsCommand", "execute");
214                 exit(1);
215         }
216 }
217 /**************************************************************************************************/
218