]> git.donarmstrong.com Git - mothur.git/blob - chimeraseqscommand.cpp
6af8aa3f3d4f1ae3085743eb90b4472edfbfe301
[mothur.git] / chimeraseqscommand.cpp
1 /*
2  *  chimeraseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 6/29/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "chimeraseqscommand.h"
11 #include "bellerophon.h"
12 #include "pintail.h"
13 #include "alignedsimilarity.h"
14 #include "ccode.h"
15
16
17 //***************************************************************************************************************
18
19 ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
20         try {
21                 abort = false;
22                 
23                 //allow user to run help
24                 if(option == "help") { help(); abort = true; }
25                 
26                 else {
27                         //valid paramters for this command
28                         string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted" };
29                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         
36                         //check to make sure all parameters are valid for command
37                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
38                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
39                         }
40                         
41                         //check for required parameters
42                         fastafile = validParameter.validFile(parameters, "fasta", true);
43                         if (fastafile == "not open") { abort = true; }
44                         else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
45                         
46                         templatefile = validParameter.validFile(parameters, "template", true);
47                         if (templatefile == "not open") { abort = true; }
48                         else if (templatefile == "not found") { templatefile = "";  }   
49                         
50                         consfile = validParameter.validFile(parameters, "conservation", true);
51                         if (consfile == "not open") { abort = true; }
52                         else if (consfile == "not found") { consfile = "";  }   
53                         
54                         quanfile = validParameter.validFile(parameters, "quantile", true);
55                         if (quanfile == "not open") { abort = true; }
56                         else if (quanfile == "not found") { quanfile = "";  }
57                                 
58                         maskfile = validParameter.validFile(parameters, "mask", false);
59                         if (maskfile == "not found") { maskfile = "";  }        
60                         else if (maskfile != "default")  { 
61                                 ifstream in;
62                                 int     ableToOpen = openInputFile(maskfile, in);
63                                 if (ableToOpen == 1) { abort = true; }
64                                 in.close();
65                         }
66
67                         string temp;
68                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
69                         filter = isTrue(temp);
70                         
71                         temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
72                         correction = isTrue(temp);
73                         
74                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
75                         convert(temp, processors);
76                         
77                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
78                         convert(temp, window);
79                                         
80                         temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "25"; }
81                         convert(temp, increment);
82                         
83                         temp = validParameter.validFile(parameters, "numwanted", false);                        if (temp == "not found") { temp = "20"; }
84                         convert(temp, numwanted);
85
86                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "pintail"; }
87                         
88                         if (((method == "pintail") || (method == "alignsim")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail and alignsim methods."); mothurOutEndLine(); abort = true;  }
89                         
90
91                 }
92         }
93         catch(exception& e) {
94                 errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
95                 exit(1);
96         }
97 }
98 //**********************************************************************************************************************
99
100 void ChimeraSeqsCommand::help(){
101         try {
102                 mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
103                 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
104                 mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation and quantile.\n");
105                 mothurOut("The fasta parameter is always required and template is required if using pintail.\n");
106                 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter.  The default is false. \n");
107                 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.   The default is true. This only applies when the method is bellerphon.\n");
108                 mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
109                 mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is pintail. Options include..... \n");
110                 mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the pintail.  The default is no mask.  If you enter mask=default, then the mask is 236627 EU009184.1 Shigella dysenteriae str. FBD013. \n");
111                 mothurOut("The window parameter allows you to specify the window size for searching for chimeras.  The default is 300 is method is pintail unless the sequence length is less than 300, and 1/4 sequence length for bellerphon.\n");
112                 mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences.  The default is 25.\n");
113                 mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences for use by the pintail algorythm. It is a required parameter if using pintail.\n");
114                 mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment for use by the pintail algorythm. It is not required, but will speed up the pintail method.\n");
115                 mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences for use by the pintail algorythm. It is not required, but will speed up the pintail method.\n");
116                 mothurOut("If you have run chimera.seqs using pintail a .quan and .freq file will be created if you have not provided them for use in future command executions.");
117                 mothurOut("The chimera.seqs command should be in the following format: \n");
118                 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
119                 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
120                 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");        
121         }
122         catch(exception& e) {
123                 errorOut(e, "ChimeraSeqsCommand", "help");
124                 exit(1);
125         }
126 }
127
128 //***************************************************************************************************************
129
130 ChimeraSeqsCommand::~ChimeraSeqsCommand(){      /*      do nothing      */      }
131
132 //***************************************************************************************************************
133
134 int ChimeraSeqsCommand::execute(){
135         try{
136                 
137                 if (abort == true) { return 0; }
138                 
139                 if (method == "bellerophon")    {               chimera = new Bellerophon(fastafile);                           }
140                 else if (method == "pintail")   {               chimera = new Pintail(fastafile, templatefile);         }
141                 //else if (method == "alignsim")        {               chimera = new AlignSim(fastafile, templatefile);        }
142                 else if (method == "ccode")             {               chimera = new Ccode(fastafile, templatefile);           }
143                 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
144                 
145                 //set user options
146                 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine();  }
147                 
148                 //saves time to avoid generating it
149                 if (consfile != "")                     {               chimera->setCons(consfile);                                             }
150                 else                                            {               chimera->setCons("");                                                   }
151                 
152                 //saves time to avoid generating it
153                 if (quanfile != "")                     {               chimera->setQuantiles(quanfile);                                }
154                 else                                            {               chimera->setQuantiles("");                                              }
155                 
156                 chimera->setMask(maskfile);
157                 chimera->setFilter(filter);
158                 chimera->setCorrection(correction);
159                 chimera->setProcessors(processors);
160                 chimera->setWindow(window);
161                 chimera->setIncrement(increment);
162                 chimera->setNumWanted(numwanted);
163                                 
164                 //find chimeras
165                 chimera->getChimeras();
166                 
167                 string outputFileName = getRootName(fastafile) + method + maskfile + ".chimeras";
168                 ofstream out;
169                 openOutputFile(outputFileName, out);
170                 
171                 //print results
172                 chimera->print(out);
173                 
174                 out.close();
175                 
176                 delete chimera;
177                 
178                 return 0;
179                 
180         }
181         catch(exception& e) {
182                 errorOut(e, "ChimeraSeqsCommand", "execute");
183                 exit(1);
184         }
185 }
186 /**************************************************************************************************/
187