]> git.donarmstrong.com Git - mothur.git/blob - parsefastaqcommand.cpp
added classify.shared command and random forest files. added count file to pcr.seqs...
[mothur.git] / parsefastaqcommand.cpp
1 /*
2  *  parsefastaqcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "parsefastaqcommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ParseFastaQCommand::setParameters(){     
15         try {
16                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfastq);
17                 CommandParameter pfasta("fasta", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
18                 CommandParameter pqual("qfile", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pqual);
19                 CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa", "sanger", "", "", "",false,false); parameters.push_back(pformat);
20         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
21                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
22                 
23                 vector<string> myArray;
24                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
25                 return myArray;
26         }
27         catch(exception& e) {
28                 m->errorOut(e, "ParseFastaQCommand", "setParameters");
29                 exit(1);
30         }
31 }
32 //**********************************************************************************************************************
33 string ParseFastaQCommand::getHelpString(){     
34         try {
35                 string helpString = "";
36                 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
37                 helpString += "The fastq.info command parameters are fastq, fasta, qfile and format; fastq is required.\n";
38         helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
39                 helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa or illumina, default=sanger.\n";
40         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
41         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
42                 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
43                 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "ParseFastaQCommand", "getHelpString");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string ParseFastaQCommand::getOutputFileNameTag(string type, string inputName=""){      
53         try {
54         string outputFileName = "";
55                 map<string, vector<string> >::iterator it;
56         
57         //is this a type this command creates
58         it = outputTypes.find(type);
59         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
60         else {
61             if (type == "fasta") {  outputFileName =  "fasta"; }
62             else if (type == "qfile") {  outputFileName =  "qual"; }
63             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
64         }
65         return outputFileName;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "ParseFastaQCommand", "getOutputFileNameTag");
69                 exit(1);
70         }
71 }
72
73 //**********************************************************************************************************************
74 ParseFastaQCommand::ParseFastaQCommand(){       
75         try {
76                 abort = true; calledHelp = true; 
77                 setParameters();
78                 vector<string> tempOutNames;
79                 outputTypes["fasta"] = tempOutNames;
80                 outputTypes["qfile"] = tempOutNames;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 ParseFastaQCommand::ParseFastaQCommand(string option){
89         try {
90                 abort = false; calledHelp = false;   
91                 
92                 if(option == "help") {  help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         map<string,string>::iterator it;
103
104                         //check to make sure all parameters are valid for command
105                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["fasta"] = tempOutNames;
112                         outputTypes["qfile"] = tempOutNames;
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("fastq");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
125                                 }
126                         }
127                         
128                         //check for required parameters
129                         fastaQFile = validParameter.validFile(parameters, "fastq", true);
130                         if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
131                         else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }       
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
135                         
136                         string temp;
137                         temp = validParameter.validFile(parameters, "fasta", false);    if(temp == "not found"){        temp = "T";     }
138                         fasta = m->isTrue(temp); 
139
140                         temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
141                         qual = m->isTrue(temp); 
142                         
143             format = validParameter.validFile(parameters, "format", false);             if (format == "not found"){     format = "sanger";      }
144             
145             if ((format != "sanger") && (format != "illumina") && (format != "solexa"))  { 
146                                 m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa and illumina, aborting." ); m->mothurOutEndLine();
147                                 abort=true;
148                         }
149
150                         if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
151
152                 }               
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
156                 exit(1);
157         }
158 }
159 //**********************************************************************************************************************
160
161 int ParseFastaQCommand::execute(){
162         try {
163                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
164                 
165                 //open Output Files
166                 string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + getOutputFileNameTag("fasta");
167                 string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + getOutputFileNameTag("qfile");
168                 ofstream outFasta, outQual;
169                 
170                 if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
171                 if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
172                 
173                 ifstream in;
174                 m->openInputFile(fastaQFile, in);
175         
176         //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
177         for (int i = -64; i < 65; i++) { 
178             char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
179             convertTable.push_back(temp);
180         }
181                 
182                 while (!in.eof()) {
183                         
184                         if (m->control_pressed) { break; }
185                 
186                         //read sequence name
187                         string name = m->getline(in); m->gobble(in);
188                         if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
189                         else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
190                         else { name = name.substr(1); }
191                         
192                         //read sequence
193                         string sequence = m->getline(in); m->gobble(in);
194                         if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
195                         
196                         //read sequence name
197                         string name2 = m->getline(in); m->gobble(in);
198                         if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
199                         else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
200                         else { name2 = name2.substr(1);  }
201                         
202                         //read quality scores
203                         string quality = m->getline(in); m->gobble(in);
204                         if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
205                         
206                         //sanity check sequence length and number of quality scores match
207                         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
208                         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
209                         
210                         //print sequence info to files
211                         if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
212                         
213                         if (qual) { 
214                                 vector<int> qualScores = convertQual(quality);
215                                 outQual << ">" << name << endl;
216                                 for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
217                                 outQual << endl;
218                         }
219                 }
220                 
221                 in.close();
222                 if (fasta)      { outFasta.close();     }
223                 if (qual)       { outQual.close();      }
224                 
225                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
226                 
227                 //set fasta file as new current fastafile
228                 string current = "";
229                 itTypes = outputTypes.find("fasta");
230                 if (itTypes != outputTypes.end()) {
231                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
232                 }
233                 
234                 itTypes = outputTypes.find("qfile");
235                 if (itTypes != outputTypes.end()) {
236                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
237                 }               
238                 
239                 m->mothurOutEndLine();
240                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
241                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
242                 m->mothurOutEndLine();
243
244                 return 0;
245         }
246         catch(exception& e) {
247                 m->errorOut(e, "ParseFastaQCommand", "execute");
248                 exit(1);
249         }
250 }
251 //**********************************************************************************************************************
252 vector<int> ParseFastaQCommand::convertQual(string qual) {
253         try {
254                 vector<int> qualScores;
255                 
256                 for (int i = 0; i < qual.length(); i++) { 
257             
258             int temp = 0;
259             temp = int(qual[i]);
260             if (format == "illumina") {
261                 temp -= 64; //char '@'
262             }else if (format == "solexa") {
263                 temp = int(convertTable[temp]); //convert to sanger
264                 temp -= 33; //char '!'
265             }else {
266                 temp -= 33; //char '!'
267             }
268                         qualScores.push_back(temp);
269                 }
270                 
271                 return qualScores;
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "ParseFastaQCommand", "convertQual");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279
280
281