]> git.donarmstrong.com Git - mothur.git/blob - parsefastaqcommand.cpp
added sequence name to error string in fastq.info. Changed np_shannon to npshannon.
[mothur.git] / parsefastaqcommand.cpp
1 /*
2  *  parsefastaqcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "parsefastaqcommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ParseFastaQCommand::setParameters(){     
15         try {
16                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfastq);
17                 CommandParameter pfasta("fasta", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
18                 CommandParameter pqual("qfile", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pqual);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "ParseFastaQCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string ParseFastaQCommand::getHelpString(){     
33         try {
34                 string helpString = "";
35                 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
36                 helpString += "The fastq.info command parameter is fastq, and it is required.\n";
37                 helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
38                 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
39                 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
40                 return helpString;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ParseFastaQCommand", "getHelpString");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 ParseFastaQCommand::ParseFastaQCommand(){       
49         try {
50                 abort = true; calledHelp = true; 
51                 setParameters();
52                 vector<string> tempOutNames;
53                 outputTypes["fasta"] = tempOutNames;
54                 outputTypes["qfile"] = tempOutNames;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 ParseFastaQCommand::ParseFastaQCommand(string option){
63         try {
64                 abort = false; calledHelp = false;   
65                 
66                 if(option == "help") {  help(); abort = true; calledHelp = true; }
67                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
68                 
69                 else {
70                         vector<string> myArray = setParameters();
71                         
72                         OptionParser parser(option);
73                         map<string,string> parameters = parser.getParameters();
74                         
75                         ValidParameters validParameter;
76                         map<string,string>::iterator it;
77
78                         //check to make sure all parameters are valid for command
79                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
80                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
81                         }
82                         
83                         //initialize outputTypes
84                         vector<string> tempOutNames;
85                         outputTypes["fasta"] = tempOutNames;
86                         outputTypes["qfile"] = tempOutNames;
87                         
88                         //if the user changes the input directory command factory will send this info to us in the output parameter 
89                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
90                         if (inputDir == "not found"){   inputDir = "";          }
91                         else {
92                                 string path;
93                                 it = parameters.find("fastq");
94                                 //user has given a template file
95                                 if(it != parameters.end()){ 
96                                         path = m->hasPath(it->second);
97                                         //if the user has not given a path then, add inputdir. else leave path alone.
98                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
99                                 }
100                         }
101                         
102                         //check for required parameters
103                         fastaQFile = validParameter.validFile(parameters, "fastq", true);
104                         if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
105                         else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }       
106                         
107                         //if the user changes the output directory command factory will send this info to us in the output parameter 
108                         outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
109                         
110                         string temp;
111                         temp = validParameter.validFile(parameters, "fasta", false);    if(temp == "not found"){        temp = "T";     }
112                         fasta = m->isTrue(temp); 
113
114                         temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
115                         qual = m->isTrue(temp); 
116                         
117                         if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
118
119                 }               
120         }
121         catch(exception& e) {
122                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
123                 exit(1);
124         }
125 }
126 //**********************************************************************************************************************
127
128 int ParseFastaQCommand::execute(){
129         try {
130                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
131                 
132                 //open Output Files
133                 string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "fasta";
134                 string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "qual";
135                 ofstream outFasta, outQual;
136                 
137                 if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
138                 if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
139                 
140                 ifstream in;
141                 m->openInputFile(fastaQFile, in);
142                 
143                 while (!in.eof()) {
144                         
145                         if (m->control_pressed) { break; }
146                 
147                         //read sequence name
148                         string name = m->getline(in); m->gobble(in);
149                         if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
150                         else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
151                         else { name = name.substr(1); }
152                         
153                         //read sequence
154                         string sequence = m->getline(in); m->gobble(in);
155                         if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
156                         
157                         //read sequence name
158                         string name2 = m->getline(in); m->gobble(in);
159                         if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
160                         else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
161                         else { name2 = name2.substr(1);  }
162                         
163                         //read quality scores
164                         string quality = m->getline(in); m->gobble(in);
165                         if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
166                         
167                         //sanity check sequence length and number of quality scores match
168                         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
169                         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
170                         
171                         //print sequence info to files
172                         if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
173                         
174                         if (qual) { 
175                                 vector<int> qualScores = convertQual(quality);
176                                 outQual << ">" << name << endl;
177                                 for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
178                                 outQual << endl;
179                         }
180                 }
181                 
182                 in.close();
183                 if (fasta)      { outFasta.close();     }
184                 if (qual)       { outQual.close();      }
185                 
186                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
187                 
188                 //set fasta file as new current fastafile
189                 string current = "";
190                 itTypes = outputTypes.find("fasta");
191                 if (itTypes != outputTypes.end()) {
192                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
193                 }
194                 
195                 itTypes = outputTypes.find("qfile");
196                 if (itTypes != outputTypes.end()) {
197                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
198                 }               
199                 
200                 m->mothurOutEndLine();
201                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
202                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
203                 m->mothurOutEndLine();
204
205                 return 0;
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "ParseFastaQCommand", "execute");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213 vector<int> ParseFastaQCommand::convertQual(string qual) {
214         try {
215                 vector<int> qualScores;
216                 
217                 int controlChar = int('!');
218                 
219                 for (int i = 0; i < qual.length(); i++) { 
220                         int temp = int(qual[i]);
221                         temp -= controlChar;
222                         
223                         qualScores.push_back(temp);
224                 }
225                 
226                 return qualScores;
227         }
228         catch(exception& e) {
229                 m->errorOut(e, "ParseFastaQCommand", "convertQual");
230                 exit(1);
231         }
232 }
233 //**********************************************************************************************************************
234
235
236