]> git.donarmstrong.com Git - mothur.git/blob - summaryqualcommand.cpp
changed added group output to indicator command. a few changes to work with the guy
[mothur.git] / summaryqualcommand.cpp
1 /*
2  *  summaryqualcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/28/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "summaryqualcommand.h"
11 #include "counttable.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummaryQualCommand::setParameters(){     
15         try {
16                 CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pqual);
17                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
21                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
22                 
23                 vector<string> myArray;
24                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
25                 return myArray;
26         }
27         catch(exception& e) {
28                 m->errorOut(e, "SummaryQualCommand", "setParameters");
29                 exit(1);
30         }
31 }
32 //**********************************************************************************************************************
33 string SummaryQualCommand::getHelpString(){     
34         try {
35                 string helpString = "";
36                 helpString += "The summary.qual command reads a quality file and an optional name or count file, and summarizes the quality information.\n";
37                 helpString += "The summary.tax command parameters are qfile, name, count and processors. qfile is required, unless you have a valid current quality file.\n";
38                 helpString += "The name parameter allows you to enter a name file associated with your quality file. \n";
39         helpString += "The count parameter allows you to enter a count file associated with your quality file. \n";
40                 helpString += "The summary.qual command should be in the following format: \n";
41                 helpString += "summary.qual(qfile=yourQualityFile) \n";
42                 helpString += "Note: No spaces between parameter labels (i.e. qfile), '=' and parameters (i.e.yourQualityFile).\n";     
43                 return helpString;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "SummaryQualCommand", "getHelpString");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 string SummaryQualCommand::getOutputFileNameTag(string type, string inputName=""){      
52         try {
53         string outputFileName = "";
54                 map<string, vector<string> >::iterator it;
55         
56         //is this a type this command creates
57         it = outputTypes.find(type);
58         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
59         else {
60             if (type == "summary")            {   outputFileName =  "qual.summary";   }
61             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
62         }
63         return outputFileName;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "SummaryQualCommand", "getOutputFileNameTag");
67                 exit(1);
68         }
69 }
70 //**********************************************************************************************************************
71 SummaryQualCommand::SummaryQualCommand(){       
72         try {
73                 abort = true; calledHelp = true; 
74                 setParameters();
75                 vector<string> tempOutNames;
76                 outputTypes["summary"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "SummaryQualCommand", "SummaryQualCommand");
80                 exit(1);
81         }
82 }
83 //***************************************************************************************************************
84
85 SummaryQualCommand::SummaryQualCommand(string option)  {
86         try {
87                 abort = false; calledHelp = false;   
88                 
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101                         
102                         //check to make sure all parameters are valid for command
103                         for (it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("qfile");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
118                                 }
119                                 
120                                 it = parameters.find("name");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
126                                 }
127                 
128                 it = parameters.find("count");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
134                                 }
135                         }
136                         
137                         //initialize outputTypes
138                         vector<string> tempOutNames;
139                         outputTypes["summary"] = tempOutNames;
140                         
141                         //check for required parameters
142                         qualfile = validParameter.validFile(parameters, "qfile", true);
143                         if (qualfile == "not open") { qualfile = ""; abort = true; }
144                         else if (qualfile == "not found") {                             
145                                 qualfile = m->getQualFile(); 
146                                 if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current quality file and the qfile parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setQualFile(qualfile); }     
149                         
150                         namefile = validParameter.validFile(parameters, "name", true);
151                         if (namefile == "not open") { namefile = ""; abort = true; }
152                         else if (namefile == "not found") { namefile = "";  }   
153                         else { m->setNameFile(namefile); }
154             
155             countfile = validParameter.validFile(parameters, "count", true);
156                         if (countfile == "not open") { abort = true; countfile = ""; }  
157                         else if (countfile == "not found") { countfile = ""; }
158                         else { m->setCountTableFile(countfile); }
159                         
160             if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
161                         
162                         //if the user changes the output directory command factory will send this info to us in the output parameter 
163                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
164                                 outputDir = ""; 
165                                 outputDir += m->hasPath(qualfile); //if user entered a file with a path then preserve it        
166                         }
167                         
168                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
169                         m->setProcessors(temp);
170                         m->mothurConvert(temp, processors);     
171                         
172             
173                         if (countfile == "") {
174                 if (namefile == "") {
175                     vector<string> files; files.push_back(qualfile);
176                     parser.getNameFile(files);
177                 }
178             }
179                 }
180         }
181         catch(exception& e) {
182                 m->errorOut(e, "SummaryQualCommand", "SummaryQualCommand");
183                 exit(1);
184         }
185 }
186 //***************************************************************************************************************
187 int SummaryQualCommand::execute(){
188         try{
189                 
190                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
191                 
192                 int start = time(NULL);
193                 int numSeqs = 0;
194                 
195                 vector<int> position;
196                 vector<int> averageQ;
197                 vector< vector<int> > scores;
198                                 
199                 if (m->control_pressed) { return 0; }
200                 
201                 if (namefile != "") { nameMap = m->readNames(namefile); }
202                 else if (countfile != "") {
203             CountTable ct;
204             ct.readTable(countfile);
205             nameMap = ct.getNameMap();
206         }
207         
208                 vector<unsigned long long> positions; 
209 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
210                 positions = m->divideFile(qualfile, processors);
211                 for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
212 #else   
213                 if (processors == 1) {
214                         lines.push_back(linePair(0, 1000)); 
215                 }else {
216                         positions = m->setFilePosFasta(qualfile, numSeqs); 
217             if (positions.size() < processors) { processors = positions.size(); }
218                         
219                         //figure out how many sequences you have to process
220                         int numSeqsPerProcessor = numSeqs / processors;
221                         for (int i = 0; i < processors; i++) {
222                                 int startIndex =  i * numSeqsPerProcessor;
223                                 if(i == (processors - 1)){      numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
224                                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
225                         }
226                 }
227 #endif
228                 
229                 
230                 if(processors == 1){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]);  }
231                 else{  numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile);  }
232                 
233                 if (m->control_pressed) {  return 0; }
234                 
235                 //print summary file
236                 string summaryFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("summary");
237                 printQual(summaryFile, position, averageQ, scores);
238                 
239                 if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
240                 
241                 //output results to screen
242                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
243                 m->mothurOutEndLine();
244                 m->mothurOut("Position\tNumSeqs\tAverageQ"); m->mothurOutEndLine();
245                 for (int i = 0; i < position.size(); i+=100) {
246                         float average = averageQ[i] / (float) position[i];
247                         cout << i << '\t' << position[i] << '\t' << average;
248                         m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)); m->mothurOutEndLine();
249                 }
250                 
251                 m->mothurOutEndLine();
252                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
253                 m->mothurOutEndLine();
254                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
255                 m->mothurOut(summaryFile); m->mothurOutEndLine();       outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
256                 m->mothurOutEndLine();
257                 
258                 return 0;
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "SummaryQualCommand", "execute");
262                 exit(1);
263         }
264 }
265 /**************************************************************************************/
266 int SummaryQualCommand::driverCreateSummary(vector<int>& position, vector<int>& averageQ, vector< vector<int> >& scores, string filename, linePair filePos) {   
267         try {
268                 ifstream in;
269                 m->openInputFile(filename, in);
270                 
271                 in.seekg(filePos.start);
272                 
273                 bool done = false;
274                 int count = 0;
275                 
276                 while (!done) {
277                         
278                         if (m->control_pressed) { in.close(); return 1; }
279                         
280                         QualityScores current(in); m->gobble(in);
281                         
282                         if (current.getName() != "") {
283                                 
284                                 int num = 1;
285                                 if ((namefile != "") || (countfile != "")) {
286                                         //make sure this sequence is in the namefile, else error 
287                                         map<string, int>::iterator it = nameMap.find(current.getName());
288                                         
289                                         if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
290                                         else { num = it->second; }
291                                 }
292                                 
293                                 vector<int> thisScores = current.getQualityScores();
294                                 
295                                 //resize to num of positions setting number of seqs with that size to 1
296                                 if (position.size() < thisScores.size()) { position.resize(thisScores.size(), 0); }
297                                 if (averageQ.size() < thisScores.size()) { averageQ.resize(thisScores.size(), 0); }
298                                 if (scores.size() < thisScores.size()) { 
299                                         scores.resize(thisScores.size()); 
300                                         for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); }
301                                 }
302                                 
303                                 //increase counts of number of seqs with this position
304                                 //average is really the total, we will average in execute
305                                 for (int i = 0; i < thisScores.size(); i++) { 
306                                         position[i] += num; 
307                                         averageQ[i] += (thisScores[i] * num); //weighting for namesfile
308                                         if (thisScores[i] > 40) { m->mothurOut("[ERROR]: " + current.getName() + " has a quality scores of " + toString(thisScores[i]) + ", expecting values to be less than 40."); m->mothurOutEndLine(); m->control_pressed = true; }
309                                         else { scores[i][thisScores[i]] += num; }  
310                                 }
311                                 
312                                 count += num;
313                         }
314                         
315 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
316                         unsigned long long pos = in.tellg();
317                         if ((pos == -1) || (pos >= filePos.end)) { break; }
318 #else
319                         if (in.eof()) { break; }
320 #endif
321                 }
322                 
323                 in.close();
324                 
325                 return count;
326         }
327         catch(exception& e) {
328                 m->errorOut(e, "SummaryQualCommand", "driverCreateSummary");
329                 exit(1);
330         }
331 }
332 /**************************************************************************************************/
333 int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vector<int>& averageQ, vector< vector<int> >& scores, string filename) {
334         try {
335                 int process = 1;
336                 int numSeqs = 0;
337                 processIDS.clear();
338                 
339 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
340                 
341                 //loop through and create all the processes you want
342                 while (process != processors) {
343                         int pid = fork();
344                         
345                         if (pid > 0) {
346                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
347                                 process++;
348                         }else if (pid == 0){
349                                 numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[process]);
350                                 
351                                 //pass numSeqs to parent
352                                 ofstream out;
353                                 string tempFile = qualfile + toString(getpid()) + ".num.temp";
354                                 m->openOutputFile(tempFile, out);
355                                 
356                                 out << numSeqs << endl;
357                                 out << position.size() << endl;
358                                 for (int k = 0; k < position.size(); k++)                       {               out << position[k] << '\t'; }  out << endl;
359                                 for (int k = 0; k < averageQ.size(); k++)                       {               out << averageQ[k] << '\t'; }  out << endl;
360                                 for (int k = 0; k < scores.size(); k++) {               
361                                         for (int j = 0; j < 41; j++) {
362                                                 out << scores[k][j] << '\t'; 
363                                         }
364                                         out << endl;
365                                 }  
366                                 out << endl;
367                                 
368                                 out.close();
369                                 
370                                 exit(0);
371                         }else { 
372                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
373                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
374                                 exit(0);
375                         }
376                 }
377                 
378                 //do your part
379                 numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]);
380                 
381                 //force parent to wait until all the processes are done
382                 for (int i=0;i<processIDS.size();i++) { 
383                         int temp = processIDS[i];
384                         wait(&temp);
385                 }
386                 
387                 //parent reads in and combine Filter info
388                 for (int i = 0; i < processIDS.size(); i++) {
389                         string tempFilename = qualfile + toString(processIDS[i]) + ".num.temp";
390                         ifstream in;
391                         m->openInputFile(tempFilename, in);
392                         
393                         int temp, tempNum;
394                         in >> tempNum; m->gobble(in); numSeqs += tempNum;
395                         in >> tempNum; m->gobble(in);
396                         
397                         if (position.size() < tempNum) { position.resize(tempNum, 0); }
398                         if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); }
399                         if (scores.size() < tempNum) { 
400                                 scores.resize(tempNum); 
401                                 for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); }
402                         }
403                         
404                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; position[k] += temp;                        }               m->gobble(in);
405                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; averageQ[k] += temp;                }               m->gobble(in);
406                         for (int k = 0; k < tempNum; k++)                       {       
407                                 for (int j = 0; j < 41; j++) {
408                                         in >> temp; scores[k][j] += temp;
409                                         m->gobble(in);
410                                 }       
411                         }
412                         
413                         in.close();
414                         m->mothurRemove(tempFilename);
415                 }
416                 
417 #else
418                 //////////////////////////////////////////////////////////////////////////////////////////////////////
419                 //Windows version shared memory, so be careful when passing variables through the seqSumQualData struct. 
420                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
421                 //Taking advantage of shared memory to pass results vectors.
422                 //////////////////////////////////////////////////////////////////////////////////////////////////////
423                 
424                 vector<seqSumQualData*> pDataArray; 
425                 DWORD   dwThreadIdArray[processors];
426                 HANDLE  hThreadArray[processors]; 
427                 
428         bool hasNameMap = false;
429         if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
430         
431                 //Create processor worker threads.
432                 for( int i=0; i<processors; i++ ){
433                         
434                         // Allocate memory for thread data.
435                         seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, hasNameMap, nameMap);
436                         pDataArray.push_back(tempSum);
437                         processIDS.push_back(i);
438         
439                         hThreadArray[i] = CreateThread(NULL, 0, MySeqSumQualThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
440                 }
441                 
442                 //Wait until all threads have terminated.
443                 WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
444                 
445                 //Close all thread handles and free memory allocations.
446                 for(int i=0; i < pDataArray.size(); i++){
447                         numSeqs += pDataArray[i]->count;
448             int tempNum = pDataArray[i]->position.size();
449             if (position.size() < tempNum) { position.resize(tempNum, 0); }
450                         if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); }
451                         if (scores.size() < tempNum) { 
452                                 scores.resize(tempNum); 
453                                 for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); }
454                         }
455             
456             for (int k = 0; k < tempNum; k++)                   {                position[k]    +=  pDataArray[i]->position[k];         }               
457                         for (int k = 0; k < tempNum; k++)                       {                averageQ[k]    +=  pDataArray[i]->averageQ[k];         }               
458                         for (int k = 0; k < tempNum; k++)                       {       for (int j = 0; j < 41; j++) {  scores[k][j] += pDataArray[i]->scores[k][j];   }        }
459
460                         CloseHandle(hThreadArray[i]);
461                         delete pDataArray[i];
462                 }
463 #endif          
464                 return numSeqs;
465         }
466         catch(exception& e) {
467                 m->errorOut(e, "SummaryQualCommand", "createProcessesCreateSummary");
468                 exit(1);
469         }
470 }
471 /**************************************************************************************************/
472 int SummaryQualCommand::printQual(string sumFile, vector<int>& position, vector<int>& averageQ, vector< vector<int> >& scores) {
473         try {
474                 ofstream out;
475                 m->openOutputFile(sumFile, out);
476                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
477                 outputNames.push_back(sumFile); outputTypes["summary"].push_back(sumFile);
478                 
479                 //print headings
480                 out << "Position\tnumSeqs\tAverageQ\t";
481                 for (int i = 0; i < 41; i++) { out << "q" << i << '\t'; }
482                 out << endl;
483                 
484                 for (int i = 0; i < position.size(); i++) {
485                         
486                         if (m->control_pressed) { out.close(); return 0; }
487                         
488                         double average = averageQ[i] / (float) position[i];
489                         out << i << '\t' << position[i] << '\t' << average << '\t';
490                         
491                         for (int j = 0; j < 41; j++) {
492                                 out << scores[i][j] << '\t';
493                         }
494                         out << endl;
495                 }
496                 
497                 out.close();
498                 
499                 return 0;
500         }
501         catch(exception& e) {
502                 m->errorOut(e, "SummaryQualCommand", "printQual");
503                 exit(1);
504         }
505 }
506
507 /**************************************************************************************/
508
509