]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.h
Merge remote-tracking branch 'mothur/master'
[mothur.git] / seqsummarycommand.h
1 #ifndef SEQSUMMARYCOMMAND_H
2 #define SEQSUMMARYCOMMAND_H
3
4 /*
5  *  seqsummarycommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 5/30/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16
17 /**************************************************************************************************/
18
19 class SeqSummaryCommand : public Command {
20 public:
21         SeqSummaryCommand(string);
22         SeqSummaryCommand();
23         ~SeqSummaryCommand(){}
24         
25         vector<string> setParameters();
26         string getCommandName()                 { return "summary.seqs";                        }
27         string getCommandCategory()             { return "Sequence Processing";         }
28         string getOutputFileNameTag(string, string);
29         string getHelpString(); 
30         string getCitation() { return "http://www.mothur.org/wiki/Summary.seqs"; }
31         string getDescription()         { return "summarize the quality of sequences in an unaligned or aligned fasta file"; }
32         
33         int execute(); 
34         void help() { m->mothurOut(getHelpString()); }          
35 private:
36         bool abort;
37         string fastafile, outputDir, namefile, countfile;
38         int processors;
39         vector<string> outputNames;
40         map<string, int> nameMap;
41         
42         struct linePair {
43                 unsigned long long start;
44                 unsigned long long end;
45                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
46         };
47
48         vector<linePair*> lines;
49         vector<int> processIDS;
50         
51         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string);
52         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string, linePair*);       
53
54         #ifdef USE_MPI
55         int MPICreateSummary(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, MPI_File&, MPI_File&, vector<unsigned long long>&);        
56         #endif
57
58
59 };
60
61 /**************************************************************************************************/
62 //custom data structure for threads to use.
63 // This is passed by void pointer so it can be any data type
64 // that can be passed using a single void pointer (LPVOID).
65 struct seqSumData {
66         vector<int> startPosition;
67         vector<int> endPosition;
68         vector<int> seqLength; 
69         vector<int> ambigBases; 
70         vector<int> longHomoPolymer; 
71         string filename; 
72         string sumFile; 
73         unsigned long long start;
74         unsigned long long end;
75         int count;
76         MothurOut* m;
77         bool hasNameMap;
78         map<string, int> nameMap;
79         
80         
81         seqSumData(){}
82         seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, bool na, map<string, int> nam) {
83                 filename = f;
84                 sumFile = sf;
85                 m = mout;
86                 start = st;
87                 end = en;
88                 hasNameMap = na;
89                 nameMap = nam;
90                 count = 0;
91         }
92 };
93
94 /**************************************************************************************************/
95 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
96 #else
97 static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ 
98         seqSumData* pDataArray;
99         pDataArray = (seqSumData*)lpParam;
100         
101         try {
102                 ofstream outSummary;
103                 pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary);
104                 
105                 ifstream in;
106                 pDataArray->m->openInputFile(pDataArray->filename, in);
107
108                 //print header if you are process 0
109                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
110                         outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl;  
111                         in.seekg(0);
112                 }else { //this accounts for the difference in line endings. 
113                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
114                 }
115                 
116                 pDataArray->count = pDataArray->end;
117                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
118                         
119                         if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; }
120                         
121                         Sequence current(in); pDataArray->m->gobble(in); 
122                         
123                         if (current.getName() != "") {
124                                 
125                                 int num = 1;
126                                 if (pDataArray->hasNameMap){
127                                         //make sure this sequence is in the namefile, else error 
128                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
129                                         
130                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your name or count file, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
131                                         else { num = it->second; }
132                                 }
133                                 
134                                 //for each sequence this sequence represents
135                                 for (int i = 0; i < num; i++) {
136                                         pDataArray->startPosition.push_back(current.getStartPos());
137                                         pDataArray->endPosition.push_back(current.getEndPos());
138                                         pDataArray->seqLength.push_back(current.getNumBases());
139                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
140                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
141                                 }
142                                 
143                                 outSummary << current.getName() << '\t';
144                                 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
145                                 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
146                                 outSummary << current.getLongHomoPolymer() << '\t' << num << endl;
147                         }
148                 }
149                 
150                 in.close();
151                 outSummary.close();
152                 
153                 return 0;
154                 
155         }
156         catch(exception& e) {
157                 pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction");
158                 exit(1);
159         }
160
161 #endif
162
163
164
165
166 #endif
167
168 /**************************************************************************************************/
169
170