]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.h
Added summary.qual command. Added fontsize parameter to heatmap.sim and venn commands
[mothur.git] / seqsummarycommand.h
1 #ifndef SEQSUMMARYCOMMAND_H
2 #define SEQSUMMARYCOMMAND_H
3
4 /*
5  *  seqsummarycommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 5/30/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16
17 /**************************************************************************************************/
18
19 class SeqSummaryCommand : public Command {
20 public:
21         SeqSummaryCommand(string);
22         SeqSummaryCommand();
23         ~SeqSummaryCommand(){}
24         
25         vector<string> setParameters();
26         string getCommandName()                 { return "summary.seqs";                        }
27         string getCommandCategory()             { return "Sequence Processing";         }
28         string getHelpString(); 
29         string getCitation() { return "http://www.mothur.org/wiki/Summary.seqs"; }
30         string getDescription()         { return "summarize the quality of sequences in an unaligned or aligned fasta file"; }
31         
32         int execute(); 
33         void help() { m->mothurOut(getHelpString()); }          
34 private:
35         bool abort;
36         string fastafile, outputDir, namefile;
37         int processors;
38         vector<string> outputNames;
39         map<string, int> nameMap;
40         
41         struct linePair {
42                 unsigned long long start;
43                 unsigned long long end;
44                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
45         };
46
47         vector<linePair*> lines;
48         vector<int> processIDS;
49         
50         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string);
51         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string, linePair*);       
52
53         #ifdef USE_MPI
54         int MPICreateSummary(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, MPI_File&, MPI_File&, vector<unsigned long long>&);        
55         #endif
56
57
58 };
59
60 /**************************************************************************************************/
61 //custom data structure for threads to use.
62 // This is passed by void pointer so it can be any data type
63 // that can be passed using a single void pointer (LPVOID).
64 struct seqSumData {
65         vector<int>* startPosition;
66         vector<int>* endPosition;
67         vector<int>* seqLength; 
68         vector<int>* ambigBases; 
69         vector<int>* longHomoPolymer; 
70         string filename; 
71         string sumFile; 
72         unsigned long long start;
73         unsigned long long end;
74         int count;
75         MothurOut* m;
76         string namefile;
77         map<string, int> nameMap;
78         
79         
80         seqSumData(){}
81         seqSumData(vector<int>* s, vector<int>* e, vector<int>* l, vector<int>* a, vector<int>* h, string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
82                 startPosition = s;
83                 endPosition = e;
84                 seqLength = l;
85                 ambigBases = a;
86                 longHomoPolymer = h;
87                 filename = f;
88                 sumFile = sf;
89                 m = mout;
90                 start = st;
91                 end = en;
92                 namefile = na;
93                 nameMap = nam;
94                 count = 0;
95         }
96 };
97
98 /**************************************************************************************************/
99 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
100 #else
101 static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ 
102         seqSumData* pDataArray;
103         pDataArray = (seqSumData*)lpParam;
104         
105         try {
106                 ofstream outSummary;
107                 pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary);
108                 
109                 ifstream in;
110                 pDataArray->m->openInputFile(pDataArray->filename, in);
111
112                 //print header if you are process 0
113                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
114                         outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl;  
115                         in.seekg(0);
116                 }else { //this accounts for the difference in line endings. 
117                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
118                 }
119                 
120                 pDataArray->count = pDataArray->end;
121                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
122                         
123                         if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; }
124                         
125                         Sequence current(in); pDataArray->m->gobble(in); 
126                         
127                         if (current.getName() != "") {
128                                 
129                                 int num = 1;
130                                 if (pDataArray->namefile != "") {
131                                         //make sure this sequence is in the namefile, else error 
132                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
133                                         
134                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
135                                         else { num = it->second; }
136                                 }
137                                 
138                                 //for each sequence this sequence represents
139                                 for (int i = 0; i < num; i++) {
140                                         pDataArray->startPosition->push_back(current.getStartPos());
141                                         pDataArray->endPosition->push_back(current.getEndPos());
142                                         pDataArray->seqLength->push_back(current.getNumBases());
143                                         pDataArray->ambigBases->push_back(current.getAmbigBases());
144                                         pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer());
145                                 }
146                                 
147                                 outSummary << current.getName() << '\t';
148                                 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
149                                 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
150                                 outSummary << current.getLongHomoPolymer() << '\t' << num << endl;
151                         }
152                 }
153                 
154                 in.close();
155                 outSummary.close();
156                 
157                 return 0;
158                 
159         }
160         catch(exception& e) {
161                 pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction");
162                 exit(1);
163         }
164
165 #endif
166
167
168
169
170 #endif
171
172 /**************************************************************************************************/
173
174