]> git.donarmstrong.com Git - mothur.git/blob - countseqscommand.h
update .gitignore
[mothur.git] / countseqscommand.h
1 #ifndef COuNTSEQSCOMMAND_H
2 #define COuNTSEQSCOMMAND_H
3
4 /*
5  *  countseqscommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 6/1/11.
9  *  Copyright 2011 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "command.hpp"
14 #include "groupmap.h"
15 #include "sharedrabundvector.h"
16
17 class CountSeqsCommand : public Command {
18         
19 public:
20         
21         CountSeqsCommand(string);
22         CountSeqsCommand();     
23         ~CountSeqsCommand(){}
24         
25         vector<string> setParameters();
26         string getCommandName()                 { return "count.seqs";                          }
27         string getCommandCategory()             { return "Sequence Processing";         }
28         
29         string getHelpString(); 
30     string getOutputPattern(string);    
31         string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; }
32         string getDescription()         { return "makes a count file from a names or shared file"; }
33
34         int execute(); 
35         void help() { m->mothurOut(getHelpString()); }  
36         
37         
38 private:
39     
40     struct linePair {
41                 unsigned long long start;
42                 unsigned long long end;
43                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         };
45     
46         string namefile, groupfile, outputDir, groups, sharedfile;
47         bool abort, large, allLines;
48         vector<string> Groups, outputNames;
49     int processors;
50     set<string> labels;
51     
52     int processSmall(string);
53     int processLarge(string);
54     map<int, string> processNameFile(string);
55     map<int, string> getGroupNames(string, set<string>&);
56     
57     int createProcesses(GroupMap*&, string);
58     int driver(unsigned long long, unsigned long long, string, GroupMap*&);
59     int processShared(vector<SharedRAbundVector*>& lookup, map<string, string> variables);
60
61     
62 };
63
64 /***********************************************************************/
65 struct countData {
66     unsigned long long start;
67         unsigned long long end;
68         MothurOut* m;
69     string outputFileName, namefile, groupfile;
70     GroupMap* groupMap;
71     int total;
72     vector<string> Groups;
73     
74         countData(){}
75         countData(string fn, GroupMap* g, MothurOut* mout, unsigned long long st, unsigned long long en, string gfn, string nfn, vector<string> gr) {
76         m = mout;
77                 start = st;
78                 end = en;
79         groupMap = g;
80         groupfile = gfn;
81         namefile = nfn;
82         outputFileName = fn;
83         Groups = gr;
84         total = 0;
85         }
86 };
87
88 /**************************************************************************************************/
89 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
90 #else
91 static DWORD WINAPI MyCountThreadFunction(LPVOID lpParam){
92         countData* pDataArray;
93         pDataArray = (countData*)lpParam;
94         try {
95         ofstream out;
96         pDataArray->m->openOutputFile(pDataArray->outputFileName, out);
97         
98         ifstream in;
99                 pDataArray->m->openInputFile(pDataArray->namefile, in);
100                 in.seekg(pDataArray->start);
101         
102         //print header if you are process 0
103                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
104                         in.seekg(0);
105                 }else { //this accounts for the difference in line endings.
106                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
107                 }
108         
109         pDataArray->total = 0;
110                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
111             
112                         if (pDataArray->m->control_pressed) { break; }
113                         
114                         string firstCol, secondCol;
115                         in >> firstCol; pDataArray->m->gobble(in); in >> secondCol; pDataArray->m->gobble(in);
116             //cout << firstCol << '\t' << secondCol << endl;
117             pDataArray->m->checkName(firstCol);
118             pDataArray->m->checkName(secondCol);
119             
120                         vector<string> names;
121                         pDataArray->m->splitAtChar(secondCol, names, ',');
122                         
123                         if (pDataArray->groupfile != "") {
124                                 //set to 0
125                                 map<string, int> groupCounts;
126                                 int total = 0;
127                                 for (int i = 0; i < pDataArray->Groups.size(); i++) { groupCounts[pDataArray->Groups[i]] = 0; }
128                                 
129                                 //get counts for each of the users groups
130                                 for (int i = 0; i < names.size(); i++) {
131                                         string group = pDataArray->groupMap->getGroup(names[i]);
132                                         
133                                         if (group == "not found") { pDataArray->m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); pDataArray->m->mothurOutEndLine(); }
134                                         else {
135                                                 map<string, int>::iterator it = groupCounts.find(group);
136                                                 
137                                                 //if not found, then this sequence is not from a group we care about
138                                                 if (it != groupCounts.end()) {
139                                                         it->second++;
140                                                         total++;
141                                                 }
142                                         }
143                                 }
144                                 
145                                 if (total != 0) {
146                                         out << firstCol << '\t' << total << '\t';
147                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
148                                                 out << it->second << '\t';
149                                         }
150                                         out << endl;
151                                 }
152                         }else {
153                                 out << firstCol << '\t' << names.size() << endl;
154                         }
155                         
156                         pDataArray->total += names.size();
157                 }
158                 in.close();
159         out.close();
160
161                 
162         return 0;
163     }
164         catch(exception& e) {
165                 pDataArray->m->errorOut(e, "CountSeqsCommand", "MyCountThreadFunction");
166                 exit(1);
167         }
168 }
169 #endif
170
171
172
173 #endif
174
175