]> git.donarmstrong.com Git - mothur.git/blob - countseqscommand.h
changed random forest output filename
[mothur.git] / countseqscommand.h
1 #ifndef COuNTSEQSCOMMAND_H
2 #define COuNTSEQSCOMMAND_H
3
4 /*
5  *  countseqscommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 6/1/11.
9  *  Copyright 2011 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "command.hpp"
14 #include "groupmap.h"
15
16 class CountSeqsCommand : public Command {
17         
18 public:
19         
20         CountSeqsCommand(string);
21         CountSeqsCommand();     
22         ~CountSeqsCommand(){}
23         
24         vector<string> setParameters();
25         string getCommandName()                 { return "count.seqs";                          }
26         string getCommandCategory()             { return "Sequence Processing";         }
27         
28         string getHelpString(); 
29     string getOutputPattern(string);    
30         string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; }
31         string getDescription()         { return "counts the number of sequences represented by each unique sequence in a namesfile"; }
32
33         int execute(); 
34         void help() { m->mothurOut(getHelpString()); }  
35         
36         
37 private:
38     
39     struct linePair {
40                 unsigned long long start;
41                 unsigned long long end;
42                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
43         };
44     
45         string namefile, groupfile, outputDir, groups;
46         bool abort, large;
47         vector<string> Groups, outputNames;
48     int processors;
49     
50     int processSmall(string);
51     int processLarge(string);
52     map<int, string> processNameFile(string);
53     map<int, string> getGroupNames(string, set<string>&);
54     
55     int createProcesses(GroupMap*&, string);
56     int driver(unsigned long long, unsigned long long, string, GroupMap*&);
57     
58 };
59
60 /***********************************************************************/
/**
 * Argument/result bundle handed to MyCountThreadFunction through its LPVOID
 * parameter (one instance per worker thread).
 *
 * Inputs read by the thread function:
 *   start          - byte offset in `namefile` at which the thread seeks
 *                    before reading.
 *   end            - number of records the thread processes (a count, not an
 *                    offset, despite the name).
 *   m              - MothurOut used for file opening, logging and the
 *                    control_pressed check.
 *   outputFileName - file the thread writes its counts to.
 *   namefile       - names file the thread reads.
 *   groupfile      - when non-empty, per-group counts are produced.
 *   groupMap       - used to look up the group of each sequence name.
 *   Groups         - the groups to count.
 * Output written by the thread function:
 *   total          - running sum of the number of names read.
 *
 * The pointers are stored as given; this struct never deletes them.
 */
struct countData {
    unsigned long long start;
    unsigned long long end;
    MothurOut* m;
    string outputFileName, namefile, groupfile;
    GroupMap* groupMap;
    int total;
    vector<string> Groups;

    // Default state: zero offsets/counts and null pointers, so that no member
    // is ever left holding an indeterminate value.
    countData() : start(0), end(0), m(NULL), groupMap(NULL), total(0) {}

    // Initializer order follows the member declaration order above.
    countData(string fn, GroupMap* g, MothurOut* mout, unsigned long long st, unsigned long long en, string gfn, string nfn, vector<string> gr)
        : start(st), end(en), m(mout),
          outputFileName(fn), namefile(nfn), groupfile(gfn),
          groupMap(g), total(0), Groups(gr) {}
};
83
84 /**************************************************************************************************/
85 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
86 #else
87 static DWORD WINAPI MyCountThreadFunction(LPVOID lpParam){
88         countData* pDataArray;
89         pDataArray = (countData*)lpParam;
90         try {
91         ofstream out;
92         pDataArray->m->openOutputFile(pDataArray->outputFileName, out);
93         
94         ifstream in;
95                 pDataArray->m->openInputFile(pDataArray->namefile, in);
96                 in.seekg(pDataArray->start);
97         
98         //print header if you are process 0
99                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
100                         in.seekg(0);
101                 }else { //this accounts for the difference in line endings.
102                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
103                 }
104         
105         pDataArray->total = 0;
106                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
107             
108                         if (pDataArray->m->control_pressed) { break; }
109                         
110                         string firstCol, secondCol;
111                         in >> firstCol; pDataArray->m->gobble(in); in >> secondCol; pDataArray->m->gobble(in);
112             //cout << firstCol << '\t' << secondCol << endl;
113             pDataArray->m->checkName(firstCol);
114             pDataArray->m->checkName(secondCol);
115             
116                         vector<string> names;
117                         pDataArray->m->splitAtChar(secondCol, names, ',');
118                         
119                         if (pDataArray->groupfile != "") {
120                                 //set to 0
121                                 map<string, int> groupCounts;
122                                 int total = 0;
123                                 for (int i = 0; i < pDataArray->Groups.size(); i++) { groupCounts[pDataArray->Groups[i]] = 0; }
124                                 
125                                 //get counts for each of the users groups
126                                 for (int i = 0; i < names.size(); i++) {
127                                         string group = pDataArray->groupMap->getGroup(names[i]);
128                                         
129                                         if (group == "not found") { pDataArray->m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); pDataArray->m->mothurOutEndLine(); }
130                                         else {
131                                                 map<string, int>::iterator it = groupCounts.find(group);
132                                                 
133                                                 //if not found, then this sequence is not from a group we care about
134                                                 if (it != groupCounts.end()) {
135                                                         it->second++;
136                                                         total++;
137                                                 }
138                                         }
139                                 }
140                                 
141                                 if (total != 0) {
142                                         out << firstCol << '\t' << total << '\t';
143                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
144                                                 out << it->second << '\t';
145                                         }
146                                         out << endl;
147                                 }
148                         }else {
149                                 out << firstCol << '\t' << names.size() << endl;
150                         }
151                         
152                         pDataArray->total += names.size();
153                 }
154                 in.close();
155         out.close();
156
157                 
158         return 0;
159     }
160         catch(exception& e) {
161                 pDataArray->m->errorOut(e, "CountSeqsCommand", "MyCountThreadFunction");
162                 exit(1);
163         }
164 }
165 #endif
166
167
168
169 #endif
170
171