1 #ifndef COuNTSEQSCOMMAND_H
2 #define COuNTSEQSCOMMAND_H
8 * Created by westcott on 6/1/11.
9 * Copyright 2011 Schloss Lab. All rights reserved.
13 #include "command.hpp"
// Command implementation for "count.seqs" (see getCommandName()). Per
// getDescription(), it counts the number of sequences represented by each
// unique sequence in a names file. Derives from Command.
// NOTE(review): access specifiers, the constructor/execute bodies and the
// closing brace are not visible in this excerpt - confirm against the full header.
16 class CountSeqsCommand : public Command {
// Constructs the command from one string argument.
// NOTE(review): presumably the user-supplied option string - confirm in the .cpp.
20 CountSeqsCommand(string);
// Declared here, defined elsewhere; returns a list of strings.
24 vector<string> setParameters();
// Fixed identification strings for this command.
25 string getCommandName() { return "count.seqs"; }
26 string getCommandCategory() { return "Sequence Processing"; }
// Declared here, defined elsewhere.
28 string getHelpString();
29 string getOutputPattern(string);
// URL of the wiki page for this command.
30 string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; }
31 string getDescription() { return "counts the number of sequences represented by each unique sequence in a namesfile"; }
// Writes the help text through the `m` output object.
34 void help() { m->mothurOut(getHelpString()); }
// A start/end pair of unsigned 64-bit values, set together by linePair(i, j).
// NOTE(review): the enclosing `struct linePair` header is not visible here;
// presumably start is a file offset and end a count or offset - confirm.
40 unsigned long long start;
41 unsigned long long end;
42 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
// String state held by the command.
// NOTE(review): presumably input file names, output directory and the raw
// groups option - confirm against the constructor.
45 string namefile, groupfile, outputDir, groups;
// NOTE(review): presumably the selected group names and the files produced - confirm.
47 vector<string> Groups, outputNames;
// Helpers declared here and defined elsewhere; each int-returning helper
// takes a string. NOTE(review): "Small"/"Large" presumably select a strategy
// by input size - confirm in the .cpp.
50 int processSmall(string);
51 int processLarge(string);
// Both return a map from int to string.
52 map<int, string> processNameFile(string);
53 map<int, string> getGroupNames(string, set<string>&);
// createProcesses and driver both take a GroupMap pointer by reference;
// driver additionally takes two unsigned 64-bit values.
// NOTE(review): presumably the start/end range handled by one worker - confirm.
55 int createProcesses(GroupMap*&, string);
56 int driver(unsigned long long, unsigned long long, string, GroupMap*&);
60 /***********************************************************************/
// Fields of countData, the parameter object that MyCountThreadFunction
// receives through its LPVOID argument and casts back to countData*.
// NOTE(review): the `struct countData {` header, some members (m, groupMap,
// total) and the constructor body are not visible in this excerpt.
// start: position passed to seekg() on the names file by the thread function.
62 unsigned long long start;
// end: upper bound of the thread function's read loop (its comment says
// "end is the number of sequences to process").
63 unsigned long long end;
// outputFileName is opened for writing and namefile for reading by the thread
// function; an empty groupfile disables the per-group counting branch.
65 string outputFileName, namefile, groupfile;
// Group names used as the keys of the per-sequence groupCounts map.
68 vector<string> Groups;
// Constructor taking the file names, group map, output object and start/end values.
// NOTE(review): presumably each argument is copied into the matching member
// (fn -> outputFileName, gfn -> groupfile, nfn -> namefile, gr -> Groups) - the
// body is not visible here, confirm against the full header.
71 countData(string fn, GroupMap* g, MothurOut* mout, unsigned long long st, unsigned long long en, string gfn, string nfn, vector<string> gr) {
84 /**************************************************************************************************/
85 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
87 static DWORD WINAPI MyCountThreadFunction(LPVOID lpParam){
88 countData* pDataArray;
89 pDataArray = (countData*)lpParam;
92 pDataArray->m->openOutputFile(pDataArray->outputFileName, out);
95 pDataArray->m->openInputFile(pDataArray->namefile, in);
96 in.seekg(pDataArray->start);
98 //print header if you are process 0
99 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
101 }else { //this accounts for the difference in line endings.
102 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
105 pDataArray->total = 0;
106 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
108 if (pDataArray->m->control_pressed) { break; }
110 string firstCol, secondCol;
111 in >> firstCol; pDataArray->m->gobble(in); in >> secondCol; pDataArray->m->gobble(in);
112 //cout << firstCol << '\t' << secondCol << endl;
113 pDataArray->m->checkName(firstCol);
114 pDataArray->m->checkName(secondCol);
116 vector<string> names;
117 pDataArray->m->splitAtChar(secondCol, names, ',');
119 if (pDataArray->groupfile != "") {
121 map<string, int> groupCounts;
123 for (int i = 0; i < pDataArray->Groups.size(); i++) { groupCounts[pDataArray->Groups[i]] = 0; }
125 //get counts for each of the user's groups
126 for (int i = 0; i < names.size(); i++) {
127 string group = pDataArray->groupMap->getGroup(names[i]);
129 if (group == "not found") { pDataArray->m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); pDataArray->m->mothurOutEndLine(); }
131 map<string, int>::iterator it = groupCounts.find(group);
133 //if not found, then this sequence is not from a group we care about
134 if (it != groupCounts.end()) {
142 out << firstCol << '\t' << total << '\t';
143 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
144 out << it->second << '\t';
149 out << firstCol << '\t' << names.size() << endl;
152 pDataArray->total += names.size();
160 catch(exception& e) {
161 pDataArray->m->errorOut(e, "CountSeqsCommand", "MyCountThreadFunction");