1 #ifndef COuNTSEQSCOMMAND_H
2 #define COuNTSEQSCOMMAND_H
8 * Created by westcott on 6/1/11.
9 * Copyright 2011 Schloss Lab. All rights reserved.
13 #include "command.hpp"
15 #include "sharedrabundvector.h"
// Command implementation registered under the name "count.seqs".
// Per getDescription(), it makes a count file from a names or shared file.
// NOTE(review): this excerpt omits several original lines (access specifiers,
// additional constructors/destructor, closing braces); the comments below only
// describe what is visible here.
17 class CountSeqsCommand : public Command {
// Constructor taking a single string (presumably the user's option string — TODO confirm).
21 CountSeqsCommand(string);
// Command metadata accessors. The inline ones return fixed strings;
// setParameters(), getHelpString() and getOutputPattern() are defined elsewhere.
25 vector<string> setParameters();
26 string getCommandName() { return "count.seqs"; }
27 string getCommandCategory() { return "Sequence Processing"; }
29 string getHelpString();
30 string getOutputPattern(string);
31 string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; }
32 string getDescription() { return "makes a count file from a names or shared file"; }
// Writes the help text through the MothurOut handle `m` (declared outside this excerpt).
35 void help() { m->mothurOut(getHelpString()); }
// Fields and constructor of a small start/end pair type named linePair
// (its struct header is not visible in this excerpt).
41 unsigned long long start;
42 unsigned long long end;
43 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
// Command state: file names and options (presumably filled in from the user's
// parameters — TODO confirm against the constructor in the .cpp).
46 string namefile, groupfile, outputDir, groups, sharedfile;
47 bool abort, large, allLines;
48 vector<string> Groups, outputNames;
// Processing helpers; all are defined elsewhere. Judging by the names,
// processSmall/processLarge are alternative paths (likely selected by `large`
// — verify in the implementation).
52 int processSmall(string);
53 int processLarge(string);
54 map<int, string> processNameFile(string);
55 map<int, string> getGroupNames(string, set<string>&);
// createProcesses/driver take the GroupMap pointer by reference; driver's two
// unsigned long long arguments presumably mirror linePair's start/end — TODO confirm.
57 int createProcesses(GroupMap*&, string);
58 int driver(unsigned long long, unsigned long long, string, GroupMap*&);
// Handles shared-file input: receives the per-group abundance vectors and a
// string->string map named `variables`.
59 int processShared(vector<SharedRAbundVector*>& lookup, map<string, string> variables);
64 /***********************************************************************/
// Per-thread work description passed to MyCountThreadFunction as its LPVOID
// argument and cast back to countData* there.
// NOTE(review): the struct header, some members (e.g. m, groupMap, total, which
// the thread function dereferences) and the constructor body are not visible in
// this excerpt.
// start: offset into the names file; the thread function passes it to seekg().
66 unsigned long long start;
// end: despite the name, the thread function uses this as the NUMBER of
// name-file records to process (its loop bound), not as an end offset.
67 unsigned long long end;
// Output file the thread writes to, and the names/group input file names.
// An empty groupfile makes the thread function skip the per-group branch.
69 string outputFileName, namefile, groupfile;
// Group names the thread function uses to seed its per-group counters.
72 vector<string> Groups;
// Constructor: (output file name, group map, MothurOut handle, start, end,
// group file name, names file name, groups). Presumably each argument is stored
// in the matching member — body not visible here, TODO confirm.
75 countData(string fn, GroupMap* g, MothurOut* mout, unsigned long long st, unsigned long long en, string gfn, string nfn, vector<string> gr) {
88 /**************************************************************************************************/
// Platform guard: true on Apple/Mach, Linux and Unix builds. The Windows-only
// thread function below (DWORD WINAPI / LPVOID) must therefore sit in the
// opposite branch; the matching #else/#endif are not visible in this excerpt.
// NOTE(review): only __APPLE__ is wrapped in defined(); the remaining names are
// evaluated as macro VALUES. That works because undefined identifiers evaluate
// to 0 inside #if, but it warns under -Wundef and fails to compile if any of
// these macros is defined to an empty value. Consider defined(X) for each name.
89 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
91 static DWORD WINAPI MyCountThreadFunction(LPVOID lpParam){
92 countData* pDataArray;
93 pDataArray = (countData*)lpParam;
96 pDataArray->m->openOutputFile(pDataArray->outputFileName, out);
99 pDataArray->m->openInputFile(pDataArray->namefile, in);
100 in.seekg(pDataArray->start);
102 //print the header only if you are process 0, i.e. your chunk starts at the beginning of the file (start is 0 or 1)
103 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
105 }else { //this accounts for the difference in line endings.
106 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
109 pDataArray->total = 0;
110 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
112 if (pDataArray->m->control_pressed) { break; }
114 string firstCol, secondCol;
115 in >> firstCol; pDataArray->m->gobble(in); in >> secondCol; pDataArray->m->gobble(in);
116 //cout << firstCol << '\t' << secondCol << endl;
117 pDataArray->m->checkName(firstCol);
118 pDataArray->m->checkName(secondCol);
120 vector<string> names;
121 pDataArray->m->splitAtChar(secondCol, names, ',');
123 if (pDataArray->groupfile != "") {
125 map<string, int> groupCounts;
127 for (int i = 0; i < pDataArray->Groups.size(); i++) { groupCounts[pDataArray->Groups[i]] = 0; }
129 //get counts for each of the users groups
130 for (int i = 0; i < names.size(); i++) {
131 string group = pDataArray->groupMap->getGroup(names[i]);
133 if (group == "not found") { pDataArray->m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); pDataArray->m->mothurOutEndLine(); }
135 map<string, int>::iterator it = groupCounts.find(group);
137 //if the group is not in groupCounts, then this sequence is not from a group we care about
138 if (it != groupCounts.end()) {
146 out << firstCol << '\t' << total << '\t';
147 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
148 out << it->second << '\t';
153 out << firstCol << '\t' << names.size() << endl;
156 pDataArray->total += names.size();
164 catch(exception& e) {
165 pDataArray->m->errorOut(e, "CountSeqsCommand", "MyCountThreadFunction");