X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=countseqscommand.h;h=a5326d401e870c1878b5d8e7f4c2708bc0c48f04;hp=54982c11939e82fae8de341f74c1aa3a01e4f609;hb=615301e57c25e241356a9c2380648d117709458d;hpb=49d2b7459c5027557564b21e9487dadafbbbdc96 diff --git a/countseqscommand.h b/countseqscommand.h index 54982c1..a5326d4 100644 --- a/countseqscommand.h +++ b/countseqscommand.h @@ -11,6 +11,7 @@ */ #include "command.hpp" +#include "groupmap.h" class CountSeqsCommand : public Command { @@ -23,8 +24,9 @@ public: vector setParameters(); string getCommandName() { return "count.seqs"; } string getCommandCategory() { return "Sequence Processing"; } - string getOutputFileNameTag(string, string); + string getHelpString(); + string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; } string getDescription() { return "counts the number of sequences represented by each unique sequence in a namesfile"; } @@ -33,11 +35,137 @@ public: private: + + struct linePair { + unsigned long long start; + unsigned long long end; + linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} + }; + string namefile, groupfile, outputDir, groups; - bool abort; - vector Groups; + bool abort, large; + vector Groups, outputNames; + int processors; + + int processSmall(string); + int processLarge(string); + map processNameFile(string); + map getGroupNames(string, set&); + + int createProcesses(GroupMap*&, string); + int driver(unsigned long long, unsigned long long, string, GroupMap*&); + +}; + +/***********************************************************************/ +struct countData { + unsigned long long start; + unsigned long long end; + MothurOut* m; + string outputFileName, namefile, groupfile; + GroupMap* groupMap; + int total; + vector Groups; + + countData(){} + countData(string fn, GroupMap* g, MothurOut* mout, unsigned long long st, unsigned long long en, string gfn, string nfn, vector gr) { + m = mout; + start = st; + end = en; + groupMap = g; + groupfile = gfn; + namefile = nfn; + outputFileName = fn; + Groups = gr; + total = 0; + } }; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyCountThreadFunction(LPVOID lpParam){ + countData* pDataArray; + pDataArray = (countData*)lpParam; + try { + ofstream out; + pDataArray->m->openOutputFile(pDataArray->outputFileName, out); + + ifstream in; + pDataArray->m->openInputFile(pDataArray->namefile, in); + in.seekg(pDataArray->start); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + pDataArray->total = 0; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { break; } + + string firstCol, secondCol; + in >> firstCol; pDataArray->m->gobble(in); in >> secondCol; pDataArray->m->gobble(in); + //cout << firstCol << '\t' << secondCol << endl; + pDataArray->m->checkName(firstCol); + pDataArray->m->checkName(secondCol); + + vector names; + pDataArray->m->splitAtChar(secondCol, names, ','); + + if (pDataArray->groupfile != "") { + //set to 0 + map groupCounts; + int total = 0; + for (int i = 0; i < pDataArray->Groups.size(); i++) { groupCounts[pDataArray->Groups[i]] = 0; } + + //get counts for each of the users groups + for (int i = 0; i < names.size(); i++) { + string group = pDataArray->groupMap->getGroup(names[i]); + + if (group == "not found") { pDataArray->m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); pDataArray->m->mothurOutEndLine(); } + else { + map::iterator it = groupCounts.find(group); + + //if not found, then this sequence is not from a group we care about + if (it != groupCounts.end()) { + it->second++; + total++; + } + } + } + + if (total != 0) { + out << firstCol << '\t' << total << '\t'; + for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { + out << it->second << '\t'; + } + out << endl; + } + }else { + out << firstCol << '\t' << names.size() << endl; + } + + pDataArray->total += names.size(); + } + in.close(); + out.close(); + + + return 0; + } + catch(exception& e) { + pDataArray->m->errorOut(e, "CountSeqsCommand", "MyCountThreadFunction"); + exit(1); + } +} +#endif + + + #endif