X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=seqsummarycommand.h;h=552f4a08bee284bf914e6a50cd51176864b8b1d8;hp=01dd450194c2f454ddd1b6c44de3fda99cde0798;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=faf4e99552d6fb4595ff348b1f909ddc74536da2 diff --git a/seqsummarycommand.h b/seqsummarycommand.h index 01dd450..552f4a0 100644 --- a/seqsummarycommand.h +++ b/seqsummarycommand.h @@ -12,19 +12,161 @@ #include "mothur.h" #include "command.hpp" -#include "globaldata.hpp" - +#include "sequence.hpp" +/**************************************************************************************************/ class SeqSummaryCommand : public Command { public: + SeqSummaryCommand(string); SeqSummaryCommand(); - ~SeqSummaryCommand(); - int execute(); + ~SeqSummaryCommand(){} + + vector setParameters(); + string getCommandName() { return "summary.seqs"; } + string getCommandCategory() { return "Sequence Processing"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "http://www.mothur.org/wiki/Summary.seqs"; } + string getDescription() { return "summarize the quality of sequences in an unaligned or aligned fasta file"; } + int execute(); + void help() { m->mothurOut(getHelpString()); } private: - GlobalData* globaldata; + bool abort; + string fastafile, outputDir, namefile, countfile; + int processors; + vector outputNames; + map nameMap; + + struct linePair { + unsigned long long start; + unsigned long long end; + linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} + }; + + vector lines; + vector processIDS; + + int createProcessesCreateSummary(vector&, vector&, vector&, vector&, vector&, string, string); + int driverCreateSummary(vector&, vector&, vector&, vector&, vector&, string, string, linePair*); + #ifdef USE_MPI + int MPICreateSummary(int, int, vector&, vector&, vector&, vector&, vector&, MPI_File&, MPI_File&, vector&); + #endif + + +}; + +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct seqSumData { + vector startPosition; + vector endPosition; + vector seqLength; + vector ambigBases; + vector longHomoPolymer; + string filename; + string sumFile; + unsigned long long start; + unsigned long long end; + int count; + MothurOut* m; + bool hasNameMap; + map nameMap; + + + seqSumData(){} + seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, bool na, map nam) { + filename = f; + sumFile = sf; + m = mout; + start = st; + end = en; + hasNameMap = na; + nameMap = nam; + count = 0; + } }; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ + seqSumData* pDataArray; + pDataArray = (seqSumData*)lpParam; + + try { + ofstream outSummary; + pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary); + + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl; + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + pDataArray->count++; + + if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; } + + Sequence current(in); pDataArray->m->gobble(in); + + if (current.getName() != "") { + + int num = 1; + if (pDataArray->hasNameMap){ + //make sure this sequence is in the namefile, else error + map::iterator it = pDataArray->nameMap.find(current.getName()); + + if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your name or count file, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } + else { num = it->second; } + } + + //for each sequence this sequence represents + for (int i = 0; i < num; i++) { + pDataArray->startPosition.push_back(current.getStartPos()); + pDataArray->endPosition.push_back(current.getEndPos()); + pDataArray->seqLength.push_back(current.getNumBases()); + pDataArray->ambigBases.push_back(current.getAmbigBases()); + pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer()); + } + + outSummary << current.getName() << '\t'; + outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t'; + outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t'; + outSummary << current.getLongHomoPolymer() << '\t' << num << endl; + } + } + + in.close(); + outSummary.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction"); + exit(1); + } +} #endif + + + + +#endif + +/**************************************************************************************************/ + +