1 #ifndef SEQSUMMARYCOMMAND_H
2 #define SEQSUMMARYCOMMAND_H
8 * Created by Pat Schloss on 5/30/09.
9 * Copyright 2009 Patrick D. Schloss. All rights reserved.
14 #include "command.hpp"
15 #include "sequence.hpp"
17 /**************************************************************************************************/
// Command registered under the name "summary.seqs" (see getCommandName()).
// Per getDescription(), it summarizes the quality of sequences in an unaligned
// or aligned fasta file.
19 class SeqSummaryCommand : public Command {
// Constructs the command from a single string argument.
// NOTE(review): presumably the user-supplied option string -- confirm in the .cpp.
21 SeqSummaryCommand(string);
23 ~SeqSummaryCommand(){}
// Command metadata: parameter list, name, category, help text, citation URL and
// a one-line description. setParameters() and getHelpString() are defined elsewhere.
25 vector<string> setParameters();
26 string getCommandName() { return "summary.seqs"; }
27 string getCommandCategory() { return "Sequence Processing"; }
28 string getHelpString();
29 string getCitation() { return "http://www.mothur.org/wiki/Summary.seqs"; }
30 string getDescription() { return "summarize the quality of sequences in an unaligned or aligned fasta file"; }
// Writes the help text through m->mothurOut(). `m` is not declared in this
// header; presumably it is inherited from Command -- confirm.
33 void help() { m->mothurOut(getHelpString()); }
// File names / output directory used by the command (string-valued).
36 string fastafile, outputDir, namefile;
38 vector<string> outputNames;
// Maps a string key to an int. In MySeqSumThreadFunction the equivalent map is
// keyed by sequence name and holds the number of sequences that name represents.
39 map<string, int> nameMap;
// linePair: a (start, end) pair of unsigned long long values, set by its constructor.
// NOTE(review): presumably the bounds of one worker's slice of the input file -- confirm.
42 unsigned long long start;
43 unsigned long long end;
44 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
// Stored as raw pointers; where they are allocated and freed is not shown in this header.
47 vector<linePair*> lines;
48 vector<int> processIDS;
// Summary workers. Each takes five vector<int>& accumulators followed by two strings;
// the driver additionally takes the linePair it should process.
// NOTE(review): the five vectors likely mirror seqSumData's startPosition, endPosition,
// seqLength, ambigBases and longHomoPolymer, and the strings the fasta and summary
// file names -- confirm against the .cpp.
50 int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string);
51 int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string, linePair*);
// MPI variant: the signature depends on MPI_File, so it only compiles when the MPI
// headers are available.
// NOTE(review): presumably wrapped in an MPI-only preprocessor guard -- confirm.
54 int MPICreateSummary(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, MPI_File&, MPI_File&, vector<unsigned long long>&);
60 /**************************************************************************************************/
61 //custom data structure for threads to use.
62 // This is passed by void pointer so it can be any data type
63 // that can be passed using a single void pointer (LPVOID).
// Per-thread argument bundle read by MySeqSumThreadFunction.
// Output accumulators: the thread function appends one entry to each of these per
// represented sequence (getStartPos, getEndPos, getNumBases, getAmbigBases,
// getLongHomoPolymer respectively). They are pointers, so the vectors themselves
// live outside this struct.
65 vector<int>* startPosition;
66 vector<int>* endPosition;
67 vector<int>* seqLength;
68 vector<int>* ambigBases;
69 vector<int>* longHomoPolymer;
// start: position in the input file; the thread function seeks to start-1 unless
//        start is 0 or 1, in which case it writes the summary header instead.
// end:   used by the thread function as the number of sequences to process,
//        not as a file position.
72 unsigned long long start;
73 unsigned long long end;
// Sequence name -> number of sequences that name represents; consulted by the
// thread function only when namefile is non-empty.
77 map<string, int> nameMap;
// Constructor. The body is not visible here.
// NOTE(review): by position the arguments look like the five accumulators, the
// input and summary file names, the MothurOut handle, start, end, the namefile
// name and the name map -- confirm against the body.
// `nam` is taken by value, so an lvalue map passed by the caller is copied.
81 seqSumData(vector<int>* s, vector<int>* e, vector<int>* l, vector<int>* a, vector<int>* h, string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
98 /**************************************************************************************************/
99 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// Thread entry point with the Win32 thread-routine signature (DWORD WINAPI f(LPVOID)).
// lpParam must point to a seqSumData. The function reads sequences from
// pDataArray->filename, appends per-sequence statistics to the vectors referenced
// by the seqSumData, and writes one tab-separated row per sequence read to
// pDataArray->sumFile. Returns 1 if control_pressed is observed at the top of an iteration.
101 static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){
102 seqSumData* pDataArray;
// Recover the typed argument bundle from the opaque thread parameter.
103 pDataArray = (seqSumData*)lpParam;
107 pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary);
110 pDataArray->m->openInputFile(pDataArray->filename, in);
112 //print header if you are process 0
113 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
114 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl;
116 }else { //this accounts for the difference in line endings.
// Reposition one byte before the recorded start, then let gobble() advance the stream.
// NOTE(review): gobble presumably skips whitespace/line endings -- confirm in MothurOut.
117 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
// count starts at the expected number of sequences; it is overwritten with 1 on abort below.
120 pDataArray->count = pDataArray->end;
// NOTE(review): `int i` is compared against an unsigned long long bound; i is converted
// to unsigned for the comparison and would overflow if end exceeded INT_MAX.
121 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
// Abort check: close both streams, set count to 1 and return 1.
123 if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; }
// Read the next Sequence from the input stream.
125 Sequence current(in); pDataArray->m->gobble(in);
// Records with an empty name are skipped.
127 if (current.getName() != "") {
130 if (pDataArray->namefile != "") {
131 //make sure this sequence is in the namefile, else error
132 map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
// A missing name logs an error and sets control_pressed; the current iteration still
// runs to completion, and the abort is picked up by the check at the top of the next one.
134 if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
135 else { num = it->second; }
138 //for each sequence this sequence represents
// NOTE(review): this inner `i` shadows the outer loop index. It is harmless as written
// because the outer `i` is not used inside the inner loop, but it is easy to misread.
139 for (int i = 0; i < num; i++) {
140 pDataArray->startPosition->push_back(current.getStartPos());
141 pDataArray->endPosition->push_back(current.getEndPos());
142 pDataArray->seqLength->push_back(current.getNumBases());
143 pDataArray->ambigBases->push_back(current.getAmbigBases());
144 pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer());
// Summary row, in the same column order as the header:
// seqname, start, end, nbases, ambigs, polymer, numSeqs.
147 outSummary << current.getName() << '\t';
148 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
149 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
150 outSummary << current.getLongHomoPolymer() << '\t' << num << endl;
// Any std::exception is reported through MothurOut::errorOut with the class and
// function name for context.
160 catch(exception& e) {
161 pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction");
172 /**************************************************************************************************/