1 #ifndef TRIMFLOWSCOMMAND_H
2 #define TRIMFLOWSCOMMAND_H
8 * Created by Pat Schloss on 12/22/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "sequence.hpp"
18 #include "trimoligos.h"
20 class TrimFlowsCommand : public Command {
22 TrimFlowsCommand(string);
24 ~TrimFlowsCommand() {}
26 vector<string> setParameters();
27 string getCommandName() { return "trim.flows"; }
28 string getCommandCategory() { return "Sequence Processing"; }
30 string getHelpString();
31 string getOutputPattern(string);
32 string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; }
33 string getDescription() { return "trim.flows"; }
37 void help() { m->mothurOut(getHelpString()); }
43 unsigned long long start;
44 unsigned long long end;
45 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
48 vector<int> processIDS; //processid
49 vector<linePair*> lines;
51 vector<unsigned long long> getFlowFileBreaks();
52 int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >);
53 int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
54 string reverseOligo(string);
56 vector<string> outputNames;
57 set<string> filesToRemove;
59 void getOligos(vector<vector<string> >&); //a rewrite of what is in trimseqscommand.h
63 int numFPrimers, numRPrimers;
64 int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers;
70 string flowFileName, oligoFileName, outputDir;
72 map<string, int> barcodes;
73 map<string, int> primers;
74 vector<string> revPrimer;
75 vector<string> linker;
76 vector<string> spacer;
78 vector<string> primerNameVector; //needed here?
79 vector<string> barcodeNameVector; //needed here?
81 map<string, int> combos; //needed here?
82 map<string, int> groupToIndex; //needed here?
86 /**************************************************************************************************/
87 //custom data structure for threads to use.
88 // This is passed by void pointer so it can be any data type
89 // that can be passed using a single void pointer (LPVOID).
92 string trimFlowFileName;
93 string scrapFlowFileName;
96 vector<vector<string> > barcodePrimerComboFileNames;
97 map<string, int> barcodes;
98 map<string, int> primers;
99 vector<string> revPrimer;
100 bool fasta, allFiles;
101 unsigned long long start;
102 unsigned long long end;
105 int numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, threadID, count;
108 trimFlowData(string ff, string tf, string sf, string f, string fo, vector<vector<string> > bfn, map<string, int> bar, map<string, int> pri, vector<string> rev, bool fa, bool al, unsigned long long st, unsigned long long en, MothurOut* mout, float sig, float n, int numF, int maxF, int minF, int maxH, int td, int bd, int pd, int tid) {
110 trimFlowFileName = tf;
111 scrapFlowFileName = sf;
114 barcodePrimerComboFileNames = bfn;
136 /**************************************************************************************************/
137 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
139 static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){
140 trimFlowData* pDataArray;
141 pDataArray = (trimFlowData*)lpParam;
144 ofstream trimFlowFile;
145 pDataArray->m->openOutputFile(pDataArray->trimFlowFileName, trimFlowFile);
146 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
148 ofstream scrapFlowFile;
149 pDataArray->m->openOutputFile(pDataArray->scrapFlowFileName, scrapFlowFile);
150 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
153 if(pDataArray->fasta){ pDataArray->m->openOutputFile(pDataArray->fastaFileName, fastaFile); }
156 pDataArray->m->openInputFile(pDataArray->flowFileName, flowFile);
158 flowFile.seekg(pDataArray->start);
160 if(pDataArray->start == 0){
161 flowFile >> pDataArray->numFlows; pDataArray->m->gobble(flowFile);
162 scrapFlowFile << pDataArray->maxFlows << endl;
163 trimFlowFile << pDataArray->maxFlows << endl;
164 if(pDataArray->allFiles){
165 for(int i=0;i<pDataArray->barcodePrimerComboFileNames.size();i++){
166 for(int j=0;j<pDataArray->barcodePrimerComboFileNames[0].size();j++){
168 pDataArray->m->openOutputFile(pDataArray->barcodePrimerComboFileNames[i][j], temp);
169 temp << pDataArray->maxFlows << endl;
176 FlowData flowData(pDataArray->numFlows, pDataArray->signal, pDataArray->noise, pDataArray->maxHomoP, pDataArray->flowOrder);
177 cout << " thread flowdata address " << &flowData << '\t' << &flowFile << endl;
178 TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer);
180 pDataArray->count = pDataArray->end;
181 cout << pDataArray->threadID << '\t' << pDataArray->count << endl;
183 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
185 if (pDataArray->m->control_pressed) { break; }
186 cout << pDataArray->threadID << '\t' << count << endl;
188 int currentSeqDiffs = 0;
189 string trashCode = "";
191 flowData.getNext(flowFile);
192 cout << "thread good bit " << flowFile.good() << endl;
193 flowData.capFlows(pDataArray->maxFlows);
195 Sequence currSeq = flowData.getSequence();
196 if(!flowData.hasMinFlows(pDataArray->minFlows)){ //screen to see if sequence is of a minimum number of flows
202 int barcodeIndex = 0;
204 if(pDataArray->barcodes.size() != 0){
205 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
206 if(success > pDataArray->bdiffs) { trashCode += 'b'; }
207 else{ currentSeqDiffs += success; }
210 if(pDataArray->primers.size() != 0){
211 success = trimOligos.stripForward(currSeq, primerIndex);
212 if(success > pDataArray->pdiffs) { trashCode += 'f'; }
213 else{ currentSeqDiffs += success; }
216 if (currentSeqDiffs > pDataArray->tdiffs) { trashCode += 't'; }
218 if(pDataArray->revPrimer.size() != 0){
219 success = trimOligos.stripReverse(currSeq);
220 if(!success) { trashCode += 'r'; }
223 if(trashCode.length() == 0){
225 flowData.printFlows(trimFlowFile);
227 if(pDataArray->fasta) { currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile); }
229 if(pDataArray->allFiles){
231 pDataArray->m->openOutputFileAppend(pDataArray->barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
232 output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
234 flowData.printFlows(output);
239 flowData.printFlows(scrapFlowFile, trashCode);
243 cout << pDataArray->threadID << '\t' << currSeq.getName() << endl;
245 if((count) % 10000 == 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); }
249 if((count) % 10000 != 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); }
251 trimFlowFile.close();
252 scrapFlowFile.close();
254 if(pDataArray->fasta){ fastaFile.close(); }
257 catch(exception& e) {
258 pDataArray->m->errorOut(e, "TrimFlowsCommand", "MyTrimFlowsThreadFunction");