]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.h
8656fd0b5b314084c6fd08cf614288aca2bbc763
[mothur.git] / trimflowscommand.h
1 #ifndef TRIMFLOWSCOMMAND_H
2 #define TRIMFLOWSCOMMAND_H
3
4 /*
5  *  trimflowscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/22/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "flowdata.h"
17 #include "groupmap.h"
18 #include "trimoligos.h"
19
20 class TrimFlowsCommand : public Command {
21 public:
22         TrimFlowsCommand(string);
23         TrimFlowsCommand();
24         ~TrimFlowsCommand() {}
25         
26         vector<string> setParameters();
27         string getCommandName()                 { return "trim.flows";  }
28         string getCommandCategory()             { return "Sequence Processing";         }
29         string getHelpString(); 
30         string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; }
31         string getDescription()         { return "trim.flows"; }
32
33         
34         int execute(); 
35         void help() { m->mothurOut(getHelpString()); }  
36         
37 private:
38         bool abort;
39
40         struct linePair {
41                 unsigned long long start;
42                 unsigned long long end;
43                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
44         };
45         int comboStarts;
46         vector<int> processIDS;   //processid
47         vector<linePair*> lines;
48
49         vector<unsigned long long> getFlowFileBreaks();
50         int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
51         int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
52
53         vector<string> outputNames;
54         set<string> filesToRemove;
55         
56         void getOligos(vector<vector<string> >&);               //a rewrite of what is in trimseqscommand.h
57         
58         bool allFiles;
59         int processors;
60         int numFPrimers, numRPrimers;
61     int numLinkers, numSpacers;
62
63     int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs;
64         int numFlows;
65         float signal, noise;
66         bool fasta;
67         string flowOrder;       
68         
69         string flowFileName, oligoFileName, outputDir;
70
71         map<string, int> barcodes;
72         map<string, int> primers;
73         vector<string> revPrimer;
74
75         vector<string> primerNameVector;        //needed here?
76         vector<string> barcodeNameVector;       //needed here?
77
78         map<string, int> combos;                        //needed here?
79         map<string, int> groupToIndex;          //needed here?
80         
81 };
82
83 /**************************************************************************************************/
84 //custom data structure for threads to use.
85 // This is passed by void pointer so it can be any data type
86 // that can be passed using a single void pointer (LPVOID).
/**
 * Parameter bundle handed to MyTrimFlowThreadFunction through a single
 * void pointer.
 *
 * It carries the input/output file names, the barcode/primer tables, the
 * trimming thresholds and the slice of work (start, end) assigned to one
 * thread, plus `count`, which the thread function writes back.
 *
 * Both constructors initialise every member, so a freshly built object never
 * holds indeterminate scalars or a wild MothurOut pointer.
 */
struct trimFlowData {
	string flowFileName;
	string trimFlowFileName;
	string scrapFlowFileName;
	string fastaFileName;
	string flowOrder;
	vector<vector<string> > barcodePrimerComboFileNames;	// indexed [barcodeIndex][primerIndex]
	map<string, int> barcodes;
	map<string, int> primers;
	vector<string> revPrimer;
	bool fasta, allFiles;
	unsigned long long start;	// position passed to seekg() on the flow file
	unsigned long long end;		// number of sequences the thread should process
	MothurOut* m;
	float signal, noise;
	int numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, threadID, count;

	/** Builds an empty packet: strings/containers empty, scalars zero, m null. */
	trimFlowData()
		: fasta(false), allFiles(false),
		  start(0), end(0),
		  m(NULL),
		  signal(0.0f), noise(0.0f),
		  numFlows(0), maxFlows(0), minFlows(0), maxHomoP(0),
		  tdiffs(0), bdiffs(0), pdiffs(0), threadID(0), count(0) {}

	/**
	 * Builds a fully populated packet.
	 *
	 * @param ff   flow file to read
	 * @param tf   output file for flows that pass
	 * @param sf   output file for scrapped flows
	 * @param f    fasta output file (used only when fa is true)
	 * @param fo   flow order string
	 * @param bfn  per barcode/primer combination output file names
	 * @param bar  barcode table
	 * @param pri  forward primer table
	 * @param rev  reverse primers
	 * @param fa   write a fasta file
	 * @param al   write one file per barcode/primer combination
	 * @param st   start position in the flow file
	 * @param en   number of sequences to process
	 * @param mout MothurOut handle used for file helpers and reporting
	 * @param sig  signal threshold
	 * @param n    noise threshold
	 * @param numF number of flows per read
	 * @param maxF maximum number of flows
	 * @param minF minimum number of flows
	 * @param maxH maximum homopolymer length
	 * @param td   total differences allowed
	 * @param bd   barcode differences allowed
	 * @param pd   primer differences allowed
	 * @param tid  thread id
	 *
	 * `count` is not supplied by the caller; it starts at 0.
	 */
	trimFlowData(string ff, string tf, string sf, string f, string fo, vector<vector<string> > bfn, map<string, int> bar, map<string, int> pri, vector<string> rev, bool fa, bool al, unsigned long long st, unsigned long long en, MothurOut* mout, float sig, float n, int numF, int maxF, int minF, int maxH, int td, int bd, int pd, int tid)
		: flowFileName(ff),
		  trimFlowFileName(tf),
		  scrapFlowFileName(sf),
		  fastaFileName(f),
		  flowOrder(fo),
		  barcodePrimerComboFileNames(bfn),
		  barcodes(bar),
		  primers(pri),
		  revPrimer(rev),
		  fasta(fa), allFiles(al),
		  start(st), end(en),
		  m(mout),
		  signal(sig), noise(n),
		  numFlows(numF), maxFlows(maxF), minFlows(minF), maxHomoP(maxH),
		  tdiffs(td), bdiffs(bd), pdiffs(pd), threadID(tid), count(0) {}
};
132
133 /**************************************************************************************************/
134 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
135 #else
136 static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ 
137         trimFlowData* pDataArray;
138         pDataArray = (trimFlowData*)lpParam;
139         
140         try {
141                 ofstream trimFlowFile;
142                 pDataArray->m->openOutputFile(pDataArray->trimFlowFileName, trimFlowFile);
143                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
144                 
145                 ofstream scrapFlowFile;
146                 pDataArray->m->openOutputFile(pDataArray->scrapFlowFileName, scrapFlowFile);
147                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
148                 
149                 ofstream fastaFile;
150                 if(pDataArray->fasta){  pDataArray->m->openOutputFile(pDataArray->fastaFileName, fastaFile);    }
151                 
152                 ifstream flowFile;
153                 pDataArray->m->openInputFile(pDataArray->flowFileName, flowFile);
154                 
155                 flowFile.seekg(pDataArray->start);
156                 
157                 if(pDataArray->start == 0){
158                         flowFile >> pDataArray->numFlows; pDataArray->m->gobble(flowFile);
159                         scrapFlowFile << pDataArray->maxFlows << endl;
160                         trimFlowFile << pDataArray->maxFlows << endl;
161                         if(pDataArray->allFiles){
162                                 for(int i=0;i<pDataArray->barcodePrimerComboFileNames.size();i++){
163                                         for(int j=0;j<pDataArray->barcodePrimerComboFileNames[0].size();j++){
164                                                 ofstream temp;
165                                                 pDataArray->m->openOutputFile(pDataArray->barcodePrimerComboFileNames[i][j], temp);
166                                                 temp << pDataArray->maxFlows << endl;
167                                                 temp.close();
168                                         }
169                                 }                       
170                         }
171                 }
172                 
173                 FlowData flowData(pDataArray->numFlows, pDataArray->signal, pDataArray->noise, pDataArray->maxHomoP, pDataArray->flowOrder);
174                 cout << " thread flowdata address " <<  &flowData  << '\t' << &flowFile << endl;
175                 TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer);
176                 
177                 pDataArray->count = pDataArray->end;
178                 cout << pDataArray->threadID << '\t' << pDataArray->count << endl;
179                 int count = 0;
180                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
181                         
182                         if (pDataArray->m->control_pressed) {  break; }
183                         cout << pDataArray->threadID << '\t' << count << endl;
184                         int success = 1;
185                         int currentSeqDiffs = 0;
186                         string trashCode = "";
187                                 
188                         flowData.getNext(flowFile);
189                         cout << "thread good bit " << flowFile.good() << endl;
190                         flowData.capFlows(pDataArray->maxFlows);        
191                         
192                         Sequence currSeq = flowData.getSequence();
193                         if(!flowData.hasMinFlows(pDataArray->minFlows)){        //screen to see if sequence is of a minimum number of flows
194                                 success = 0;
195                                 trashCode += 'l';
196                         }
197                         
198                         int primerIndex = 0;
199                         int barcodeIndex = 0;
200                         
201                         if(pDataArray->barcodes.size() != 0){
202                                 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
203                                 if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
204                                 else{ currentSeqDiffs += success;  }
205                         }
206                         
207                         if(pDataArray->primers.size() != 0){
208                                 success = trimOligos.stripForward(currSeq, primerIndex);
209                                 if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
210                                 else{ currentSeqDiffs += success;  }
211                         }
212                         
213                         if (currentSeqDiffs > pDataArray->tdiffs)       {       trashCode += 't';   }
214                         
215                         if(pDataArray->revPrimer.size() != 0){
216                                 success = trimOligos.stripReverse(currSeq);
217                                 if(!success)                            {       trashCode += 'r';       }
218                         }
219                         
220                         if(trashCode.length() == 0){
221                                 
222                                 flowData.printFlows(trimFlowFile);
223                                 
224                                 if(pDataArray->fasta)   {       currSeq.printSequence(fastaFile);       }
225                                 
226                                 if(pDataArray->allFiles){
227                                         ofstream output;
228                                         pDataArray->m->openOutputFileAppend(pDataArray->barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
229                                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
230                                         
231                                         flowData.printFlows(output);
232                                         output.close();
233                                 }                               
234                         }
235                         else{
236                                 flowData.printFlows(scrapFlowFile, trashCode);
237                         }
238                         
239                         count++;
240                                 cout << pDataArray->threadID << '\t' << currSeq.getName() << endl;              
241                         //report progress
242                         if((count) % 10000 == 0){       pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
243                         
244                 }
245                 //report progress
246                 if((count) % 10000 != 0){       pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
247                 
248                 trimFlowFile.close();
249                 scrapFlowFile.close();
250                 flowFile.close();
251                 if(pDataArray->fasta){  fastaFile.close();      }
252                 
253         }
254         catch(exception& e) {
255                 pDataArray->m->errorOut(e, "TrimFlowsCommand", "MyTrimFlowsThreadFunction");
256                 exit(1);
257         }
258
259 #endif
260
261
262 #endif