]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.h
changed random forest output filename
[mothur.git] / trimflowscommand.h
1 #ifndef TRIMFLOWSCOMMAND_H
2 #define TRIMFLOWSCOMMAND_H
3
4 /*
5  *  trimflowscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/22/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "flowdata.h"
17 #include "groupmap.h"
18 #include "trimoligos.h"
19
20 class TrimFlowsCommand : public Command {
21 public:
22         TrimFlowsCommand(string);
23         TrimFlowsCommand();
24         ~TrimFlowsCommand() {}
25         
26         vector<string> setParameters();
27         string getCommandName()                 { return "trim.flows";  }
28         string getCommandCategory()             { return "Sequence Processing";         }
29         
30         string getHelpString(); 
31     string getOutputPattern(string);    
32         string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; }
33         string getDescription()         { return "trim.flows"; }
34
35         
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }  
38         
39 private:
40         bool abort;
41
42         struct linePair {
43                 unsigned long long start;
44                 unsigned long long end;
45                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
46         };
47         int comboStarts;
48         vector<int> processIDS;   //processid
49         vector<linePair*> lines;
50
51         vector<unsigned long long> getFlowFileBreaks();
52         int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
53         int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
54     string reverseOligo(string);
55     
56         vector<string> outputNames;
57         set<string> filesToRemove;
58         
59         void getOligos(vector<vector<string> >&);               //a rewrite of what is in trimseqscommand.h
60         
61         bool allFiles;
62         int processors;
63         int numFPrimers, numRPrimers;
64         int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers;
65         int numFlows;
66         float signal, noise;
67         bool fasta;
68         string flowOrder;       
69         
70         string flowFileName, oligoFileName, outputDir;
71
72         map<string, int> barcodes;
73         map<string, int> primers;
74         vector<string> revPrimer;
75     vector<string> linker;
76     vector<string> spacer;
77
78         vector<string> primerNameVector;        //needed here?
79         vector<string> barcodeNameVector;       //needed here?
80
81         map<string, int> combos;                        //needed here?
82         map<string, int> groupToIndex;          //needed here?
83         
84 };
85
86 /**************************************************************************************************/
// Per-thread work description used by the Windows threading code.
// It is handed to the thread function through a single void pointer (LPVOID),
// so everything the worker needs has to travel inside this one struct.
//
// Both constructors leave every member in a defined state: the default
// constructor zeroes all scalars and nulls the MothurOut pointer, and the
// full constructor starts `count` at 0 until the worker overwrites it.
struct trimFlowData {
    string flowFileName;        // flow file the worker reads
    string trimFlowFileName;    // output for sequences that pass every check
    string scrapFlowFileName;   // output for rejected sequences
    string fastaFileName;       // fasta output, only opened when `fasta` is true
    string flowOrder;
    vector<vector<string> > barcodePrimerComboFileNames;    // indexed [barcode][primer]
    map<string, int> barcodes;
    map<string, int> primers;
    vector<string> revPrimer;
    bool fasta, allFiles;
    unsigned long long start;   // position the worker seeks to in the flow file
    unsigned long long end;     // number of sequences the worker should process
    MothurOut* m;
    float signal, noise;
    int numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, threadID, count;

    // Creates an empty work item with all scalars zeroed and a null MothurOut pointer.
    trimFlowData()
        : fasta(false), allFiles(false),
          start(0), end(0),
          m(NULL),
          signal(0.0f), noise(0.0f),
          numFlows(0), maxFlows(0), minFlows(0), maxHomoP(0),
          tdiffs(0), bdiffs(0), pdiffs(0), threadID(0), count(0) {}

    // Creates a fully populated work item.
    // Arguments map one-to-one onto the members, in this order:
    //   ff/tf/sf/f - flow, trim, scrap and fasta file names
    //   fo         - flow order
    //   bfn        - per barcode/primer combination file names
    //   bar/pri    - barcode and primer tables
    //   rev        - reverse primers
    //   fa/al      - fasta and allFiles switches
    //   st/en      - start position and number of sequences
    //   mout       - MothurOut instance used for file handling and messages
    //   sig/n      - signal and noise
    //   numF/maxF/minF/maxH - numFlows, maxFlows, minFlows, maxHomoP
    //   td/bd/pd   - tdiffs, bdiffs, pdiffs
    //   tid        - thread id
    // Strings and containers are taken by const reference and copied once.
    trimFlowData(const string& ff, const string& tf, const string& sf, const string& f, const string& fo,
                 const vector<vector<string> >& bfn, const map<string, int>& bar, const map<string, int>& pri,
                 const vector<string>& rev, bool fa, bool al, unsigned long long st, unsigned long long en,
                 MothurOut* mout, float sig, float n, int numF, int maxF, int minF, int maxH,
                 int td, int bd, int pd, int tid)
        : flowFileName(ff), trimFlowFileName(tf), scrapFlowFileName(sf), fastaFileName(f), flowOrder(fo),
          barcodePrimerComboFileNames(bfn), barcodes(bar), primers(pri), revPrimer(rev),
          fasta(fa), allFiles(al),
          start(st), end(en),
          m(mout),
          signal(sig), noise(n),
          numFlows(numF), maxFlows(maxF), minFlows(minF), maxHomoP(maxH),
          tdiffs(td), bdiffs(bd), pdiffs(pd), threadID(tid),
          count(0) {}     // the worker thread fills this in later
};
135
136 /**************************************************************************************************/
137 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
138 #else
139 static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ 
140         trimFlowData* pDataArray;
141         pDataArray = (trimFlowData*)lpParam;
142         
143         try {
144                 ofstream trimFlowFile;
145                 pDataArray->m->openOutputFile(pDataArray->trimFlowFileName, trimFlowFile);
146                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
147                 
148                 ofstream scrapFlowFile;
149                 pDataArray->m->openOutputFile(pDataArray->scrapFlowFileName, scrapFlowFile);
150                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
151                 
152                 ofstream fastaFile;
153                 if(pDataArray->fasta){  pDataArray->m->openOutputFile(pDataArray->fastaFileName, fastaFile);    }
154                 
155                 ifstream flowFile;
156                 pDataArray->m->openInputFile(pDataArray->flowFileName, flowFile);
157                 
158                 flowFile.seekg(pDataArray->start);
159                 
160                 if(pDataArray->start == 0){
161                         flowFile >> pDataArray->numFlows; pDataArray->m->gobble(flowFile);
162                         scrapFlowFile << pDataArray->maxFlows << endl;
163                         trimFlowFile << pDataArray->maxFlows << endl;
164                         if(pDataArray->allFiles){
165                                 for(int i=0;i<pDataArray->barcodePrimerComboFileNames.size();i++){
166                                         for(int j=0;j<pDataArray->barcodePrimerComboFileNames[0].size();j++){
167                                                 ofstream temp;
168                                                 pDataArray->m->openOutputFile(pDataArray->barcodePrimerComboFileNames[i][j], temp);
169                                                 temp << pDataArray->maxFlows << endl;
170                                                 temp.close();
171                                         }
172                                 }                       
173                         }
174                 }
175                 
176                 FlowData flowData(pDataArray->numFlows, pDataArray->signal, pDataArray->noise, pDataArray->maxHomoP, pDataArray->flowOrder);
177                 cout << " thread flowdata address " <<  &flowData  << '\t' << &flowFile << endl;
178                 TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer);
179                 
180                 pDataArray->count = pDataArray->end;
181                 cout << pDataArray->threadID << '\t' << pDataArray->count << endl;
182                 int count = 0;
183                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
184                         
185                         if (pDataArray->m->control_pressed) {  break; }
186                         cout << pDataArray->threadID << '\t' << count << endl;
187                         int success = 1;
188                         int currentSeqDiffs = 0;
189                         string trashCode = "";
190                                 
191                         flowData.getNext(flowFile);
192                         cout << "thread good bit " << flowFile.good() << endl;
193                         flowData.capFlows(pDataArray->maxFlows);        
194                         
195                         Sequence currSeq = flowData.getSequence();
196                         if(!flowData.hasMinFlows(pDataArray->minFlows)){        //screen to see if sequence is of a minimum number of flows
197                                 success = 0;
198                                 trashCode += 'l';
199                         }
200                         
201                         int primerIndex = 0;
202                         int barcodeIndex = 0;
203                         
204                         if(pDataArray->barcodes.size() != 0){
205                                 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
206                                 if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
207                                 else{ currentSeqDiffs += success;  }
208                         }
209                         
210                         if(pDataArray->primers.size() != 0){
211                                 success = trimOligos.stripForward(currSeq, primerIndex);
212                                 if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
213                                 else{ currentSeqDiffs += success;  }
214                         }
215                         
216                         if (currentSeqDiffs > pDataArray->tdiffs)       {       trashCode += 't';   }
217                         
218                         if(pDataArray->revPrimer.size() != 0){
219                                 success = trimOligos.stripReverse(currSeq);
220                                 if(!success)                            {       trashCode += 'r';       }
221                         }
222                         
223                         if(trashCode.length() == 0){
224                                 
225                                 flowData.printFlows(trimFlowFile);
226                                 
227                                 if(pDataArray->fasta)   {       currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile);   }
228                                 
229                                 if(pDataArray->allFiles){
230                                         ofstream output;
231                                         pDataArray->m->openOutputFileAppend(pDataArray->barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
232                                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
233                                         
234                                         flowData.printFlows(output);
235                                         output.close();
236                                 }                               
237                         }
238                         else{
239                                 flowData.printFlows(scrapFlowFile, trashCode);
240                         }
241                         
242                         count++;
243                                 cout << pDataArray->threadID << '\t' << currSeq.getName() << endl;              
244                         //report progress
245                         if((count) % 10000 == 0){       pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
246                         
247                 }
248                 //report progress
249                 if((count) % 10000 != 0){       pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
250                 
251                 trimFlowFile.close();
252                 scrapFlowFile.close();
253                 flowFile.close();
254                 if(pDataArray->fasta){  fastaFile.close();      }
255                 
256         }
257         catch(exception& e) {
258                 pDataArray->m->errorOut(e, "TrimFlowsCommand", "MyTrimFlowsThreadFunction");
259                 exit(1);
260         }
261
262 #endif
263
264
265 #endif