]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.h
update .gitignore
[mothur.git] / trimflowscommand.h
1 #ifndef TRIMFLOWSCOMMAND_H
2 #define TRIMFLOWSCOMMAND_H
3
4 /*
5  *  trimflowscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/22/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "sequence.hpp"
16 #include "flowdata.h"
17 #include "groupmap.h"
18 #include "trimoligos.h"
19 #include "oligos.h"
20
// TrimFlowsCommand: the Command subclass behind the "trim.flows" command
// (category "Sequence Processing"). This header only declares the class; every
// member function without an inline body below is defined elsewhere.
21 class TrimFlowsCommand : public Command {
22 public:
           // Construct from a single string (presumably the command's option string -- TODO confirm against the .cpp).
23         TrimFlowsCommand(string);
           // Default construction; what it sets up is not visible in this header.
24         TrimFlowsCommand();
25         ~TrimFlowsCommand() {}
26         
           // Returns a list of strings; presumably the parameter names this command accepts -- verify in the .cpp.
27         vector<string> setParameters();
           // Name and category reported for this command.
28         string getCommandName()                 { return "trim.flows";  }
29         string getCommandCategory()             { return "Sequence Processing";         }
30         
           // Help text; help() below prints exactly this string.
31         string getHelpString(); 
           // Takes one string and returns a string; presumably maps an output type to a file-name pattern -- TODO confirm.
32     string getOutputPattern(string);    
           // Citation is the URL of the command's wiki page.
33         string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; }
34         string getDescription()         { return "trim.flows"; }
35
36         
           // Runs the command; the meaning of the int result is not defined in this header.
37         int execute(); 
           // Writes getHelpString() through m->mothurOut(). `m` is not declared in this class,
           // so it is presumably inherited from Command -- confirm in command.hpp.
38         void help() { m->mothurOut(getHelpString()); }  
39         
40 private:
41         bool abort;
42
           // A (start, end) pair of unsigned 64-bit values. Presumably delimits the portion of the
           // flow file handled by one worker (see getFlowFileBreaks) -- TODO confirm units in the .cpp.
43         struct linePair {
44                 unsigned long long start;
45                 unsigned long long end;
46                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
47         };
48         int comboStarts;
49         vector<int> processIDS;   //processid
           // Holds raw linePair pointers; who allocates and frees them is not visible here -- check the .cpp.
50         vector<linePair*> lines;
51     vector<string> outputNames;
52         set<string> filesToRemove;
53     bool allFiles;
54         int processors;
           // Counts of the different oligo kinds (forward/reverse primers, barcodes; linkers and spacers are on the next line).
55         int numFPrimers, numRPrimers, numBarcodes;
           // Flow/length limits and the *diffs values; presumably the allowed mismatch counts
           // (total, barcode, primer, spacer, linker) -- verify against the parameter parsing.
56         int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers;
57         int numFlows;
58         float signal, noise;
59         bool fasta, pairedOligos, reorient;
60         string flowOrder, flowFileName, oligoFileName, outputDir;
61     Oligos oligos;
62
63
           // Returns a list of unsigned 64-bit values; presumably the break points used to split the flow file -- TODO confirm.
64         vector<unsigned long long> getFlowFileBreaks();
           // The next two helpers take the same four strings and a table of file names (passed by value);
           // driverCreateTrim additionally takes the linePair it should work on. Argument meanings are defined in the .cpp.
65         int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
66         int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
           // Takes the file-name table by reference, so it can modify the caller's copy.
67         int getOligos(vector<vector<string> >&);                //a rewrite of what is in trimseqscommand.h
68         
69         
70         
71 };
72
73 /**************************************************************************************************
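74 // Custom data structure for threads to use. NOTE: this struct and the thread function below are disabled -- the enclosing block comment is only closed at the end of the file.
75 // The struct is passed by void pointer, so it can be any data type
76 // that can be passed using a single void pointer (LPVOID).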
77 struct trimFlowData {
78         string flowFileName; 
79         string trimFlowFileName; 
80         string scrapFlowFileName;
81         string fastaFileName;
82         string flowOrder;
83         vector<vector<string> > barcodePrimerComboFileNames;
84         map<string, int> barcodes;
85         map<string, int> primers;
86         vector<string> revPrimer;
87         bool fasta, allFiles;
88         unsigned long long start;
89         unsigned long long end;
90         MothurOut* m;
91         float signal, noise;
92         int numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, threadID, count;
93         
94         trimFlowData(){}
95         trimFlowData(string ff, string tf, string sf, string f, string fo, vector<vector<string> > bfn, map<string, int> bar, map<string, int> pri, vector<string> rev, bool fa, bool al, unsigned long long st, unsigned long long en, MothurOut* mout, float sig, float n, int numF, int maxF, int minF, int maxH, int td, int bd, int pd, int tid) {
96                 flowFileName = ff;
97                 trimFlowFileName = tf;
98                 scrapFlowFileName = sf;
99                 fastaFileName = f;
100                 flowOrder = fo;
101                 barcodePrimerComboFileNames = bfn;
102                 barcodes = bar;
103                 primers = pri;
104                 revPrimer = rev;
105                 fasta = fa;
106                 allFiles = al;
107                 start = st;
108                 end = en;
109                 m = mout;
110                 signal = sig;
111                 noise = n;
112                 numFlows = numF;
113                 maxFlows = maxF;
114                 minFlows = minF;
115                 maxHomoP = maxH;
116                 tdiffs = td;
117                 bdiffs = bd;
118                 pdiffs = pd;
119                 threadID = tid;
120         }
121 };
122
123 /**************************************************************************************************
124 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
125 #else
126 static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ 
127         trimFlowData* pDataArray;
128         pDataArray = (trimFlowData*)lpParam;
129         
130         try {
131                 ofstream trimFlowFile;
132                 pDataArray->m->openOutputFile(pDataArray->trimFlowFileName, trimFlowFile);
133                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
134                 
135                 ofstream scrapFlowFile;
136                 pDataArray->m->openOutputFile(pDataArray->scrapFlowFileName, scrapFlowFile);
137                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
138                 
139                 ofstream fastaFile;
140                 if(pDataArray->fasta){  pDataArray->m->openOutputFile(pDataArray->fastaFileName, fastaFile);    }
141                 
142                 ifstream flowFile;
143                 pDataArray->m->openInputFile(pDataArray->flowFileName, flowFile);
144                 
145                 flowFile.seekg(pDataArray->start);
146                 
147                 if(pDataArray->start == 0){
148                         flowFile >> pDataArray->numFlows; pDataArray->m->gobble(flowFile);
149                         scrapFlowFile << pDataArray->maxFlows << endl;
150                         trimFlowFile << pDataArray->maxFlows << endl;
151                         if(pDataArray->allFiles){
152                                 for(int i=0;i<pDataArray->barcodePrimerComboFileNames.size();i++){
153                                         for(int j=0;j<pDataArray->barcodePrimerComboFileNames[0].size();j++){
154                                                 ofstream temp;
155                                                 pDataArray->m->openOutputFile(pDataArray->barcodePrimerComboFileNames[i][j], temp);
156                                                 temp << pDataArray->maxFlows << endl;
157                                                 temp.close();
158                                         }
159                                 }                       
160                         }
161                 }
162                 
163                 FlowData flowData(pDataArray->numFlows, pDataArray->signal, pDataArray->noise, pDataArray->maxHomoP, pDataArray->flowOrder);
164                 cout << " thread flowdata address " <<  &flowData  << '\t' << &flowFile << endl;
165                 TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer);
166                 
167                 pDataArray->count = pDataArray->end;
168                 cout << pDataArray->threadID << '\t' << pDataArray->count << endl;
169                 int count = 0;
170                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
171                         
172                         if (pDataArray->m->control_pressed) {  break; }
173                         cout << pDataArray->threadID << '\t' << count << endl;
174                         int success = 1;
175                         int currentSeqDiffs = 0;
176                         string trashCode = "";
177                                 
178                         flowData.getNext(flowFile);
179                         cout << "thread good bit " << flowFile.good() << endl;
180                         flowData.capFlows(pDataArray->maxFlows);        
181                         
182                         Sequence currSeq = flowData.getSequence();
183                         if(!flowData.hasMinFlows(pDataArray->minFlows)){        //screen to see if sequence is of a minimum number of flows
184                                 success = 0;
185                                 trashCode += 'l';
186                         }
187                         
188                         int primerIndex = 0;
189                         int barcodeIndex = 0;
190                         
191                         if(pDataArray->barcodes.size() != 0){
192                                 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
193                                 if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
194                                 else{ currentSeqDiffs += success;  }
195                         }
196                         
197                         if(pDataArray->primers.size() != 0){
198                                 success = trimOligos.stripForward(currSeq, primerIndex);
199                                 if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
200                                 else{ currentSeqDiffs += success;  }
201                         }
202                         
203                         if (currentSeqDiffs > pDataArray->tdiffs)       {       trashCode += 't';   }
204                         
205                         if(pDataArray->revPrimer.size() != 0){
206                                 success = trimOligos.stripReverse(currSeq);
207                                 if(!success)                            {       trashCode += 'r';       }
208                         }
209                         
210                         if(trashCode.length() == 0){
211                                 
212                                 flowData.printFlows(trimFlowFile);
213                                 
214                                 if(pDataArray->fasta)   {       currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile);   }
215                                 
216                                 if(pDataArray->allFiles){
217                                         ofstream output;
218                                         pDataArray->m->openOutputFileAppend(pDataArray->barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
219                                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
220                                         
221                                         flowData.printFlows(output);
222                                         output.close();
223                                 }                               
224                         }
225                         else{
226                                 flowData.printFlows(scrapFlowFile, trashCode);
227                         }
228                         
229                         count++;
230                                 cout << pDataArray->threadID << '\t' << currSeq.getName() << endl;              
231                         //report progress
232                         if((count) % 10000 == 0){       pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
233                         
234                 }
235                 //report progress
236                 if((count) % 10000 != 0){       pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
237                 
238                 trimFlowFile.close();
239                 scrapFlowFile.close();
240                 flowFile.close();
241                 if(pDataArray->fasta){  fastaFile.close();      }
242                 
243         }
244         catch(exception& e) {
245                 pDataArray->m->errorOut(e, "TrimFlowsCommand", "MyTrimFlowsThreadFunction");
246                 exit(1);
247         }
248
249 #endif
250 */
251
252 #endif