]> git.donarmstrong.com Git - mothur.git/blob - chopseqscommand.h
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chopseqscommand.h
1 #ifndef CHOPSEQSCOMMAND_H
2 #define CHOPSEQSCOMMAND_H
3
4 /*
5  *  chopseqscommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 5/10/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13
14 #include "command.hpp"
15 #include "sequence.hpp"
16
17 class ChopSeqsCommand : public Command {
18         
19         public:
20         
21                 ChopSeqsCommand(string);
22                 ChopSeqsCommand();      
23                 ~ChopSeqsCommand(){};
24         
25                 vector<string> setParameters();
26                 string getCommandName()                 { return "chop.seqs";           }
27                 string getCommandCategory()             { return "Sequence Processing"; }
28                 string getOutputFileNameTag(string, string);
29         string getHelpString(); 
30                 string getCitation() { return "http://www.mothur.org/wiki/Chops.seqs"; }
31                 string getDescription()         { return "trim sequence length"; }
32         
33                 int execute(); 
34                 void help() { m->mothurOut(getHelpString()); }          
35         
36         private:
37         struct linePair {
38             unsigned long long start;
39             unsigned long long end;
40             linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
41         };
42     
43                 string fastafile, outputDir, keep;
44                 bool abort, countGaps, Short;
45                 int numbases, processors;
46                 vector<string> outputNames;
47                 
48                 string getChopped(Sequence);
49         bool driver (linePair, string, string, string);
50         bool createProcesses(vector<linePair>, string, string, string);
51 };
52
53 /**************************************************************************************************/
54 //custom data structure for threads to use.
55 // This is passed by void pointer so it can be any data type
56 // that can be passed using a single void pointer (LPVOID).
57 struct chopData {
58         string filename; 
59         string outFasta, outAccnos, keep; 
60         unsigned long long start;
61         unsigned long long end;
62         int numbases;
63     bool countGaps, Short, wroteAccnos;
64         MothurOut* m;
65         string namefile;
66         map<string, int> nameMap;
67         
68         
69         chopData(){}
70         chopData(string f, string ff, string a, MothurOut* mout, unsigned long long st, unsigned long long en, string k, bool cGaps, int nbases, bool S) {
71                 filename = f;
72                 outFasta = ff;
73         outAccnos = a;
74                 m = mout;
75                 start = st;
76                 end = en;
77         keep = k;
78         countGaps = cGaps;
79         numbases = nbases;
80         Short = S;
81                 wroteAccnos = false;
82         }
83 };
84
85 /**************************************************************************************************/
86 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
87 #else
88 static DWORD WINAPI MyChopThreadFunction(LPVOID lpParam){ 
89         chopData* pDataArray;
90         pDataArray = (chopData*)lpParam;
91         
92         try {
93         ofstream out;
94                 pDataArray->m->openOutputFile(pDataArray->outFasta, out);
95         
96         ofstream outAcc;
97                 pDataArray->m->openOutputFile(pDataArray->outAccnos, outAcc);
98         
99                 ifstream in;
100                 pDataArray->m->openInputFile(pDataArray->filename, in);
101         
102                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
103                         in.seekg(0);
104                 }else { //this accounts for the difference in line endings. 
105                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
106                 }
107
108                 bool done = false;
109         bool wroteAccnos = false;
110                 int count = 0;
111
112                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
113                                                 
114                         if (pDataArray->m->control_pressed) {  in.close(); out.close(); outAcc.close(); pDataArray->m->mothurRemove(pDataArray->outFasta); pDataArray->m->mothurRemove(pDataArray->outAccnos); return 0;  }
115             
116             Sequence seq(in); pDataArray->m->gobble(in);
117                         
118                         if (seq.getName() != "") {
119                                 //string newSeqString = getChopped(seq);
120                 ///////////////////////////////////////////////////////////////////////
121                 string temp = seq.getAligned();
122                 string tempUnaligned = seq.getUnaligned();
123                 
124                 if (pDataArray->countGaps) {
125                     //if needed trim sequence
126                     if (pDataArray->keep == "front") {//you want to keep the beginning
127                         int tempLength = temp.length();
128                         
129                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
130                             
131                             int stopSpot = 0;
132                             int numBasesCounted = 0;
133                             
134                             for (int i = 0; i < temp.length(); i++) {
135                                 //eliminate N's
136                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
137                                 
138                                 numBasesCounted++; 
139                                 
140                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
141                             }
142                             
143                             if (stopSpot == 0) { temp = ""; }
144                             else {  temp = temp.substr(0, stopSpot+1);  }
145                                                         
146                         }else { 
147                             if (!pDataArray->Short) { temp = ""; } //sequence too short
148                         }
149                     }else { //you are keeping the back
150                         int tempLength = temp.length();
151                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
152                             
153                             int stopSpot = 0;
154                             int numBasesCounted = 0;
155                             
156                             for (int i = (temp.length()-1); i >= 0; i--) {
157                                 //eliminate N's
158                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
159                                 
160                                 numBasesCounted++; 
161                                 
162                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
163                             }
164                             
165                             if (stopSpot == 0) { temp = ""; }
166                             else {  temp = temp.substr(stopSpot+1);  }
167                         }else { 
168                             if (!pDataArray->Short) { temp = ""; } //sequence too short
169                         }
170                     }
171                     
172                 }else{
173                     
174                     //if needed trim sequence
175                     if (pDataArray->keep == "front") {//you want to keep the beginning
176                         int tempLength = tempUnaligned.length();
177                         
178                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
179                             
180                             int stopSpot = 0;
181                             int numBasesCounted = 0;
182                             
183                             for (int i = 0; i < temp.length(); i++) {
184                                 //eliminate N's
185                                 if (toupper(temp[i]) == 'N') { 
186                                     temp[i] = '.'; 
187                                     tempLength--;
188                                     if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
189                                 }
190                                 
191                                 if(isalpha(temp[i])) { numBasesCounted++; }
192                                 
193                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
194                             }
195                             
196                             if (stopSpot == 0) { temp = ""; }
197                             else {  temp = temp.substr(0, stopSpot+1);  }
198                                                         
199                         }else { 
200                             if (!pDataArray->Short) { temp = ""; } //sequence too short
201                         }                               
202                     }else { //you are keeping the back
203                         int tempLength = tempUnaligned.length();
204                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
205                             
206                             int stopSpot = 0;
207                             int numBasesCounted = 0;
208                             
209                             for (int i = (temp.length()-1); i >= 0; i--) {
210                                 //eliminate N's
211                                 if (toupper(temp[i]) == 'N') { 
212                                     temp[i] = '.'; 
213                                     tempLength--;
214                                     if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
215                                 }
216                                 
217                                 if(isalpha(temp[i])) { numBasesCounted++; }
218                                 
219                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
220                             }
221                             
222                             if (stopSpot == 0) { temp = ""; }
223                             else {  temp = temp.substr(stopSpot);  }
224                         }else { 
225                             if (!pDataArray->Short) { temp = ""; } //sequence too short
226                         }
227                     }
228                 }
229                 
230                 string newSeqString = temp;
231                 ///////////////////////////////////////////////////////////////////////
232                                 
233                                 //output trimmed sequence
234                                 if (newSeqString != "") {
235                                         out << ">" << seq.getName() << endl << newSeqString << endl;
236                                 }else{
237                                         outAcc << seq.getName() << endl;
238                                         pDataArray->wroteAccnos = true;
239                                 }
240                 count++;
241                         }
242             //report progress
243                         if((count) % 1000 == 0){        pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
244                         
245                 }
246                 //report progress
247                 if((count) % 1000 != 0){        pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine();           }
248         
249                 
250                 in.close();
251         out.close();
252         outAcc.close();
253                                 
254                 return 0;
255                 
256         }
257         catch(exception& e) {
258                 pDataArray->m->errorOut(e, "ChopsSeqsCommand", "MyChopThreadFunction");
259                 exit(1);
260         }
261
262 #endif
263
264
265
266 #endif
267
268