]> git.donarmstrong.com Git - mothur.git/blob - chopseqscommand.h
added modify names parameter to set.dir
[mothur.git] / chopseqscommand.h
1 #ifndef CHOPSEQSCOMMAND_H
2 #define CHOPSEQSCOMMAND_H
3
4 /*
5  *  chopseqscommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 5/10/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13
14 #include "command.hpp"
15 #include "sequence.hpp"
16
17 class ChopSeqsCommand : public Command {
18         
19         public:
20         
21                 ChopSeqsCommand(string);
22                 ChopSeqsCommand();      
23                 ~ChopSeqsCommand(){};
24         
25                 vector<string> setParameters();
26                 string getCommandName()                 { return "chop.seqs";           }
27                 string getCommandCategory()             { return "Sequence Processing"; }
28                 
29         string getHelpString(); 
30         string getOutputPattern(string);        
31                 string getCitation() { return "http://www.mothur.org/wiki/Chops.seqs"; }
32                 string getDescription()         { return "trim sequence length"; }
33         
34                 int execute(); 
35                 void help() { m->mothurOut(getHelpString()); }          
36         
37         private:
38         struct linePair {
39             unsigned long long start;
40             unsigned long long end;
41             linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
42         };
43     
44                 string fastafile, outputDir, keep, namefile, groupfile, countfile;
45                 bool abort, countGaps, Short;
46                 int numbases, processors;
47                 vector<string> outputNames;
48                 
49                 string getChopped(Sequence);
50         bool driver (linePair, string, string, string);
51         bool createProcesses(vector<linePair>, string, string, string);
52 };
53
54 /**************************************************************************************************/
55 //custom data structure for threads to use.
56 // This is passed by void pointer so it can be any data type
57 // that can be passed using a single void pointer (LPVOID).
58 struct chopData {
59         string filename; 
60         string outFasta, outAccnos, keep; 
61         unsigned long long start;
62         unsigned long long end;
63         int numbases, count;
64     bool countGaps, Short, wroteAccnos;
65         MothurOut* m;
66         string namefile;
67         map<string, int> nameMap;
68         
69         
70         chopData(){}
71         chopData(string f, string ff, string a, MothurOut* mout, unsigned long long st, unsigned long long en, string k, bool cGaps, int nbases, bool S) {
72                 filename = f;
73                 outFasta = ff;
74         outAccnos = a;
75                 m = mout;
76                 start = st;
77                 end = en;
78         keep = k;
79         countGaps = cGaps;
80         numbases = nbases;
81         Short = S;
82                 wroteAccnos = false;
83         }
84 };
85
86 /**************************************************************************************************/
87 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
88 #else
89 static DWORD WINAPI MyChopThreadFunction(LPVOID lpParam){ 
90         chopData* pDataArray;
91         pDataArray = (chopData*)lpParam;
92         
93         try {
94         ofstream out;
95                 pDataArray->m->openOutputFile(pDataArray->outFasta, out);
96         
97         ofstream outAcc;
98                 pDataArray->m->openOutputFile(pDataArray->outAccnos, outAcc);
99         
100                 ifstream in;
101                 pDataArray->m->openInputFile(pDataArray->filename, in);
102         
103                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
104                         in.seekg(0);
105                 }else { //this accounts for the difference in line endings. 
106                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
107                 }
108
109                 bool done = false;
110         bool wroteAccnos = false;
111                 pDataArray->count = 0;
112
113                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
114                                                 
115                         if (pDataArray->m->control_pressed) {  in.close(); out.close(); outAcc.close(); pDataArray->m->mothurRemove(pDataArray->outFasta); pDataArray->m->mothurRemove(pDataArray->outAccnos); return 0;  }
116             
117             Sequence seq(in); pDataArray->m->gobble(in);
118                         
119                         if (seq.getName() != "") {
120                                 //string newSeqString = getChopped(seq);
121                 ///////////////////////////////////////////////////////////////////////
122                 string temp = seq.getAligned();
123                 string tempUnaligned = seq.getUnaligned();
124                 
125                 if (pDataArray->countGaps) {
126                     //if needed trim sequence
127                     if (pDataArray->keep == "front") {//you want to keep the beginning
128                         int tempLength = temp.length();
129                         
130                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
131                             
132                             int stopSpot = 0;
133                             int numBasesCounted = 0;
134                             
135                             for (int i = 0; i < temp.length(); i++) {
136                                 //eliminate N's
137                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
138                                 
139                                 numBasesCounted++; 
140                                 
141                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
142                             }
143                             
144                             if (stopSpot == 0) { temp = ""; }
145                             else {  temp = temp.substr(0, stopSpot+1);  }
146                                                         
147                         }else { 
148                             if (!pDataArray->Short) { temp = ""; } //sequence too short
149                         }
150                     }else { //you are keeping the back
151                         int tempLength = temp.length();
152                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
153                             
154                             int stopSpot = 0;
155                             int numBasesCounted = 0;
156                             
157                             for (int i = (temp.length()-1); i >= 0; i--) {
158                                 //eliminate N's
159                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
160                                 
161                                 numBasesCounted++; 
162                                 
163                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
164                             }
165                             
166                             if (stopSpot == 0) { temp = ""; }
167                             else {  temp = temp.substr(stopSpot+1);  }
168                         }else { 
169                             if (!pDataArray->Short) { temp = ""; } //sequence too short
170                         }
171                     }
172                     
173                 }else{
174                     
175                     //if needed trim sequence
176                     if (pDataArray->keep == "front") {//you want to keep the beginning
177                         int tempLength = tempUnaligned.length();
178                         
179                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
180                             
181                             int stopSpot = 0;
182                             int numBasesCounted = 0;
183                             
184                             for (int i = 0; i < temp.length(); i++) {
185                                 //eliminate N's
186                                 if (toupper(temp[i]) == 'N') { 
187                                     temp[i] = '.'; 
188                                     tempLength--;
189                                     if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
190                                 }
191                                 
192                                 if(isalpha(temp[i])) { numBasesCounted++; }
193                                 
194                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
195                             }
196                             
197                             if (stopSpot == 0) { temp = ""; }
198                             else {  temp = temp.substr(0, stopSpot+1);  }
199                                                         
200                         }else { 
201                             if (!pDataArray->Short) { temp = ""; } //sequence too short
202                         }                               
203                     }else { //you are keeping the back
204                         int tempLength = tempUnaligned.length();
205                         if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
206                             
207                             int stopSpot = 0;
208                             int numBasesCounted = 0;
209                             
210                             for (int i = (temp.length()-1); i >= 0; i--) {
211                                 //eliminate N's
212                                 if (toupper(temp[i]) == 'N') { 
213                                     temp[i] = '.'; 
214                                     tempLength--;
215                                     if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
216                                 }
217                                 
218                                 if(isalpha(temp[i])) { numBasesCounted++; }
219                                 
220                                 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
221                             }
222                             
223                             if (stopSpot == 0) { temp = ""; }
224                             else {  temp = temp.substr(stopSpot);  }
225                         }else { 
226                             if (!pDataArray->Short) { temp = ""; } //sequence too short
227                         }
228                     }
229                 }
230                 
231                 string newSeqString = temp;
232                 ///////////////////////////////////////////////////////////////////////
233                                 
234                                 //output trimmed sequence
235                                 if (newSeqString != "") {
236                                         out << ">" << seq.getName() << endl << newSeqString << endl;
237                                 }else{
238                                         outAcc << seq.getName() << endl;
239                                         pDataArray->wroteAccnos = true;
240                                 }
241                 pDataArray->count++;
242                         }
243             //report progress
244                         if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
245                         
246                 }
247                 //report progress
248                 if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
249         
250                 
251                 in.close();
252         out.close();
253         outAcc.close();
254                                 
255                 return 0;
256                 
257         }
258         catch(exception& e) {
259                 pDataArray->m->errorOut(e, "ChopsSeqsCommand", "MyChopThreadFunction");
260                 exit(1);
261         }
262
263 #endif
264
265
266
267 #endif
268
269