1 #ifndef CHOPSEQSCOMMAND_H
2 #define CHOPSEQSCOMMAND_H
8 * Created by westcott on 5/10/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "sequence.hpp"
17 class ChopSeqsCommand : public Command {
21 ChopSeqsCommand(string);
25 vector<string> setParameters();
// Returns the name of this command, "chop.seqs".
26 string getCommandName() { return "chop.seqs"; }
// Returns the category label for this command, "Sequence Processing".
27 string getCommandCategory() { return "Sequence Processing"; }
28 string getOutputFileNameTag(string, string);
29 string getHelpString();
// Returns the URL of the wiki page to cite for this command.
// The page name follows the command name ("chop.seqs"), so it is
// "Chop.seqs"; the earlier "Chops.seqs" spelling pointed at a page
// that does not match the command.
string getCitation() { return "http://www.mothur.org/wiki/Chop.seqs"; }
// Returns a one-line description of the command: "trim sequence length".
31 string getDescription() { return "trim sequence length"; }
// Passes the text returned by getHelpString() to m->mothurOut().
34 void help() { m->mothurOut(getHelpString()); }
38 unsigned long long start;
39 unsigned long long end;
// Constructs a linePair with start initialized to i and end initialized to j.
40 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
43 string fastafile, outputDir, keep;
44 bool abort, countGaps, Short;
45 int numbases, processors;
46 vector<string> outputNames;
48 string getChopped(Sequence);
49 bool driver (linePair, string, string, string);
50 bool createProcesses(vector<linePair>, string, string, string);
53 /**************************************************************************************************/
54 //custom data structure for threads to use.
55 // This is passed by void pointer so it can be any data type
56 // that can be passed using a single void pointer (LPVOID).
59 string outFasta, outAccnos, keep;
60 unsigned long long start;
61 unsigned long long end;
63 bool countGaps, Short, wroteAccnos;
66 map<string, int> nameMap;
70 chopData(string f, string ff, string a, MothurOut* mout, unsigned long long st, unsigned long long en, string k, bool cGaps, int nbases, bool S) {
85 /**************************************************************************************************/
86 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
88 static DWORD WINAPI MyChopThreadFunction(LPVOID lpParam){
90 pDataArray = (chopData*)lpParam;
94 pDataArray->m->openOutputFile(pDataArray->outFasta, out);
97 pDataArray->m->openOutputFile(pDataArray->outAccnos, outAcc);
100 pDataArray->m->openInputFile(pDataArray->filename, in);
102 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
104 }else { //this accounts for the difference in line endings.
105 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
109 bool wroteAccnos = false;
112 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
114 if (pDataArray->m->control_pressed) { in.close(); out.close(); outAcc.close(); pDataArray->m->mothurRemove(pDataArray->outFasta); pDataArray->m->mothurRemove(pDataArray->outAccnos); return 0; }
116 Sequence seq(in); pDataArray->m->gobble(in);
118 if (seq.getName() != "") {
119 //string newSeqString = getChopped(seq);
120 ///////////////////////////////////////////////////////////////////////
121 string temp = seq.getAligned();
122 string tempUnaligned = seq.getUnaligned();
124 if (pDataArray->countGaps) {
125 //if needed trim sequence
126 if (pDataArray->keep == "front") {//you want to keep the beginning
127 int tempLength = temp.length();
129 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
132 int numBasesCounted = 0;
134 for (int i = 0; i < temp.length(); i++) {
136 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
140 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
143 if (stopSpot == 0) { temp = ""; }
144 else { temp = temp.substr(0, stopSpot+1); }
147 if (!pDataArray->Short) { temp = ""; } //sequence too short
149 }else { //you are keeping the back
150 int tempLength = temp.length();
151 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
154 int numBasesCounted = 0;
156 for (int i = (temp.length()-1); i >= 0; i--) {
158 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
162 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
165 if (stopSpot == 0) { temp = ""; }
166 else { temp = temp.substr(stopSpot+1); }
168 if (!pDataArray->Short) { temp = ""; } //sequence too short
174 //if needed trim sequence
175 if (pDataArray->keep == "front") {//you want to keep the beginning
176 int tempLength = tempUnaligned.length();
178 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
181 int numBasesCounted = 0;
183 for (int i = 0; i < temp.length(); i++) {
185 if (toupper(temp[i]) == 'N') {
188 if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
191 if(isalpha(temp[i])) { numBasesCounted++; }
193 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
196 if (stopSpot == 0) { temp = ""; }
197 else { temp = temp.substr(0, stopSpot+1); }
200 if (!pDataArray->Short) { temp = ""; } //sequence too short
202 }else { //you are keeping the back
203 int tempLength = tempUnaligned.length();
204 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
207 int numBasesCounted = 0;
209 for (int i = (temp.length()-1); i >= 0; i--) {
211 if (toupper(temp[i]) == 'N') {
214 if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
217 if(isalpha(temp[i])) { numBasesCounted++; }
219 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
222 if (stopSpot == 0) { temp = ""; }
223 else { temp = temp.substr(stopSpot); }
225 if (!pDataArray->Short) { temp = ""; } //sequence too short
230 string newSeqString = temp;
231 ///////////////////////////////////////////////////////////////////////
233 //output trimmed sequence
234 if (newSeqString != "") {
235 out << ">" << seq.getName() << endl << newSeqString << endl;
237 outAcc << seq.getName() << endl;
238 pDataArray->wroteAccnos = true;
243 if((count) % 1000 == 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); }
247 if((count) % 1000 != 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); }
257 catch(exception& e) {
258 pDataArray->m->errorOut(e, "ChopsSeqsCommand", "MyChopThreadFunction");