1 #ifndef CHOPSEQSCOMMAND_H
2 #define CHOPSEQSCOMMAND_H
8 * Created by westcott on 5/10/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "sequence.hpp"
// Command subclass for the "chop.seqs" command (see getCommandName()); its
// description string is "trim sequence length".
17 class ChopSeqsCommand : public Command {
// Constructor taking one string.
// NOTE(review): presumably the user-supplied option string -- confirm in the .cpp.
21 ChopSeqsCommand(string);
// Returns a vector of strings.
// NOTE(review): presumably the parameter names this command accepts -- confirm in the .cpp.
25 vector<string> setParameters();
// Returns the name of this command, "chop.seqs".
string getCommandName() {
    string commandName = "chop.seqs";
    return commandName;
}
// Returns the category string this command is listed under, "Sequence Processing".
string getCommandCategory() {
    string category = "Sequence Processing";
    return category;
}
// Returns the help text; help() prints whatever this returns. Defined outside this header.
29 string getHelpString();
// Takes one string and returns a string. Defined outside this header.
// NOTE(review): presumably maps an output type to an output file-name pattern -- confirm in the .cpp.
30 string getOutputPattern(string);
// Returns the citation for this command: the URL of its mothur wiki page.
// The page is named after the command ("chop.seqs"), so the path is "Chop.seqs";
// the earlier spelling "Chops.seqs" did not match the command name.
string getCitation() { return "http://www.mothur.org/wiki/Chop.seqs"; }
// Returns the one-line description of this command, "trim sequence length".
string getDescription() {
    string description = "trim sequence length";
    return description;
}
35 void help() { m->mothurOut(getHelpString()); }
// Members of linePair (the struct header and closing brace are not visible in this excerpt).
// Holds two unsigned values, start and end.
// NOTE(review): in the thread function below the chopData fields of the same names are used as
// a file offset (start) and a number of sequences (end); presumably linePair carries the same
// meaning -- confirm against createProcesses()/driver().
39 unsigned long long start;
40 unsigned long long end;
// Initializes start from i and end from j.
41 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
// File names and string-valued settings held by the command.
// NOTE(review): presumably filled from the user's options -- confirm in the .cpp.
44 string fastafile, outputDir, keep, namefile, groupfile, countfile;
// Flags; countGaps and Short share their names with the chopData flags read by the thread function.
45 bool abort, countGaps, Short;
// numbases shares its name with the chopData field used as the number of bases to keep.
// NOTE(review): processors is presumably the number of worker processes/threads -- confirm.
46 int numbases, processors;
// NOTE(review): presumably the names of the files this command produced -- confirm in the .cpp.
47 vector<string> outputNames;
// Takes a Sequence by value and returns a string. The thread function contains a commented-out
// call "newSeqString = getChopped(seq)" at the point where it computes the chopped sequence text.
49 string getChopped(Sequence);
// Takes one linePair and three strings, returns bool. Defined outside this header.
// NOTE(review): presumably processes one file region; the strings are presumably file names -- confirm.
50 bool driver (linePair, string, string, string);
// Takes a vector of linePair and three strings, returns bool. Defined outside this header.
// NOTE(review): presumably fans the linePairs out to workers -- confirm in the .cpp.
51 bool createProcesses(vector<linePair>, string, string, string);
54 /**************************************************************************************************/
55 //custom data structure for threads to use.
56 // This is passed by void pointer so it can be any data type
57 // that can be passed using a single void pointer (LPVOID).
// Members of chopData, the argument bundle the thread function receives through its void pointer.
// The struct header, some members (e.g. filename, numbases, count, m) and the constructor body
// are not visible in this excerpt.
// outFasta / outAccnos: paths the thread function opens for output; keep: compared against "front".
60 string outFasta, outAccnos, keep;
// start: input position the thread function seeks to (0 and 1 are special-cased there).
61 unsigned long long start;
// end: used by the thread function as the number of sequences to process.
62 unsigned long long end;
// countGaps: selects which counting branch the thread function uses.
// Short: when false, sequences that are not longer than numbases are emptied.
// wroteAccnos: set to true by the thread function when it writes a name to the accnos file.
64 bool countGaps, Short, wroteAccnos;
// NOTE(review): not referenced in the visible part of the thread function -- purpose unconfirmed.
67 map<string, int> nameMap;
// Constructor; its body is not visible here.
// NOTE(review): presumably f -> filename, ff -> outFasta, a -> outAccnos, mout -> m, st -> start,
// en -> end, k -> keep, cGaps -> countGaps, nbases -> numbases, S -> Short -- confirm against the body.
71 chopData(string f, string ff, string a, MothurOut* mout, unsigned long long st, unsigned long long en, string k, bool cGaps, int nbases, bool S) {
86 /**************************************************************************************************/
87 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// Thread worker with the Windows thread-routine signature (DWORD WINAPI f(LPVOID)).
// lpParam is cast to chopData*. The worker reads sequences from pDataArray->filename, chops each
// one according to keep / countGaps / numbases / Short, writes non-empty results as FASTA records
// to pDataArray->outFasta and writes sequence names to pDataArray->outAccnos. Progress and errors
// are reported through pDataArray->m.
// NOTE(review): this excerpt omits a number of lines (declarations, else-branches, closing braces)
// and ends inside the catch handler; the comments below describe only what is visible.
89 static DWORD WINAPI MyChopThreadFunction(LPVOID lpParam){
// Recover the typed argument bundle from the opaque thread parameter.
91 pDataArray = (chopData*)lpParam;
// Open the FASTA output, the accnos output and the FASTA input.
95 pDataArray->m->openOutputFile(pDataArray->outFasta, out);
98 pDataArray->m->openOutputFile(pDataArray->outAccnos, outAcc);
101 pDataArray->m->openInputFile(pDataArray->filename, in);
// Position the input. A start of 0 or 1 is special-cased (that branch's body is not visible here);
// otherwise seek to one position before start and then call gobble on the stream.
103 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
105 }else { //this accounts for the difference in line endings.
106 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
// NOTE(review): this local is not used in the visible lines; the flag actually set further down
// is pDataArray->wroteAccnos. It looks like a leftover -- confirm and remove.
110 bool wroteAccnos = false;
111 pDataArray->count = 0;
// NOTE(review): i is int while end is unsigned long long, so the comparison mixes signed and unsigned.
113 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
// If control_pressed is set: close all three streams, remove both output files and return 0.
115 if (pDataArray->m->control_pressed) { in.close(); out.close(); outAcc.close(); pDataArray->m->mothurRemove(pDataArray->outFasta); pDataArray->m->mothurRemove(pDataArray->outAccnos); return 0; }
// Read the next sequence from the input stream.
117 Sequence seq(in); pDataArray->m->gobble(in);
// Only sequences with a non-empty name are processed.
119 if (seq.getName() != "") {
// The chopping logic is written out inline below in place of the commented-out getChopped(seq) call.
120 //string newSeqString = getChopped(seq);
121 ///////////////////////////////////////////////////////////////////////
// temp is the working copy that gets trimmed; tempUnaligned is used only for its length.
122 string temp = seq.getAligned();
123 string tempUnaligned = seq.getUnaligned();
// countGaps branch: the length test uses the aligned string's length.
// NOTE(review): the counter increment for this branch is not visible here; presumably every
// position (gaps included) is counted -- confirm.
125 if (pDataArray->countGaps) {
126 //if needed trim sequence
127 if (pDataArray->keep == "front") {//you want to keep the beginning
128 int tempLength = temp.length();
130 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
133 int numBasesCounted = 0;
// Walk from the front; 'N'/'n' characters are replaced by '.' as they are passed.
135 for (int i = 0; i < temp.length(); i++) {
137 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
141 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
// NOTE(review): stopSpot == 0 empties the sequence; if numbases is 1 the loop would also stop
// at index 0, so a legitimate one-position result would be discarded -- confirm intended.
144 if (stopSpot == 0) { temp = ""; }
// Keep positions 0..stopSpot inclusive.
145 else { temp = temp.substr(0, stopSpot+1); }
// NOTE(review): presumably the else-branch of the length test above (the else line is not visible).
148 if (!pDataArray->Short) { temp = ""; } //sequence too short
150 }else { //you are keeping the back
151 int tempLength = temp.length();
152 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
155 int numBasesCounted = 0;
// Walk from the back; 'N'/'n' characters are replaced by '.' as they are passed.
157 for (int i = (temp.length()-1); i >= 0; i--) {
159 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
163 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
166 if (stopSpot == 0) { temp = ""; }
// NOTE(review): this keeps the text AFTER stopSpot (stopSpot+1), whereas the matching
// keep-the-back case in the other branch below uses substr(stopSpot). The two differ by one
// character; this one drops the position at which the count was reached -- verify which is intended.
167 else { temp = temp.substr(stopSpot+1); }
169 if (!pDataArray->Short) { temp = ""; } //sequence too short
// NOTE(review): from here on is presumably the branch taken when countGaps is false (the else line
// is not visible). Length is measured on the unaligned string and only alphabetic characters
// are counted toward numbases.
175 //if needed trim sequence
176 if (pDataArray->keep == "front") {//you want to keep the beginning
177 int tempLength = tempUnaligned.length();
179 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
182 int numBasesCounted = 0;
184 for (int i = 0; i < temp.length(); i++) {
// NOTE(review): the body of this 'N' branch is only partly visible; the visible part stops the
// scan with stopSpot = 0 once tempLength has fallen below numbases.
186 if (toupper(temp[i]) == 'N') {
189 if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
// Count only alphabetic characters.
192 if(isalpha(temp[i])) { numBasesCounted++; }
194 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
197 if (stopSpot == 0) { temp = ""; }
// Keep positions 0..stopSpot inclusive.
198 else { temp = temp.substr(0, stopSpot+1); }
201 if (!pDataArray->Short) { temp = ""; } //sequence too short
203 }else { //you are keeping the back
204 int tempLength = tempUnaligned.length();
205 if (tempLength > pDataArray->numbases) { //you have enough bases to remove some
208 int numBasesCounted = 0;
210 for (int i = (temp.length()-1); i >= 0; i--) {
212 if (toupper(temp[i]) == 'N') {
215 if (tempLength < pDataArray->numbases) { stopSpot = 0; break; }
// Count only alphabetic characters.
218 if(isalpha(temp[i])) { numBasesCounted++; }
220 if (numBasesCounted >= pDataArray->numbases) { stopSpot = i; break; }
223 if (stopSpot == 0) { temp = ""; }
// Keep from stopSpot (inclusive) to the end.
224 else { temp = temp.substr(stopSpot); }
226 if (!pDataArray->Short) { temp = ""; } //sequence too short
231 string newSeqString = temp;
232 ///////////////////////////////////////////////////////////////////////
234 //output trimmed sequence
// A non-empty result is written as a FASTA record: ">" + name on one line, sequence on the next.
235 if (newSeqString != "") {
236 out << ">" << seq.getName() << endl << newSeqString << endl;
// NOTE(review): presumably the else-branch for sequences chopped to nothing (the else line is not
// visible). Writes the name to the accnos file and records that the accnos file has content.
238 outAcc << seq.getName() << endl;
239 pDataArray->wroteAccnos = true;
// Log the running count whenever it is a multiple of 1000.
244 if((pDataArray->count) % 1000 == 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
// Log the count when it is not a multiple of 1000.
// NOTE(review): presumably the final report after the loop -- the loop's closing brace is not visible.
248 if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
// Report std exceptions through errorOut.
// NOTE(review): the class-name string reads "ChopsSeqsCommand" while the class is ChopSeqsCommand --
// likely a typo in the error message.
258 catch(exception& e) {
259 pDataArray->m->errorOut(e, "ChopsSeqsCommand", "MyChopThreadFunction");