]> git.donarmstrong.com Git - mothur.git/blob - pairwiseseqscommand.h
changes while testing
[mothur.git] / pairwiseseqscommand.h
1 #ifndef PAIRWISESEQSCOMMAND_H
2 #define PAIRWISESEQSCOMMAND_H
3
4 /*
5  *  pairwiseseqscommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 10/20/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "database.hpp"
16 #include "alignment.hpp"
17 #include "validcalculator.h"
18 #include "dist.h"
19 #include "sequencedb.h"
20 #include "sequence.hpp"
21
22 #include "gotohoverlap.hpp"
23 #include "needlemanoverlap.hpp"
24 #include "blastalign.hpp"
25 #include "noalign.hpp"
26
27 #include "ignoregaps.h"
28 #include "eachgapdist.h"
29 #include "eachgapignore.h"
30 #include "onegapdist.h"
31 #include "onegapignore.h"
32
33 class PairwiseSeqsCommand : public Command {
34         
35 public:
36         PairwiseSeqsCommand(string);    
37         PairwiseSeqsCommand();
38         ~PairwiseSeqsCommand() {}
39         
40         vector<string> setParameters();
41         string getCommandName()                 { return "pairwise.seqs";               }
42         string getCommandCategory()             { return "Sequence Processing"; }
43         
44         string getHelpString(); 
45     string getOutputPattern(string);    
46         string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. [ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; }
47         string getDescription()         { return "calculates pairwise distances from an unaligned fasta file"; }
48
49         int execute(); 
50         void help() { m->mothurOut(getHelpString()); }  
51         
52 private:
53         struct distlinePair {
54                 int start;
55                 int end;
56         };
57         
58         vector<int> processIDS;   //end line, processid
59         vector<distlinePair> lines;
60         
61         SequenceDB alignDB;
62         
63         void createProcesses(string);
64         int driver(int, int, string, float);
65         int driver(int, int, string, string);
66         
67         #ifdef USE_MPI 
68         int driverMPI(int, int, MPI_File&, float);
69         int driverMPI(int, int, string, unsigned long long&);
70         int driverMPI(int, int, string, unsigned long long&, string);
71         #endif
72         
73         string fastaFileName, align, calc, outputDir, output;
74         float match, misMatch, gapOpen, gapExtend, cutoff;
75         int processors, longestBase;
76         vector<string> fastaFileNames, Estimators;
77         vector<string> outputNames;
78         
79         bool abort, countends, compress;
80 };
81
82 /**************************************************************************************************/
83 //custom data structure for threads to use.
84 // This is passed by void pointer so it can be any data type
85 // that can be passed using a single void pointer (LPVOID).
86 struct pairwiseData {
87     string outputFileName;
88         string align, square, distcalcType, output;
89         unsigned long long start;
90         unsigned long long end;
91         MothurOut* m;
92         float match, misMatch, gapOpen, gapExtend, cutoff;
93         int count, threadID, longestBase;
94     bool countends;
95     SequenceDB alignDB;
96         
97         pairwiseData(){}
98         pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, float cu, int tid) {
99                 outputFileName = ofn;
100                 m = mout;
101                 start = st;
102                 end = en;
103                 match = ma; 
104                 misMatch = misMa;
105                 gapOpen = gapO; 
106                 gapExtend = gapE; 
107                 longestBase = thr;
108                 align = al;
109         square = sq;
110         distcalcType = di;
111         countends = co;
112         alignDB = DB;
113                 count = 0;
114         output = op;
115         cutoff = cu;
116                 threadID = tid;
117         }
118 };
119
120 /**************************************************************************************************/
121 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
122 #else
123 static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){ 
124         pairwiseData* pDataArray;
125         pDataArray = (pairwiseData*)lpParam;
126         
127         try {
128                 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
129                 outFile.setf(ios::fixed, ios::showpoint);
130                 outFile << setprecision(4);
131                 
132                 pDataArray->count = 0;
133         
134         int startTime = time(NULL);
135         
136         Alignment* alignment;
137         if(pDataArray->align == "gotoh")                        {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                     }
138                 else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                                }
139                 else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
140                 else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
141                 else {
142                         pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
143                         pDataArray->m->mothurOutEndLine();
144                         alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
145                 }
146                 
147         ValidCalculators validCalculator;
148         Dist* distCalculator;
149         if (pDataArray->countends) {
150             if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
151                 if (pDataArray->distcalcType == "nogaps")                       {       distCalculator = new ignoreGaps();      }
152                 else if (pDataArray->distcalcType == "eachgap") {       distCalculator = new eachGapDist();     }
153                 else if (pDataArray->distcalcType == "onegap")          {       distCalculator = new oneGapDist();      }
154             }
155         }else {
156             if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
157                 if (pDataArray->distcalcType == "nogaps")               {       distCalculator = new ignoreGaps();                                      }
158                 else if (pDataArray->distcalcType == "eachgap"){        distCalculator = new eachGapIgnoreTermGapDist();        }
159                 else if (pDataArray->distcalcType == "onegap")  {       distCalculator = new oneGapIgnoreTermGapDist();         }
160             }
161         }
162
163         if(pDataArray->start == 0){     outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
164                 
165                 for(int i=pDataArray->start;i<pDataArray->end;i++){
166             pDataArray->count++;
167             
168                         string name = pDataArray->alignDB.get(i).getName();
169                         //pad with spaces to make compatible
170                         if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
171             
172                         outFile << name << '\t';        
173                         
174                         for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
175                                 
176                                 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
177                                 
178                                 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
179                                         alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
180                                 }
181                                 
182                                 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
183                                         alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
184                                 }
185                                 
186                                 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
187                                 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
188                                 
189                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
190                                 seqI.setAligned(alignment->getSeqAAln());
191                                 seqJ.setAligned(alignment->getSeqBAln());
192                                 
193                                 distCalculator->calcDist(seqI, seqJ);
194                                 double dist = distCalculator->getDist();
195                 
196                 if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' +  alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
197                 
198                                 outFile << dist << '\t'; 
199                         }
200                         
201                         outFile << endl; 
202                         
203                         if(i % 100 == 0){
204                                 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
205                         }
206                         
207                 }
208                 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
209                 
210                 outFile.close();
211         delete alignment;
212         delete distCalculator;
213
214         
215     }
216         catch(exception& e) {
217                 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
218                 exit(1);
219         }
220
221
222 /**************************************************************************************************/
223 static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){ 
224         pairwiseData* pDataArray;
225         pDataArray = (pairwiseData*)lpParam;
226         
227         try {
228                 ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
229                 outFile.setf(ios::fixed, ios::showpoint);
230                 outFile << setprecision(4);
231         
232         int startTime = time(NULL);
233         
234         Alignment* alignment;
235         if(pDataArray->align == "gotoh")                        {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                     }
236                 else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                                }
237                 else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
238                 else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
239                 else {
240                         pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
241                         pDataArray->m->mothurOutEndLine();
242                         alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
243                 }
244                 
245         ValidCalculators validCalculator;
246         Dist* distCalculator;
247         if (pDataArray->countends) {
248             if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
249                 if (pDataArray->distcalcType == "nogaps")                       {       distCalculator = new ignoreGaps();      }
250                 else if (pDataArray->distcalcType == "eachgap") {       distCalculator = new eachGapDist();     }
251                 else if (pDataArray->distcalcType == "onegap")          {       distCalculator = new oneGapDist();      }
252             }
253         }else {
254             if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
255                 if (pDataArray->distcalcType == "nogaps")               {       distCalculator = new ignoreGaps();                                      }
256                 else if (pDataArray->distcalcType == "eachgap"){        distCalculator = new eachGapIgnoreTermGapDist();        }
257                 else if (pDataArray->distcalcType == "onegap")  {       distCalculator = new oneGapIgnoreTermGapDist();         }
258             }
259         }
260         
261         if((pDataArray->output == "lt") && pDataArray->start == 0){     outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
262                 
263         pDataArray->count = 0;
264                 for(int i=pDataArray->start;i<pDataArray->end;i++){
265             pDataArray->count++;
266             
267                         if(pDataArray->output == "lt")  {       
268                                 string name = pDataArray->alignDB.get(i).getName();
269                                 if (name.length() < 10) { //pad with spaces to make compatible
270                                         while (name.length() < 10) {  name += " ";  }
271                                 }
272                                 outFile << name << '\t';        
273                         }
274
275                         
276                         for(int j=0;j<i;j++){
277                                 
278                                 if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
279                                 
280                                 if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
281                                         alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
282                                 }
283                                 
284                                 if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
285                                         alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
286                                 }
287                                 
288                                 Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
289                                 Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
290                                 
291                                 alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
292                                 seqI.setAligned(alignment->getSeqAAln());
293                                 seqJ.setAligned(alignment->getSeqBAln());
294                                 
295                                 distCalculator->calcDist(seqI, seqJ);
296                                 double dist = distCalculator->getDist();
297                 
298                 if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' +  alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
299                 
300                                 if(dist <= pDataArray->cutoff){
301                                         if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
302                                 }
303                                 if (pDataArray->output == "lt") {  outFile << dist << '\t'; }
304                         }
305                         
306                         if (pDataArray->output == "lt") { outFile << endl; }
307                         
308                         if(i % 100 == 0){
309                                 pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
310                         }
311                         
312                 }
313                 pDataArray->m->mothurOutJustToScreen(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)+"\n");
314                 
315                 outFile.close();
316         delete alignment;
317         delete distCalculator;
318         
319         
320     }
321         catch(exception& e) {
322                 pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");
323                 exit(1);
324         }
325
326
327 #endif
328
329
330 #endif
331