]> git.donarmstrong.com Git - mothur.git/blob - primerdesigncommand.h
working on pam
[mothur.git] / primerdesigncommand.h
1 //
2 //  primerdesigncommand.h
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/18/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #ifndef Mothur_primerdesigncommand_h
10 #define Mothur_primerdesigncommand_h
11
12 #include "command.hpp"
13 #include "listvector.hpp"
14 #include "inputdata.h"
15 #include "sequence.hpp"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18
19 /**************************************************************************************************/
20
21 class PrimerDesignCommand : public Command {
22 public:
23     PrimerDesignCommand(string);
24     PrimerDesignCommand();
25     ~PrimerDesignCommand(){}
26     
27     vector<string> setParameters();
28     string getCommandName()                     { return "primer.design";               }
29     string getCommandCategory()         { return "OTU-Based Approaches";                } 
30     
31     string getOutputPattern(string);
32         string getHelpString(); 
33     string getCitation() { return "http://www.mothur.org/wiki/Primer.design"; }
34     string getDescription()             { return "identify sequence fragments that are specific to particular OTUs"; }
35     
36     int execute(); 
37     void help() { m->mothurOut(getHelpString()); }      
38     
39 private:
40     
41     struct linePair {
42                 int start;
43                 int end;
44                 linePair(int i, int j) : start(i), end(j) {}
45         };
46     struct fastaLinePair {
47                 unsigned long long start;
48                 unsigned long long end;
49                 fastaLinePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
50         };
51     
52     bool abort, allLines, large;
53     int cutoff, pdiffs, length, processors, alignedLength;
54     string outputDir, listfile, otulabel, namefile, countfile, fastafile, label;
55     double minTM, maxTM;
56     ListVector* list;
57     vector<string> outputNames;
58
59     int initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>&, map<string, int>&, vector<unsigned int>&);
60     map<string, int> readCount(unsigned long int&);
61     char getBase(vector<unsigned int> counts, int size);
62     int getListVector();
63     int countDiffs(string, string);
64     set<string> getPrimer(Sequence);
65     bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
66     int findMeltingPoint(string primer, double&, double&);
67     
68     set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int);
69     set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&, int);
70     vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
71     vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
72     int findIndex(string binLabel, vector<string> binLabels);
73     
74 };
75
76 /**************************************************************************************************/
77 //custom data structure for threads to use.
78 // This is passed by void pointer so it can be any data type
79 // that can be passed using a single void pointer (LPVOID).
80 struct primerDesignData {
81         string summaryFileName;
82         MothurOut* m;
83         int start;
84         int end;
85         int pdiffs, threadID,  length, binIndex;
86         set<string> primers;
87         vector<double> minTms, maxTms;
88     set<int> otusToRemove;
89     vector<Sequence> consSeqs;
90     int numBinsProcessed;
91         
92         primerDesignData(){}
93         primerDesignData(string sf, MothurOut* mout, int st, int en, vector<double> min, vector<double> max, set<string> pri, vector<Sequence> seqs, int d, int otun, int l, int tid) {
94                 summaryFileName = sf;
95                 m = mout;
96                 start = st;
97                 end = en;
98                 pdiffs = d;
99         minTms = min;
100         maxTms = max;
101         primers = pri;
102         consSeqs = seqs;
103         binIndex = otun;
104         length = l;
105                 threadID = tid;
106         numBinsProcessed = 0;
107         }
108 };
109
110 /**************************************************************************************************/
111 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
112 #else
113 static DWORD WINAPI MyPrimerThreadFunction(LPVOID lpParam){ 
114         primerDesignData* pDataArray;
115         pDataArray = (primerDesignData*)lpParam;
116         
117         try {
118                 ofstream outSum;
119         pDataArray->m->openOutputFileAppend(pDataArray->summaryFileName, outSum);
120         
121         for (int i = pDataArray->start; i < pDataArray->end; i++) {
122             
123             if (pDataArray->m->control_pressed) { break; }
124             
125             if (i != (pDataArray->binIndex)) {
126                 int primerIndex = 0;
127                 for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
128                     vector<int> primerStarts;
129                     vector<int> primerEnds;
130                     vector<int> mismatches;
131                     
132                     //bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
133                     ///////////////////////////////////////////////////////////////////////////////////////////////////
134                     bool found = false;  //innocent til proven guilty
135                     
136                     string rawSequence = pDataArray->consSeqs[i].getUnaligned();
137                     string primer = *it;
138                     
139                     //look for exact match
140                     if(rawSequence.length() < primer.length()) {  found = false;  }
141                     else {
142                         //search for primer
143                         for (int j = 0; j < rawSequence.length()-pDataArray->length; j++){
144                             
145                             if (pDataArray->m->control_pressed) {  found = false; break; }
146                             
147                             string rawChunk = rawSequence.substr(j, pDataArray->length);
148                             
149                             //int numDiff = countDiffs(primer, rawchuck);
150                             ///////////////////////////////////////////////////////////////////////
151                             int numDiff = 0;
152                             string oligo = primer;
153                             string seq = rawChunk;
154                             
155                             for(int k=0;k<pDataArray->length;k++){
156                                 
157                                 oligo[k] = toupper(oligo[k]);
158                                 seq[k] = toupper(seq[k]);
159                                
160                                 if(oligo[k] != seq[k]){
161             
162                                     if((oligo[k] == 'N' || oligo[k] == 'I') && (seq[k] == 'N'))                         {       numDiff++;      }
163                                     else if(oligo[k] == 'R' && (seq[k] != 'A' && seq[k] != 'G'))                                        {       numDiff++;      }
164                                     else if(oligo[k] == 'Y' && (seq[k] != 'C' && seq[k] != 'T'))                                        {       numDiff++;      }
165                                     else if(oligo[k] == 'M' && (seq[k] != 'C' && seq[k] != 'A'))                                        {       numDiff++;      }
166                                     else if(oligo[k] == 'K' && (seq[k] != 'T' && seq[k] != 'G'))                                        {       numDiff++;      }
167                                     else if(oligo[k] == 'W' && (seq[k] != 'T' && seq[k] != 'A'))                                        {       numDiff++;      }
168                                     else if(oligo[k] == 'S' && (seq[k] != 'C' && seq[k] != 'G'))                                        {       numDiff++;      }
169                                     else if(oligo[k] == 'B' && (seq[k] != 'C' && seq[k] != 'T' && seq[k] != 'G'))       {       numDiff++;      }
170                                     else if(oligo[k] == 'D' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'G'))       {       numDiff++;      }
171                                     else if(oligo[k] == 'H' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'C'))       {       numDiff++;      }
172                                     else if(oligo[k] == 'V' && (seq[k] != 'A' && seq[k] != 'C' && seq[k] != 'G'))       {       numDiff++;      }
173                                     else if(oligo[k] == 'A' && (seq[k] != 'A' && seq[k] != 'M' && seq[k] != 'R' && seq[k] != 'W' && seq[k] != 'D' && seq[k] != 'H' && seq[k] != 'V'))       {   numDiff++;      }
174                                     else if(oligo[k] == 'C' && (seq[k] != 'C' && seq[k] != 'Y' && seq[k] != 'M' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'H' && seq[k] != 'V'))       {   numDiff++;      }
175                                     else if(oligo[k] == 'G' && (seq[k] != 'G' && seq[k] != 'R' && seq[k] != 'K' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'V'))       {   numDiff++;      }
176                                     else if(oligo[k] == 'T' && (seq[k] != 'T' && seq[k] != 'Y' && seq[k] != 'K' && seq[k] != 'W' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'H'))       {   numDiff++;      }
177                                     else if((oligo[k] == '.' || oligo[k] == '-'))           {   numDiff++;      }
178                                 }
179                             }
180                             ///////////////////////////////////////////////////////////////////////
181                             
182                             if(numDiff <= pDataArray->pdiffs){
183                                 primerStarts.push_back(j);
184                                 primerEnds.push_back(j+pDataArray->length);
185                                 mismatches.push_back(numDiff);
186                                 found = true;
187                             }
188                         }
189                     }
190                     ///////////////////////////////////////////////////////////////////////////////////////////////////
191                     
192                     //if we found it report to the table
193                     if (found) {
194                         for (int j = 0; j < primerStarts.size(); j++) {
195                             outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << pDataArray->length << '\t' << mismatches[j] << '\t' << pDataArray->minTms[primerIndex] << '\t' << pDataArray->maxTms[primerIndex] << endl;
196                         }
197                         pDataArray->otusToRemove.insert(i);
198                     }
199                     primerIndex++;
200                 }
201             }
202             pDataArray->numBinsProcessed++;
203         }
204         outSum.close();
205         
206         }
207         catch(exception& e) {
208                 pDataArray->m->errorOut(e, "PrimerDesignCommand", "MyPrimerThreadFunction");
209                 exit(1);
210         }
211
212 #endif
213
214 /**************************************************************************************************/
215
216
217
218
219
220 #endif