]> git.donarmstrong.com Git - mothur.git/blob - primerdesigncommand.h
changing command name classify.shared to classifyrf.shared
[mothur.git] / primerdesigncommand.h
1 //
2 //  primerdesigncommand.h
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/18/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #ifndef Mothur_primerdesigncommand_h
10 #define Mothur_primerdesigncommand_h
11
12 #include "command.hpp"
13 #include "listvector.hpp"
14 #include "inputdata.h"
15 #include "sequence.hpp"
16 #include "alignment.hpp"
17 #include "needlemanoverlap.hpp"
18
19 /**************************************************************************************************/
20
21 class PrimerDesignCommand : public Command {
22 public:
23     PrimerDesignCommand(string);
24     PrimerDesignCommand();
25     ~PrimerDesignCommand(){}
26     
27     vector<string> setParameters();
28     string getCommandName()                     { return "primer.design";               }
29     string getCommandCategory()         { return "OTU-Based Approaches";                } 
30     
31     string getOutputPattern(string);
32         string getHelpString(); 
33     string getCitation() { return "http://www.mothur.org/wiki/Primer.design"; }
34     string getDescription()             { return "identify sequence fragments that are specific to particular OTUs"; }
35     
36     int execute(); 
37     void help() { m->mothurOut(getHelpString()); }      
38     
39 private:
40     
41     struct linePair {
42                 int start;
43                 int end;
44                 linePair(int i, int j) : start(i), end(j) {}
45         };
46     struct fastaLinePair {
47                 unsigned long long start;
48                 unsigned long long end;
49                 fastaLinePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
50         };
51     
52     bool abort, allLines, large;
53     int cutoff, pdiffs, length, otunumber, processors, alignedLength;
54     string outputDir, listfile, namefile, countfile, fastafile, label;
55     double minTM, maxTM;
56     ListVector* list;
57     vector<string> outputNames;
58
59     int initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>&, map<string, int>&, vector<unsigned int>&);
60     map<string, int> readCount(unsigned long int&);
61     char getBase(vector<unsigned int> counts, int size);
62     int getListVector();
63     int countDiffs(string, string);
64     set<string> getPrimer(Sequence);
65     bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
66     int findMeltingPoint(string primer, double&, double&);
67     
68     set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&);
69     set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&);
70     vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
71     vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
72     
73 };
74
75 /**************************************************************************************************/
76 //custom data structure for threads to use.
77 // This is passed by void pointer so it can be any data type
78 // that can be passed using a single void pointer (LPVOID).
79 struct primerDesignData {
80         string summaryFileName;
81         MothurOut* m;
82         int start;
83         int end;
84         int pdiffs, threadID, otunumber, length;
85         set<string> primers;
86         vector<double> minTms, maxTms;
87     set<int> otusToRemove;
88     vector<Sequence> consSeqs;
89     int numBinsProcessed;
90         
91         primerDesignData(){}
92         primerDesignData(string sf, MothurOut* mout, int st, int en, vector<double> min, vector<double> max, set<string> pri, vector<Sequence> seqs, int d, int otun, int l, int tid) {
93                 summaryFileName = sf;
94                 m = mout;
95                 start = st;
96                 end = en;
97                 pdiffs = d;
98         minTms = min;
99         maxTms = max;
100         primers = pri;
101         consSeqs = seqs;
102         otunumber = otun;
103         length = l;
104                 threadID = tid;
105         numBinsProcessed = 0;
106         }
107 };
108
109 /**************************************************************************************************/
110 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
111 #else
112 static DWORD WINAPI MyPrimerThreadFunction(LPVOID lpParam){ 
113         primerDesignData* pDataArray;
114         pDataArray = (primerDesignData*)lpParam;
115         
116         try {
117                 ofstream outSum;
118         pDataArray->m->openOutputFileAppend(pDataArray->summaryFileName, outSum);
119         
120         for (int i = pDataArray->start; i < pDataArray->end; i++) {
121             
122             if (pDataArray->m->control_pressed) { break; }
123             
124             if (i != (pDataArray->otunumber-1)) {
125                 int primerIndex = 0;
126                 for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
127                     vector<int> primerStarts;
128                     vector<int> primerEnds;
129                     vector<int> mismatches;
130                     
131                     //bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
132                     ///////////////////////////////////////////////////////////////////////////////////////////////////
133                     bool found = false;  //innocent til proven guilty
134                     
135                     string rawSequence = pDataArray->consSeqs[i].getUnaligned();
136                     string primer = *it;
137                     
138                     //look for exact match
139                     if(rawSequence.length() < primer.length()) {  found = false;  }
140                     else {
141                         //search for primer
142                         for (int j = 0; j < rawSequence.length()-pDataArray->length; j++){
143                             
144                             if (pDataArray->m->control_pressed) {  found = false; break; }
145                             
146                             string rawChunk = rawSequence.substr(j, pDataArray->length);
147                             
148                             //int numDiff = countDiffs(primer, rawchuck);
149                             ///////////////////////////////////////////////////////////////////////
150                             int numDiff = 0;
151                             string oligo = primer;
152                             string seq = rawChunk;
153                             
154                             for(int k=0;k<pDataArray->length;k++){
155                                 
156                                 oligo[k] = toupper(oligo[k]);
157                                 seq[k] = toupper(seq[k]);
158                                
159                                 if(oligo[k] != seq[k]){
160             
161                                     if((oligo[k] == 'N' || oligo[k] == 'I') && (seq[k] == 'N'))                         {       numDiff++;      }
162                                     else if(oligo[k] == 'R' && (seq[k] != 'A' && seq[k] != 'G'))                                        {       numDiff++;      }
163                                     else if(oligo[k] == 'Y' && (seq[k] != 'C' && seq[k] != 'T'))                                        {       numDiff++;      }
164                                     else if(oligo[k] == 'M' && (seq[k] != 'C' && seq[k] != 'A'))                                        {       numDiff++;      }
165                                     else if(oligo[k] == 'K' && (seq[k] != 'T' && seq[k] != 'G'))                                        {       numDiff++;      }
166                                     else if(oligo[k] == 'W' && (seq[k] != 'T' && seq[k] != 'A'))                                        {       numDiff++;      }
167                                     else if(oligo[k] == 'S' && (seq[k] != 'C' && seq[k] != 'G'))                                        {       numDiff++;      }
168                                     else if(oligo[k] == 'B' && (seq[k] != 'C' && seq[k] != 'T' && seq[k] != 'G'))       {       numDiff++;      }
169                                     else if(oligo[k] == 'D' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'G'))       {       numDiff++;      }
170                                     else if(oligo[k] == 'H' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'C'))       {       numDiff++;      }
171                                     else if(oligo[k] == 'V' && (seq[k] != 'A' && seq[k] != 'C' && seq[k] != 'G'))       {       numDiff++;      }
172                                     else if(oligo[k] == 'A' && (seq[k] != 'A' && seq[k] != 'M' && seq[k] != 'R' && seq[k] != 'W' && seq[k] != 'D' && seq[k] != 'H' && seq[k] != 'V'))       {   numDiff++;      }
173                                     else if(oligo[k] == 'C' && (seq[k] != 'C' && seq[k] != 'Y' && seq[k] != 'M' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'H' && seq[k] != 'V'))       {   numDiff++;      }
174                                     else if(oligo[k] == 'G' && (seq[k] != 'G' && seq[k] != 'R' && seq[k] != 'K' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'V'))       {   numDiff++;      }
175                                     else if(oligo[k] == 'T' && (seq[k] != 'T' && seq[k] != 'Y' && seq[k] != 'K' && seq[k] != 'W' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'H'))       {   numDiff++;      }
176                                     else if((oligo[k] == '.' || oligo[k] == '-'))           {   numDiff++;      }
177                                 }
178                             }
179                             ///////////////////////////////////////////////////////////////////////
180                             
181                             if(numDiff <= pDataArray->pdiffs){
182                                 primerStarts.push_back(j);
183                                 primerEnds.push_back(j+pDataArray->length);
184                                 mismatches.push_back(numDiff);
185                                 found = true;
186                             }
187                         }
188                     }
189                     ///////////////////////////////////////////////////////////////////////////////////////////////////
190                     
191                     //if we found it report to the table
192                     if (found) {
193                         for (int j = 0; j < primerStarts.size(); j++) {
194                             outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << pDataArray->length << '\t' << mismatches[j] << '\t' << pDataArray->minTms[primerIndex] << '\t' << pDataArray->maxTms[primerIndex] << endl;
195                         }
196                         pDataArray->otusToRemove.insert(i);
197                     }
198                     primerIndex++;
199                 }
200             }
201             pDataArray->numBinsProcessed++;
202         }
203         outSum.close();
204         
205         }
206         catch(exception& e) {
207                 pDataArray->m->errorOut(e, "PrimerDesignCommand", "MyPrimerThreadFunction");
208                 exit(1);
209         }
210
211 #endif
212
213 /**************************************************************************************************/
214
215
216
217
218
219 #endif