]> git.donarmstrong.com Git - mothur.git/blob - mallard.cpp
chimera.seqs pintail is working.
[mothur.git] / mallard.cpp
1 /*
2  *  mallard.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 8/11/09.
6  *  Copyright 2009 __MyCompanyName__. All rights reserved.
7  *
8  */
9
10 #include "mallard.h"
11
12 //***************************************************************************************************************
13
14 Mallard::Mallard(string filename) {  fastafile = filename;  }
15 //***************************************************************************************************************
16
17 Mallard::~Mallard() {
18         try {
19                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
20         }
21         catch(exception& e) {
22                 errorOut(e, "Mallard", "~Mallard");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 void Mallard::print(ostream& out) {
28         try {
29                 
30                 for (int i = 0; i < querySeqs.size(); i++) {
31                         
32                         out << querySeqs[i]->getName() << "\thighest de value = " << highestDE[i] << "\tpenalty score = " << marked[i] << endl;
33                         cout << querySeqs[i]->getName() << "\tpenalty score = " << marked[i] << endl;
34                                                 
35                 }
36         }
37         catch(exception& e) {
38                 errorOut(e, "Mallard", "print");
39                 exit(1);
40         }
41 }
42
43 //***************************************************************************************************************
44 void Mallard::getChimeras() {
45         try {
46                 
47                 //read in query sequences and subject sequences
48                 mothurOut("Reading sequences and template file... "); cout.flush();
49                 querySeqs = readSeqs(fastafile);
50                 mothurOut("Done."); mothurOutEndLine();
51                 
52                 int numSeqs = querySeqs.size();
53                 
54                 windowSizes.resize(numSeqs, window);
55                 quantilesMembers.resize(100);  //one for every percent mismatch
56                 highestDE.resize(numSeqs, 0.0);  //contains the highest de value for each seq
57                 
58                 //break up file if needed
59                 int linesPerProcess = numSeqs / processors ;
60                 
61                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
62                         //find breakup of sequences for all times we will Parallelize
63                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
64                         else {
65                                 //fill line pairs
66                                 for (int i = 0; i < (processors-1); i++) {                      
67                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
68                                 }
69                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
70                                 int i = processors - 1;
71                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
72                         }
73                         
74                 #else
75                         lines.push_back(new linePair(0, numSeqs));
76                 #endif
77                 
78                 decalc = new DeCalculator();
79                 
80                 //if the user does enter a mask then you want to keep all the spots in the alignment
81                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
82                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
83                 
84                 decalc->setMask(seqMask);
85                                 
86                 //find P
87                 mothurOut("Getting conservation... "); cout.flush();
88                 probabilityProfile = decalc->calcFreq(querySeqs, fastafile); 
89         
90                 //make P into Q
91                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  } 
92                 mothurOut("Done."); mothurOutEndLine();
93                 
94                 //mask sequences if the user wants to 
95                 if (seqMask != "") {
96                         //mask querys
97                         for (int i = 0; i < querySeqs.size(); i++) {
98                                 decalc->runMask(querySeqs[i]);
99                         }
100                 }
101                                                 
102                 mothurOut("Calculating DE values..."); cout.flush();
103                 if (processors == 1) { 
104                         quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, 0, querySeqs.size(), highestDE);
105                 }else {         createProcessesQuan();          }
106                 mothurOut("Done."); mothurOutEndLine();
107                 
108                 mothurOut("Ranking outliers..."); cout.flush();
109                 marked = decalc->returnObviousOutliers(quantilesMembers, querySeqs.size());
110                 mothurOut("Done."); mothurOutEndLine();
111
112         
113                 //free memory
114                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                }               
115                 delete decalc;
116         }
117         catch(exception& e) {
118                 errorOut(e, "Mallard", "getChimeras");
119                 exit(1);
120         }
121 }
122 /**************************************************************************************************/
123
124 void Mallard::createProcessesQuan() {
125         try {
126 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
127                 int process = 0;
128                 vector<int> processIDS;
129                                 
130                 //loop through and create all the processes you want
131                 while (process != processors) {
132                         int pid = fork();
133                         
134                         if (pid > 0) {
135                                 processIDS.push_back(pid);  
136                                 process++;
137                         }else if (pid == 0){
138                                 
139                                 quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, lines[process]->start, lines[process]->end, highestDE);
140                                 
141                                 //write out data to file so parent can read it
142                                 ofstream out;
143                                 string s = toString(getpid()) + ".temp";
144                                 openOutputFile(s, out);
145                                 
146                                                                 
147                                 //output observed distances
148                                 for (int i = 0; i < quantilesMembers.size(); i++) {
149                                         out << quantilesMembers[i].size() << '\t';
150                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
151                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
152                                         }
153                                         out << endl;
154                                 }
155                                 
156                                 out << highestDE.size() << endl;
157                                 for (int i = 0; i < highestDE.size(); i++) {
158                                         out << highestDE[i] << '\t';
159                                 }
160                                 out << endl;
161                                 
162                                 out.close();
163                                 
164                                 exit(0);
165                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
166                 }
167                 
168                 //force parent to wait until all the processes are done
169                 for (int i=0;i<processors;i++) { 
170                         int temp = processIDS[i];
171                         wait(&temp);
172                 }
173
174                 //get data created by processes
175                 for (int i=0;i<processors;i++) { 
176                         ifstream in;
177                         string s = toString(processIDS[i]) + ".temp";
178                         openInputFile(s, in);
179                         
180                         vector< vector<quanMember> > quan; 
181                         quan.resize(100);
182                         
183                         //get quantiles
184                         for (int m = 0; m < quan.size(); m++) {
185                                 int num;
186                                 in >> num; 
187                                 
188                                 gobble(in);
189
190                                 vector<quanMember> q;  float w; int b, n;
191                                 for (int j = 0; j < num; j++) {
192                                         in >> w >> b >> n;
193         //cout << w << '\t' << b << '\t' n << endl;
194                                         quanMember newMember(w, b, n);
195                                         q.push_back(newMember);
196                                 }
197 //cout << "here" << endl;
198                                 quan[m] = q;
199 //cout << "now here" << endl;
200                                 gobble(in);
201                         }
202                         
203         
204                         //save quan in quantiles
205                         for (int j = 0; j < quan.size(); j++) {
206                                 //put all values of q[i] into quan[i]
207                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
208                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
209                         }
210                         
211                         int num;
212                         in >> num;  gobble(in);
213                         
214                         int count = lines[process]->start;
215                         for (int s = 0; s < num; s++) {
216                                 float high;
217                                 in >> high;
218                                 
219                                 highestDE[count] = high;
220                                 count++;
221                         }
222                         
223                         in.close();
224                         remove(s.c_str());
225                 }
226                 
227 #else
228                 quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, 0, querySeqs.size(), highestDE);
229 #endif          
230         }
231         catch(exception& e) {
232                 errorOut(e, "Mallard", "createProcessesQuan");
233                 exit(1);
234         }
235 }
236 //***************************************************************************************************************
237
238