5 * Created by Sarah Westcott on 8/11/09.
6 * Copyright 2009 __MyCompanyName__. All rights reserved.
12 //***************************************************************************************************************
14 Mallard::Mallard(string filename) { fastafile = filename; }
15 //***************************************************************************************************************
19 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
22 errorOut(e, "Mallard", "~Mallard");
26 //***************************************************************************************************************
27 void Mallard::print(ostream& out) {
30 for (int i = 0; i < querySeqs.size(); i++) {
32 out << querySeqs[i]->getName() << "\thighest de value = " << highestDE[i] << "\tpenalty score = " << marked[i] << endl;
33 cout << querySeqs[i]->getName() << "\tpenalty score = " << marked[i] << endl;
38 errorOut(e, "Mallard", "print");
43 //***************************************************************************************************************
44 void Mallard::getChimeras() {
47 //read in query sequences and subject sequences
48 mothurOut("Reading sequences and template file... "); cout.flush();
49 querySeqs = readSeqs(fastafile);
50 mothurOut("Done."); mothurOutEndLine();
52 int numSeqs = querySeqs.size();
54 windowSizes.resize(numSeqs, window);
55 quantilesMembers.resize(100); //one for every percent mismatch
56 highestDE.resize(numSeqs, 0.0); //contains the highest de value for each seq
58 //break up file if needed
59 int linesPerProcess = numSeqs / processors ;
61 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
62 //find breakup of sequences for all times we will Parallelize
63 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
66 for (int i = 0; i < (processors-1); i++) {
67 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
69 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
70 int i = processors - 1;
71 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
75 lines.push_back(new linePair(0, numSeqs));
78 decalc = new DeCalculator();
80 //if the user does enter a mask then you want to keep all the spots in the alignment
81 if (seqMask.length() == 0) { decalc->setAlignmentLength(querySeqs[0]->getAligned().length()); }
82 else { decalc->setAlignmentLength(seqMask.length()); }
84 decalc->setMask(seqMask);
87 mothurOut("Getting conservation... "); cout.flush();
88 probabilityProfile = decalc->calcFreq(querySeqs, fastafile);
91 for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; }
92 mothurOut("Done."); mothurOutEndLine();
94 //mask sequences if the user wants to
97 for (int i = 0; i < querySeqs.size(); i++) {
98 decalc->runMask(querySeqs[i]);
102 mothurOut("Calculating DE values..."); cout.flush();
103 if (processors == 1) {
104 quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, 0, querySeqs.size(), highestDE);
105 }else { createProcessesQuan(); }
106 mothurOut("Done."); mothurOutEndLine();
108 mothurOut("Ranking outliers..."); cout.flush();
109 marked = decalc->returnObviousOutliers(quantilesMembers, querySeqs.size());
110 mothurOut("Done."); mothurOutEndLine();
114 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
117 catch(exception& e) {
118 errorOut(e, "Mallard", "getChimeras");
122 /**************************************************************************************************/
124 void Mallard::createProcessesQuan() {
126 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
128 vector<int> processIDS;
130 //loop through and create all the processes you want
131 while (process != processors) {
135 processIDS.push_back(pid);
139 quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, lines[process]->start, lines[process]->end, highestDE);
141 //write out data to file so parent can read it
143 string s = toString(getpid()) + ".temp";
144 openOutputFile(s, out);
147 //output observed distances
148 for (int i = 0; i < quantilesMembers.size(); i++) {
149 out << quantilesMembers[i].size() << '\t';
150 for (int j = 0; j < quantilesMembers[i].size(); j++) {
151 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
156 out << highestDE.size() << endl;
157 for (int i = 0; i < highestDE.size(); i++) {
158 out << highestDE[i] << '\t';
165 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
168 //force parent to wait until all the processes are done
169 for (int i=0;i<processors;i++) {
170 int temp = processIDS[i];
174 //get data created by processes
175 for (int i=0;i<processors;i++) {
177 string s = toString(processIDS[i]) + ".temp";
178 openInputFile(s, in);
180 vector< vector<quanMember> > quan;
184 for (int m = 0; m < quan.size(); m++) {
190 vector<quanMember> q; float w; int b, n;
191 for (int j = 0; j < num; j++) {
193 //cout << w << '\t' << b << '\t' n << endl;
194 quanMember newMember(w, b, n);
195 q.push_back(newMember);
197 //cout << "here" << endl;
199 //cout << "now here" << endl;
204 //save quan in quantiles
205 for (int j = 0; j < quan.size(); j++) {
206 //put all values of q[i] into quan[i]
207 for (int l = 0; l < quan[j].size(); l++) { quantilesMembers[j].push_back(quan[j][l]); }
208 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
212 in >> num; gobble(in);
214 int count = lines[process]->start;
215 for (int s = 0; s < num; s++) {
219 highestDE[count] = high;
228 quantilesMembers = decalc->getQuantiles(querySeqs, windowSizes, window, probabilityProfile, increment, 0, querySeqs.size(), highestDE);
231 catch(exception& e) {
232 errorOut(e, "Mallard", "createProcessesQuan");
236 //***************************************************************************************************************