]> git.donarmstrong.com Git - mothur.git/blob - ccode.cpp
checking in chimera files in progress after move to michigan
[mothur.git] / ccode.cpp
1 /*
2  *  ccode.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 8/24/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "ccode.h"
11 #include "ignoregaps.h"
12
13
14 //***************************************************************************************************************
15 Ccode::Ccode(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Ccode::~Ccode() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22                 delete distCalc;
23         }
24         catch(exception& e) {
25                 errorOut(e, "Ccode", "~Ccode");
26                 exit(1);
27         }
28 }       
29 //***************************************************************************************************************
30 void Ccode::print(ostream& out) {
31         try {
32                 
33                 mothurOutEndLine();
34                 
35                 for (int i = 0; i < querySeqs.size(); i++) {
36                         
37                 }
38         }
39         catch(exception& e) {
40                 errorOut(e, "Ccode", "print");
41                 exit(1);
42         }
43 }
44
45 //***************************************************************************************************************
46 void Ccode::getChimeras() {
47         try {
48                 
49                 //read in query sequences and subject sequences
50                 mothurOut("Reading sequences and template file... "); cout.flush();
51                 querySeqs = readSeqs(fastafile);
52                 templateSeqs = readSeqs(templateFile);
53                 mothurOut("Done."); mothurOutEndLine();
54                 
55                 int numSeqs = querySeqs.size();
56                 
57                 closest.resize(numSeqs);
58                 
59                 //break up file if needed
60                 int linesPerProcess = numSeqs / processors ;
61                 
62                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
63                         //find breakup of sequences for all times we will Parallelize
64                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
65                         else {
66                                 //fill line pairs
67                                 for (int i = 0; i < (processors-1); i++) {                      
68                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
69                                 }
70                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
71                                 int i = processors - 1;
72                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
73                         }
74                         
75                         //find breakup of templatefile for quantiles
76                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
77                         else { 
78                                 for (int i = 0; i < processors; i++) {
79                                         templateLines.push_back(new linePair());
80                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
81                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
82                                 }
83                         }
84                 #else
85                         lines.push_back(new linePair(0, numSeqs));
86                         templateLines.push_back(new linePair(0, templateSeqs.size()));
87                 #endif
88         
89                 distCalc = new ignoreGaps();
90                 
91                 //find closest
92                 if (processors == 1) { 
93                         mothurOut("Finding top matches for sequences... "); cout.flush();
94                         closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
95                         mothurOut("Done."); mothurOutEndLine();
96                 }else {         createProcessesClosest();               }
97
98                 
99                 for (int i = 0; i < closest.size(); i++) {
100                         cout << querySeqs[i]->getName() << ": ";
101                         for (int j = 0; j < closest[i].size(); j++) {
102                         
103                                 cout << closest[i][j]->getName() << '\t';
104                         }
105                         cout << endl;
106                 }
107                         
108                 //free memory
109                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
110                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
111                         
112         }
113         catch(exception& e) {
114                 errorOut(e, "Ccode", "getChimeras");
115                 exit(1);
116         }
117 }
118 /***************************************************************************************************************
119 vector<int> Ccode::findWindows() {
120         try {
121                 
122                 vector<int> win; 
123                 
124                 if (increment > querySeqs[0]->getAligned().length()) {  mothurOut("You have selected an increment larger than the length of your sequences.  I will use the default of 25.");  increment = 25; }
125                 
126                 for (int m = increment;  m < (querySeqs[0]->getAligned().length() - increment); m+=increment) {  win.push_back(m);  }
127
128                 return win;
129         
130         }
131         catch(exception& e) {
132                 errorOut(e, "Ccode", "findWindows");
133                 exit(1);
134         }
135 }
136 */
137 //***************************************************************************************************************
138 vector< vector<Sequence*> > Ccode::findClosest(int start, int end, int numWanted) {
139         try{
140         
141                 vector< vector<Sequence*> > topMatches;  topMatches.resize(querySeqs.size());
142         
143                 float smallestOverall, smallestLeft, smallestRight;
144                 smallestOverall = 1000;  smallestLeft = 1000;  smallestRight = 1000;
145                 
146                 //for each sequence in querySeqs - find top matches to use as reference
147                 for(int j = start; j < end; j++){
148                         
149                         Sequence query = *(querySeqs[j]);
150                         
151                         vector<SeqDist> distances;
152                         
153                         //calc distance to each sequence in template seqs
154                         for (int i = 0; i < templateSeqs.size(); i++) {
155                         
156                                 Sequence ref = *(templateSeqs[i]); 
157                                         
158                                 //find overall dist
159                                 distCalc->calcDist(query, ref);
160                                 float dist = distCalc->getDist();       
161                                 
162                                 //save distance
163                                 SeqDist temp;
164                                 temp.seq = templateSeqs[i];
165                                 temp.dist = dist;
166
167                                 distances.push_back(temp);
168                         }
169                         
170                         sort(distances.begin(), distances.end(), compareSeqDist);
171                         
172                         //save the number of top matches wanted
173                         for (int h = 0; h < numWanted; h++) {
174                                 topMatches[j].push_back(distances[h].seq);
175                         }
176                 }
177                         
178                 return topMatches;
179
180         }
181         catch(exception& e) {
182                 errorOut(e, "Ccode", "findClosestSides");
183                 exit(1);
184         }
185 }
186 /**************************************************************************************************/
187 void Ccode::createProcessesClosest() {
188         try {
189 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
190                 int process = 0;
191                 vector<int> processIDS;
192                 
193                 //loop through and create all the processes you want
194                 while (process != processors) {
195                         int pid = fork();
196                         
197                         if (pid > 0) {
198                                 processIDS.push_back(pid);  
199                                 process++;
200                         }else if (pid == 0){
201                                 
202                                 mothurOut("Finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
203                                 closest = findClosest(lines[process]->start, lines[process]->end, numWanted);
204                                 mothurOut("Done finding top matches for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
205                                 
206                                 //write out data to file so parent can read it
207                                 ofstream out;
208                                 string s = toString(getpid()) + ".temp";
209                                 openOutputFile(s, out);
210                                 
211                                 //output pairs
212                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
213                                          out << closest[i].size() << endl;
214                                          for (int j = 0; j < closest[i].size(); j++) {
215                                                 closest[i][j]->printSequence(out);
216                                          }
217                                 }
218                                 out.close();
219                                 
220                                 exit(0);
221                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
222                 }
223                 
224                 //force parent to wait until all the processes are done
225                 for (int i=0;i<processors;i++) { 
226                         int temp = processIDS[i];
227                         wait(&temp);
228                 }
229                 
230                 //get data created by processes
231                 for (int i=0;i<processors;i++) { 
232                         ifstream in;
233                         string s = toString(processIDS[i]) + ".temp";
234                         openInputFile(s, in);
235                         
236                         //get pairs
237                         for (int k = lines[i]->start; k < lines[i]->end; k++) {
238                                 int size;
239                                 in >> size;
240                                 gobble(in);
241                                 
242                                 vector<Sequence*> tempVector;
243                                 
244                                 for (int j = 0; j < size; j++) {
245                                 
246                                         Sequence* temp = new Sequence(in);
247                                         gobble(in);
248                                                 
249                                         tempVector.push_back(temp);
250                                 }
251                                 
252                                 closest[k] = tempVector;
253                         }
254                         
255                         in.close();
256                         remove(s.c_str());
257                 }
258                         
259         
260 #else
261                 closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
262 #endif  
263
264         }
265         catch(exception& e) {
266                 errorOut(e, "Ccode", "createProcessesClosest");
267                 exit(1);
268         }
269 }
270
271 //***************************************************************************************************************
272