5 * Created by westcott on 8/24/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
11 #include "ignoregaps.h"
14 //***************************************************************************************************************
15 Ccode::Ccode(string filename, string temp) { fastafile = filename; templateFile = temp; }
16 //***************************************************************************************************************
20 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
21 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
25 errorOut(e, "Ccode", "~Ccode");
29 //***************************************************************************************************************
30 void Ccode::print(ostream& out) {
35 for (int i = 0; i < querySeqs.size(); i++) {
40 errorOut(e, "Ccode", "print");
45 //***************************************************************************************************************
46 void Ccode::getChimeras() {
49 //read in query sequences and subject sequences
50 mothurOut("Reading sequences and template file... "); cout.flush();
51 querySeqs = readSeqs(fastafile);
52 templateSeqs = readSeqs(templateFile);
53 mothurOut("Done."); mothurOutEndLine();
55 int numSeqs = querySeqs.size();
57 closest.resize(numSeqs);
59 //break up file if needed
60 int linesPerProcess = numSeqs / processors ;
62 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
63 //find breakup of sequences for all times we will Parallelize
64 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
67 for (int i = 0; i < (processors-1); i++) {
68 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
70 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
71 int i = processors - 1;
72 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
75 //find breakup of templatefile for quantiles
76 if (processors == 1) { templateLines.push_back(new linePair(0, templateSeqs.size())); }
78 for (int i = 0; i < processors; i++) {
79 templateLines.push_back(new linePair());
80 templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
81 templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
85 lines.push_back(new linePair(0, numSeqs));
86 templateLines.push_back(new linePair(0, templateSeqs.size()));
89 distCalc = new ignoreGaps();
92 if (processors == 1) {
93 mothurOut("Finding top matches for sequences... "); cout.flush();
94 closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
95 mothurOut("Done."); mothurOutEndLine();
96 }else { createProcessesClosest(); }
99 for (int i = 0; i < closest.size(); i++) {
100 cout << querySeqs[i]->getName() << ": ";
101 for (int j = 0; j < closest[i].size(); j++) {
103 cout << closest[i][j]->getName() << '\t';
109 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
110 for (int i = 0; i < templateLines.size(); i++) { delete templateLines[i]; }
113 catch(exception& e) {
114 errorOut(e, "Ccode", "getChimeras");
118 /***************************************************************************************************************
119 vector<int> Ccode::findWindows() {
124 if (increment > querySeqs[0]->getAligned().length()) { mothurOut("You have selected an increment larger than the length of your sequences. I will use the default of 25."); increment = 25; }
126 for (int m = increment; m < (querySeqs[0]->getAligned().length() - increment); m+=increment) { win.push_back(m); }
131 catch(exception& e) {
132 errorOut(e, "Ccode", "findWindows");
137 //***************************************************************************************************************
138 vector< vector<Sequence*> > Ccode::findClosest(int start, int end, int numWanted) {
141 vector< vector<Sequence*> > topMatches; topMatches.resize(querySeqs.size());
143 float smallestOverall, smallestLeft, smallestRight;
144 smallestOverall = 1000; smallestLeft = 1000; smallestRight = 1000;
146 //for each sequence in querySeqs - find top matches to use as reference
147 for(int j = start; j < end; j++){
149 Sequence query = *(querySeqs[j]);
151 vector<SeqDist> distances;
153 //calc distance to each sequence in template seqs
154 for (int i = 0; i < templateSeqs.size(); i++) {
156 Sequence ref = *(templateSeqs[i]);
159 distCalc->calcDist(query, ref);
160 float dist = distCalc->getDist();
164 temp.seq = templateSeqs[i];
167 distances.push_back(temp);
170 sort(distances.begin(), distances.end(), compareSeqDist);
172 //save the number of top matches wanted
173 for (int h = 0; h < numWanted; h++) {
174 topMatches[j].push_back(distances[h].seq);
181 catch(exception& e) {
182 errorOut(e, "Ccode", "findClosestSides");
186 /**************************************************************************************************/
187 void Ccode::createProcessesClosest() {
189 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
191 vector<int> processIDS;
193 //loop through and create all the processes you want
194 while (process != processors) {
198 processIDS.push_back(pid);
202 mothurOut("Finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
203 closest = findClosest(lines[process]->start, lines[process]->end, numWanted);
204 mothurOut("Done finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
206 //write out data to file so parent can read it
208 string s = toString(getpid()) + ".temp";
209 openOutputFile(s, out);
212 for (int i = lines[process]->start; i < lines[process]->end; i++) {
213 out << closest[i].size() << endl;
214 for (int j = 0; j < closest[i].size(); j++) {
215 closest[i][j]->printSequence(out);
221 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
224 //force parent to wait until all the processes are done
225 for (int i=0;i<processors;i++) {
226 int temp = processIDS[i];
230 //get data created by processes
231 for (int i=0;i<processors;i++) {
233 string s = toString(processIDS[i]) + ".temp";
234 openInputFile(s, in);
237 for (int k = lines[i]->start; k < lines[i]->end; k++) {
242 vector<Sequence*> tempVector;
244 for (int j = 0; j < size; j++) {
246 Sequence* temp = new Sequence(in);
249 tempVector.push_back(temp);
252 closest[k] = tempVector;
261 closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
265 catch(exception& e) {
266 errorOut(e, "Ccode", "createProcessesClosest");
271 //***************************************************************************************************************