]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
started adding chimeraslayer method and fixed minor bug in treegroupscommand deconstr...
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11
12 //***************************************************************************************************************
13 ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
14 //***************************************************************************************************************
15
16 ChimeraSlayer::~ChimeraSlayer() {
17         try {
18                 for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
19                 for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
20         }
21         catch(exception& e) {
22                 errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 void ChimeraSlayer::print(ostream& out) {
28         try {
29                 
30                 mothurOutEndLine();
31                 
32                                 
33         }
34         catch(exception& e) {
35                 errorOut(e, "ChimeraSlayer", "print");
36                 exit(1);
37         }
38 }
39
40 //***************************************************************************************************************
41 void ChimeraSlayer::getChimeras() {
42         try {
43                 
44                 //read in query sequences and subject sequences
45                 mothurOut("Reading sequences and template file... "); cout.flush();
46                 querySeqs = readSeqs(fastafile);
47                 templateSeqs = readSeqs(templateFile);
48                 mothurOut("Done."); mothurOutEndLine();
49                 
50                 int numSeqs = querySeqs.size();
51                 
52                 //break up file if needed
53                 int linesPerProcess = numSeqs / processors ;
54                 
55                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
56                         //find breakup of sequences for all times we will Parallelize
57                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
58                         else {
59                                 //fill line pairs
60                                 for (int i = 0; i < (processors-1); i++) {                      
61                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
62                                 }
63                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
64                                 int i = processors - 1;
65                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
66                         }
67                 #else
68                         lines.push_back(new linePair(0, numSeqs));
69                 #endif
70                 
71                 if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
72                 
73                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
74                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
75                 slayer = new Slayer(window, increment, minSim, divR);
76                 
77                 for (int i = 0; i < querySeqs.size(); i++) {
78                 
79                         string chimeraFlag = maligner->getResults(querySeqs[i]);
80                         float percentIdentical = maligner->getPercentID();
81                         vector<results> Results = maligner->getOutput();
82                         
83                         cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
84                         
85                         for (int j = 0; j < Results.size(); j++) {
86                                 cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
87                                 
88                         }
89                         
90                         if (chimeraFlag == "yes") {
91                         
92                                 //get sequence that were given from maligner results
93                                 vector<SeqDist> seqs;
94                                 for (int j = 0; j < Results.size(); j++) {
95                                         Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
96                                         
97                                         //seq = NULL if error occurred in getSequence
98                                         if (seq == NULL) {  break;      }
99                                         else {  
100                                                 SeqDist member;
101                                                 member.seq = seq;
102                                                 member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
103                                                 seqs.push_back(member); 
104                                         }
105                                 }
106                         
107                                 //limit number of parents to explore - default 5
108                                 if (Results.size() > parents) {
109                                         //sort by distance
110                                         sort(seqs.begin(), seqs.end(), compareSeqDist);
111                                         //prioritize larger more similiar sequence fragments
112                                         reverse(seqs.begin(), seqs.end());
113                                         
114                                         for (int k = seqs.size()-1; k > (parents-1); k--)  {  
115                                                 delete seqs[k].seq;
116                                                 seqs.pop_back();        
117                                         }
118                                 }
119                                 
120                                 //put seqs into vector to send to slayer
121                                 vector<Sequence*> seqsForSlayer;
122                                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
123                         
124                                 //mask then send to slayer...
125                                 if (seqMask != "") {
126                                         decalc->setMask(seqMask);
127
128                                         //mask querys
129                                         decalc->runMask(querySeqs[i]);
130                                         
131                                         //mask parents
132                                         for (int k = 0; k < seqsForSlayer.size(); k++) {
133                                                 decalc->runMask(seqsForSlayer[k]);
134                                         }
135                                         
136                                 }
137                                 
138                                 //send to slayer
139                                 slayer->getResults(querySeqs[i], seqsForSlayer);
140                                 
141                                 //free memory
142                                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
143                         }
144                         
145                 }       
146                 //free memory
147                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
148                 
149                 if (seqMask != "") {
150                         delete decalc; 
151                 }
152
153                         
154         }
155         catch(exception& e) {
156                 errorOut(e, "ChimeraSlayer", "getChimeras");
157                 exit(1);
158         }
159 }
160 //***************************************************************************************************************
161 Sequence* ChimeraSlayer::getSequence(string name) {
162         try{
163                 Sequence* temp;
164                 
165                 //look through templateSeqs til you find it
166                 int spot = -1;
167                 for (int i = 0; i < templateSeqs.size(); i++) {
168                         if (name == templateSeqs[i]->getName()) {  
169                                 spot = i;
170                                 break;
171                         }
172                 }
173                 
174                 if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
175                 
176                 temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
177                 
178                 return temp;
179         }
180         catch(exception& e) {
181                 errorOut(e, "ChimeraSlayer", "getSequence");
182                 exit(1);
183         }
184 }
185 //***************************************************************************************************************
186
187
188