]> git.donarmstrong.com Git - mothur.git/blobdiff - chimera.h
changes while testing
[mothur.git] / chimera.h
index 9142bacaa09a016d1cabd25560f1e428a699a15c..e187bfc60d8d504d80561d4347f9a838f1ddabee 100644 (file)
--- a/chimera.h
+++ b/chimera.h
 
 
 #include "mothur.h"
-#include "sparsematrix.hpp"
 #include "sequence.hpp"
+/***********************************************************************/
+struct data_struct { // plain record: float scores, four int window bounds, and three Sequence objects held by value
+       float divr_qla_qrb; // NOTE(review): q/a/b in these names presumably refer to querySeq/parentA/parentB and l/r to left/right — confirm
+       float divr_qlb_qra;
+       float qla_qrb;
+       float qlb_qra;
+       float qla;
+       float qrb;
+       float ab; 
+       float qa;
+       float qb; 
+       float lab; 
+       float rab; 
+       float qra; 
+       float qlb; 
+       int winLStart; // presumably start of the left window — confirm
+       int winLEnd; // presumably end of the left window — confirm
+       int winRStart; // presumably start of the right window — confirm
+       int winREnd; // presumably end of the right window — confirm
+       Sequence querySeq; // stored by value, so copying a data_struct copies this Sequence
+       Sequence parentA; // stored by value
+       Sequence parentB; // stored by value
+       float bsa;
+       float bsb;
+       float bsMax; // primary sort key in compareDataStruct
+       float chimeraMax; // tie-breaker in compareDataStruct when bsMax values are equal
+       
+};
+/***********************************************************************/
+struct data_results {
+       vector<data_struct> results;
+       string flag;
+       Sequence trimQuery;
+       //results malignerResults;
+       
+       data_results(vector<data_struct> d, string f, map<int, int> s, Sequence t) : results(d), flag(f), trimQuery(t) {}
+       data_results() {}
+};
+/***********************************************************************/
+//sorts lowest to highest first by bsMax, then if tie by chimeraMax
+//arguments are taken by const reference: a data_struct holds three Sequence
+//objects by value, so by-value parameters would copy six Sequences per call
+inline bool compareDataStruct(const data_struct& left, const data_struct& right){
+       //different bsMax values decide the order on their own
+       if (left.bsMax != right.bsMax) { return (left.bsMax < right.bsMax); }
+       //equal bsMax: fall back to chimeraMax
+       return (left.chimeraMax < right.chimeraMax);
+} 
+/***********************************************************************/
+struct Preference {
+               string name;
+               string leftParent; //keep the name of closest left 
+               string rightParent; //keep the name of closest 
+               float score;  //preference score
+               float closestLeft;  //keep the closest left 
+               float closestRight; //keep the closest right 
+               int midpoint;
+               Preference() { name = ""; leftParent = ""; rightParent = ""; score = 0.0; closestLeft = 10000.0; closestRight = 10000.0; midpoint = 0;  }
+               ~Preference() {}
+};
+/***********************************************************************/
+struct score_struct { // plain record of four ints; no constructor, so the creator must set every field
+       int prev;
+       int score;
+       int row; // presumably a row index into a scoring matrix — confirm
+       int col; // presumably a column index into a scoring matrix — confirm
+//     int mismatches;
+};
+/***********************************************************************/
+struct trace_struct { // plain record of three ints; no constructor, so the creator must set every field
+       int col;
+       int oldCol; // presumably the column before a change of col — confirm
+       int row;
+};
+/***********************************************************************/
+struct results { // plain record describing one region and its parent; ordered by compareRegionStart
+       int regionStart;
+       int regionEnd;
+       int nastRegionStart; // sort key used by compareRegionStart
+       int nastRegionEnd;
+       string parent; // presumably the parent's name — confirm
+       string parentAligned; // presumably the parent's aligned sequence string — confirm
+       float queryToParent;
+       float queryToParentLocal;
+       float divR;
+};
+/***********************************************************************/
+struct SeqDist { // pairs a Sequence pointer with a distance and an index; ordered by compareSeqDist
+       Sequence* seq; // raw pointer; nothing in this struct allocates or frees it
+       float dist; // sort key used by compareSeqDist
+       int index;
+};
+/***********************************************************************/
+struct SeqCompare { // like SeqDist but holds the Sequence by value; ordered by compareSeqCompare
+       Sequence seq; // stored by value, so copying a SeqCompare copies the Sequence
+       float dist; // sort key used by compareSeqCompare
+       int index;
+};
+//********************************************************************************************************************
+//sorts lowest to highest by nastRegionStart
+//arguments are taken by const reference so the two strings inside each
+//results record are not copied on every comparison
+inline bool compareRegionStart(const results& left, const results& right){
+       return (left.nastRegionStart < right.nastRegionStart);  
+} 
+//********************************************************************************************************************
+//sorts lowest to highest by dist
+//arguments are taken by const reference so no SeqDist is copied per comparison
+inline bool compareSeqDist(const SeqDist& left, const SeqDist& right){
+       return (left.dist < right.dist);        
+} 
+//********************************************************************************************************************
+//sorts lowest to highest by dist
+//arguments are taken by const reference: SeqCompare holds a Sequence by value,
+//so by-value parameters would copy two Sequences on every comparison
+inline bool compareSeqCompare(const SeqCompare& left, const SeqCompare& right){
+       return (left.dist < right.dist);        
+} 
+//********************************************************************************************************************
+struct sim { // plain record: two parent names, a score and a midpoint; no constructor, so score and midpoint start unset
+               string leftParent;
+               string rightParent; 
+               float score;  
+               int midpoint;
+};
 
-typedef list<PCell>::iterator MatData;
-typedef map<int, float> SeqMap;  //maps sequence to all distance for that seqeunce
+struct linePair {
+                       unsigned long long start;
+                       unsigned long long end;
+                       linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+                       linePair(){}
+};
 
 
 /***********************************************************************/
@@ -25,80 +146,39 @@ class Chimera {
 
        public:
        
-               Chimera(){};
-               Chimera(string);
-               Chimera(string, string);
-               virtual ~Chimera(){};
-               virtual void setFilter(bool f)                  {       filter = f;                     }
-               virtual void setCorrection(bool c)              {       correction = c;         }
-               virtual void setProcessors(int p)               {       processors = p;         }
-               virtual void setWindow(int w)                   {       window = w;                     }
-               virtual void setIncrement(int i)                {       increment = i;          }
-               
-               virtual void setCons(string) {};
-               virtual void setQuantiles(string) {};
-               
-               virtual vector<Sequence*> readSeqs(string file) {
-                       try {
-                               ifstream in;
-                               openInputFile(file, in);
-                               vector<Sequence*> container;
-                               
-                               //read in seqs and store in vector
-                               while(!in.eof()){
-
-                                       Sequence* current = new Sequence(in);
-                                       container.push_back(current);
-                                       gobble(in);
-                               }
-                               
-                               in.close();
-                               return container;
-                       }
-                       catch(exception& e) {
-                               errorOut(e, "Chimera", "readSeqs");
-                               exit(1);
-                       }
-               }
-               
+               // Default constructor: fetches the MothurOut instance and puts the scalar
+               // members in a known state. filter is set explicitly so that it never
+               // holds an indeterminate value if it is read before being assigned.
+               Chimera(){ m = MothurOut::getInstance(); length = 0; filter = false; unaligned = false;  byGroup = false; }
+               virtual ~Chimera(){     for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i];  } for (int i = 0; i < filteredTemplateSeqs.size(); i++) { delete filteredTemplateSeqs[i];  } };
+               virtual bool getUnaligned()                             {       return unaligned;                       } // returns the unaligned flag
+               virtual int getLength()                                 {   return length;      } // returns the length member
+               virtual vector<Sequence*> readSeqs(string); // declared only; the body is not in this header section
+               virtual void setMask(string); // declared only; the body is not in this header section
+               virtual map<int, int> runFilter(Sequence*); // declared only; the body is not in this header section
+               virtual string createFilter(vector<Sequence*>, float); // declared only; the body is not in this header section
+               virtual void printHeader(ostream&){}; // default does nothing; virtual, so a subclass may override
+               virtual int getChimeras(Sequence*){ return 0; } // default ignores its argument and returns 0
+               virtual int getChimeras(){ return 0; } // default returns 0
+               virtual Sequence print(ostream&, ostream&){  Sequence temp; return temp; } // default writes nothing and returns a default-constructed Sequence
+               virtual Sequence print(ostream&, ostream&, data_results, data_results) { Sequence temp; return temp; } // default writes nothing and returns a default-constructed Sequence
+               virtual int print(ostream&, ostream&, string){  return 0; } // default writes nothing and returns 0
+               virtual int getNumNoParents(){  return 0; } // default returns 0
+               virtual data_results getResults() { data_results results; return results; } // default returns a default-constructed data_results
                
-               virtual void setMask(string filename) {
-                       try {
-                               
-                               if (filename == "default") {
-                                       //default is from wigeon  236627 EU009184.1 Shigella dysenteriae str. FBD013
-                                       seqMask = ".....................................................................................................AAATTGAAGAGTTT-GA--T-CA-T-G-GCTC-AG-AT-TGAA-C-GC--TGG-C--G-GC-A-GG--C----C-T--AACACA-T-GC-A-AGT-CGA-A-CG----------G-TAA-CA-G----------------------------GAAG-A-AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CT-TCTTT----------------G-CT--G--AC--G--AG-T-GG-C-GG-A--C-------------GGG-TGAGT-A--AT-GT-C-T-G-GG---A-A--A-CT-G--C-C-TGA--TG-G------------------------------------------------------------------A-GG----GGG-AT-AA-CTA-------------------------C-T-G-----------------------GAA-A---CGG-TAG-CTAA-TA---CC-G--C-AT-A----------A--------------------C-------------------------------------GT-C-----------------------------------------------------------------------------------------------------------------------G-CA-A--------------------------------------------------------------------------------------------------------------------------------------G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-A-G-GG-----G--GA-C-CT--------------------------------------------------------------------------------------------------------------------TCG-G----------------------------------------------------------------------------------------------------------------------G----CC-TC--T---T-G--------------C----C-A---T-CG-G---AT---G-T-----G-CCC-AGA--T-GGG--A------TT--A--G-CT-A----G---TAGG-T-G-GG-G-T----AAC-GG-C-T-C-ACCT--A-GG-C-G--A-CG-A------------TCC-C-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AC-C-AG-CCAC-A-CTGGA--A-C-TG-A-GA-C-AC-G-G-TCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGCA-C-AA-T-GG--GC-GC-A----A-G
-CC-T-GA-TG-CA-GCCA-TGCC-G-CG-T---G-T-A--T--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-TAC--------TT-TC-A-G--C-GGG----GA-G--G---AA-GGGA---GTAA-AG----T--T--AA-T---A----C-----CT-T-TGC-TCA-TT-GA-CG-TT-A-C-CC-G-CA-G---------AA-----------GAAGC-ACC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GGT-GCA-A-G-CG-TTAA-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGC-AC--G-CA-G-G-C-G------------G--T-TT-G-T-T-AA----G-T-C-A---G-ATG-TG-A-AA-TC--CC-CGG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-G-GG-AA-C----T-G-C-A-T-C--------T--GA-T-A-C-T-G-GCA--A-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-C-----T-CG--TA-G-A------------G-GG-G-GG-T----AG--AATT-CCA-G-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TC-T-GGA--GG-A-AT-A-CC-GG--T--G--GC-GAA-G--G-C---G----G--C-C-CCCTG------G-AC-GA-------------------------------------------
-------------------AG-A-C-T--GA--CG-----CT-CA-GG--T-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-TC--GA-CT---------T-GG--A--G-G-TT-G-TG-C--C--------------------------------------------------------------------------------------CTT-GA--------------------------------------------------------------------------------------------------------------------------------------------------G-G-C-GT--G-G-C-T-TC-C------GG--A----GC-TAA--CG-C-G-T--T--AA-GT--C----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----C-C--G-C-A-A-GGT-T--AAA-ACTC-AAA---------TGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-ATG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-TGGTC-TT-G-AC-A-T-C--------------CAC-G-G-------------A-AG-T-T-T--TC--A-GA-G-A-T--G-A-G--A-A-T-G--T-G-----CC-------------------------------------T--TC-G------------------------------------------GG----A----A---CC-GTG---A--GA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TT-G--TGA-AA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TA--TC--C-TTTG--T-T-G-C-C---AG-C-G-----G-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCC------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GG---C----C-G------------G----G---A-A--CT---------------C-A-A-A-G-GA-G--AC-T-G-CCA--G-T------------------------------------G-A---TAA----------------------------------A-C-T-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---ATC-A-T-G-G-C-C-CTT----AC-G--AC-C-A-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CGCA-T-A--C-AAA-GA-GA---------
-----------------------------------------------------------------------------------------A-G-C-G-A--C-CTCG-C--G---------------------------------------A-GA-G-C-----------A--A-G-CG---G----------A--CCT-C------A-T-AAAGT-GC-G-T-C-G-TAG-TCC--------GGA-T-TGGAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTCC-A-T-G-AA-G-TC-GGAAT-CG-C-TA--G-TA-AT-C-G-T----GGA-TC-A-G--A------AT--GCC-AC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---TGG-G-TT-GC-AAA--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C--------------------------------------------------------------TT----C-------------------------------------------------------------------------------------------------G--GG-A--GG-G--C---GC-TTA--CC--ACT-T----T-GTG-AT-TCA------------------------TG--ACT-GGGG-TG-AAG-TCGTAACAA-GGTAA-CCGT-AGGGGAA-CCTG-CGGT-TGGATCACCTCCTTA................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................";
-                               }else{
-                                       ifstream infile;
-                                       openInputFile(filename, infile);
-                                       
-                                       while (!infile.eof()) {
-                                               Sequence temp(infile);
-                                               seqMask = temp.getAligned();
-                                               
-                                               gobble(infile);
-                                       }
-                                       
-                                       infile.close();
-                               }
-                       }
-                       catch(exception& e) {
-                               errorOut(e, "Chimera", "setMask");
-                               exit(1);
-                       }
-               }
+               #ifdef USE_MPI // the MPI_File overloads below exist only when USE_MPI is defined
+               virtual Sequence print(MPI_File&, MPI_File&){  Sequence temp; return temp; } // default writes nothing and returns a default-constructed Sequence
+               virtual Sequence print(MPI_File&, MPI_File&, data_results, data_results, bool&){  Sequence temp; return temp; } // default writes nothing, leaves the bool untouched, returns a default-constructed Sequence
+               virtual int print(MPI_File&, MPI_File&, string){  return 0; } // default writes nothing and returns 0
+               #endif
                
-               //pure functions
-               virtual void getChimeras() = 0; 
-               virtual void print(ostream&) = 0;       
                
        protected:
                
-               bool filter, correction;
-               int processors, window, increment;
-               string seqMask;
-                       
-
+               vector<Sequence*> templateSeqs; // each element is deleted in ~Chimera()
+               vector<Sequence*> filteredTemplateSeqs; // each element is deleted in ~Chimera()
+               bool filter, unaligned, byGroup; // unaligned and byGroup are set to false in Chimera()
+               int length; // set to 0 in Chimera(); returned by getLength()
+               string seqMask, filterString, outputDir, templateFileName; 
+               Sequence* getSequence(string);  //find sequence from name       
+               MothurOut* m; // assigned MothurOut::getInstance() in Chimera()
 };
 
 /***********************************************************************/