]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraperseuscommand.h
moved mothur's source into a folder to make grabbing just the source easier on github
[mothur.git] / chimeraperseuscommand.h
diff --git a/chimeraperseuscommand.h b/chimeraperseuscommand.h
deleted file mode 100644 (file)
index 01f5768..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-#ifndef CHIMERAPERSEUSCOMMAND_H
-#define CHIMERAPERSEUSCOMMAND_H
-
-
-/*
- *  chimeraperseuscommand.h
- *  Mothur
- *
- *  Created by westcott on 10/26/11.
- *  Copyright 2011 Schloss Lab. All rights reserved.
- *
- */
-
-
-
-#include "mothur.h"
-#include "command.hpp"
-#include "sequenceparser.h"
-#include "myPerseus.h"
-
-/***********************************************************/
-class ChimeraPerseusCommand : public Command {
-public:
-       ChimeraPerseusCommand(string);
-       ChimeraPerseusCommand();
-       ~ChimeraPerseusCommand() {}
-       
-       vector<string> setParameters();
-       string getCommandName()                 { return "chimera.perseus";             }
-       string getCommandCategory()             { return "Sequence Processing"; }
-       string getHelpString(); 
-       string getCitation() { return "http://www.mothur.org/wiki/Chimera.perseus\n"; }
-       string getDescription()         { return "detect chimeric sequences"; }
-       
-       int execute(); 
-       void help() { m->mothurOut(getHelpString()); }          
-       
-private:
-       struct linePair {
-               int start;
-               int end;
-               linePair(int i, int j) : start(i), end(j) {}
-       };
-       
-       bool abort;
-       string fastafile, groupfile, outputDir, namefile;
-       int processors, alignLength;
-       double cutoff, alpha, beta;
-       
-       vector<string> outputNames;
-       vector<string> fastaFileNames;
-       vector<string> nameFileNames;
-       vector<string> groupFileNames;
-       
-       string getNamesFile(string&);
-       int driver(string, vector<seqData>&, string, int&);
-       vector<seqData> readFiles(string, string);
-       vector<seqData> loadSequences(SequenceParser&, string);
-       int deconvoluteResults(SequenceParser&, string, string);
-       int driverGroups(SequenceParser&, string, string, int, int, vector<string>);
-       int createProcessesGroups(SequenceParser&, string, string, vector<string>, string, string, string);
-};
-
-/**************************************************************************************************/
-//custom data structure for threads to use.
-// This is passed by void pointer so it can be any data type
-// that can be passed using a single void pointer (LPVOID).
-struct perseusData {
-       string fastafile; 
-       string namefile; 
-       string groupfile;
-       string outputFName;
-       string accnos;
-       MothurOut* m;
-       int start;
-       int end;
-       int threadID, count, numChimeras;
-       double alpha, beta, cutoff;
-       vector<string> groups;
-       
-       perseusData(){}
-       perseusData(double a, double b, double c, string o,  string f, string n, string g, string ac, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
-               alpha = a;
-               beta = b;
-               cutoff = c;
-               fastafile = f;
-               namefile = n;
-               groupfile = g;
-               outputFName = o;
-               accnos = ac;
-               m = mout;
-               start = st;
-               end = en;
-               threadID = tid;
-               groups = gr;
-               count = 0;
-               numChimeras = 0;
-       }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ 
-       perseusData* pDataArray;
-       pDataArray = (perseusData*)lpParam;
-       
-       try {
-               
-               //clears files
-               ofstream out, out1, out2;
-               pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
-               pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
-               
-               //parse fasta and name file by group
-               SequenceParser* parser;
-               if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);      }
-               else                                                    { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
-               
-               int totalSeqs = 0;
-               int numChimeras = 0;
-               
-               for (int i = pDataArray->start; i < pDataArray->end; i++) {
-                       
-                       int start = time(NULL);  if (pDataArray->m->control_pressed) {  delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
-                       
-                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group " + pDataArray->groups[i] + "...");  pDataArray->m->mothurOutEndLine();                                      
-                       
-                       //vector<seqData> sequences = loadSequences(parser, groups[i]); - same function below
-                       ////////////////////////////////////////////////////////////////////////////////////////
-                       vector<Sequence> thisGroupsSeqs = parser->getSeqs(pDataArray->groups[i]);
-                       map<string, string> nameMap = parser->getNameMap(pDataArray->groups[i]);
-                       map<string, string>::iterator it;
-                       
-                       vector<seqData> sequences;
-                       bool error = false;
-                       
-                       for (int j = 0; j < thisGroupsSeqs.size(); j++) {
-                               
-                               if (pDataArray->m->control_pressed) {  delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
-                               
-                               it = nameMap.find(thisGroupsSeqs[j].getName());
-                               if (it == nameMap.end()) { error = true; pDataArray->m->mothurOut("[ERROR]: " + thisGroupsSeqs[j].getName() + " is in your fasta file and not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
-                               else {
-                                       int num = pDataArray->m->getNumNames(it->second);
-                                       sequences.push_back(seqData(thisGroupsSeqs[j].getName(), thisGroupsSeqs[j].getUnaligned(), num));
-                               }
-                       }
-                       
-                       if (error) { pDataArray->m->control_pressed = true; }
-                       
-                       //sort by frequency
-                       sort(sequences.rbegin(), sequences.rend());
-                       ////////////////////////////////////////////////////////////////////////////////////////
-
-                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
-                       
-                       //int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras); - same function below
-                       ////////////////////////////////////////////////////////////////////////////////////////
-                       string chimeraFileName = pDataArray->outputFName+pDataArray->groups[i];
-                       string accnosFileName = pDataArray->accnos+pDataArray->groups[i];
-                       
-                       vector<vector<double> > correctModel(4);        //could be an option in the future to input own model matrix
-                       for(int j=0;j<4;j++){   correctModel[j].resize(4);      }
-                       
-                       correctModel[0][0] = 0.000000;  //AA
-                       correctModel[1][0] = 11.619259; //CA
-                       correctModel[2][0] = 11.694004; //TA
-                       correctModel[3][0] = 7.748623;  //GA
-                       
-                       correctModel[1][1] = 0.000000;  //CC
-                       correctModel[2][1] = 7.619657;  //TC
-                       correctModel[3][1] = 12.852562; //GC
-                       
-                       correctModel[2][2] = 0.000000;  //TT
-                       correctModel[3][2] = 10.964048; //TG
-                       
-                       correctModel[3][3] = 0.000000;  //GG
-                       
-                       for(int k=0;k<4;k++){
-                               for(int j=0;j<k;j++){
-                                       correctModel[j][k] = correctModel[k][j];
-                               }
-                       }
-                       
-                       int numSeqs = sequences.size();
-                       int alignLength = sequences[0].sequence.size();
-                       
-                       ofstream chimeraFile;
-                       ofstream accnosFile;
-                       pDataArray->m->openOutputFile(chimeraFileName, chimeraFile); 
-                       pDataArray->m->openOutputFile(accnosFileName, accnosFile); 
-                       
-                       Perseus myPerseus;
-                       vector<vector<double> > binMatrix = myPerseus.binomial(alignLength);
-                       
-                       chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n";
-                       
-                       vector<bool> chimeras(numSeqs, 0);
-                       
-                       for(int j=0;j<numSeqs;j++){     
-                               
-                               if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                               
-                               vector<bool> restricted = chimeras;
-                               
-                               vector<vector<int> > leftDiffs(numSeqs);
-                               vector<vector<int> > leftMaps(numSeqs);
-                               vector<vector<int> > rightDiffs(numSeqs);
-                               vector<vector<int> > rightMaps(numSeqs);
-                               
-                               vector<int> singleLeft, bestLeft;
-                               vector<int> singleRight, bestRight;
-                               
-                               int bestSingleIndex, bestSingleDiff;
-                               vector<pwAlign> alignments(numSeqs);
-                               
-                               int comparisons = myPerseus.getAlignments(j, sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted);
-                               
-                               if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                               
-                               int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi;
-                               
-                               string dummyA, dummyB;
-                               
-                               if(comparisons >= 2){   
-                                       minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
-                                       
-                                       if (pDataArray->m->control_pressed) { delete parser;  pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                                       
-                                       int minMismatchToTrimera = numeric_limits<int>::max();
-                                       int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
-                                       
-                                       if(minMismatchToChimera >= 3 && comparisons >= 3){
-                                               minMismatchToTrimera = myPerseus.getTrimera(sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted);
-                                               
-                                               if (pDataArray->m->control_pressed) { delete parser;  pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                                       }
-                                       
-                                       double singleDist = myPerseus.modeledPairwiseAlignSeqs(sequences[j].sequence, sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel);
-                                       
-                                       if (pDataArray->m->control_pressed) { delete parser;  pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                                       
-                                       string type;
-                                       string chimeraRefSeq;
-                                       
-                                       if(minMismatchToChimera - minMismatchToTrimera >= 3){
-                                               type = "trimera";
-                                               chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps);
-                                       }
-                                       else{
-                                               type = "chimera";
-                                               chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps);
-                                       }
-                                       
-                                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                                       
-                                       double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(sequences[j].sequence, chimeraRefSeq, dummyA, dummyB, correctModel);
-                                       
-                                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                                       
-                                       double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[j].sequence, chimeraRefSeq);
-                                       double loonIndex = myPerseus.calcLoonIndex(sequences[j].sequence, sequences[leftParentBi].sequence, sequences[rightParentBi].sequence, breakPointBi, binMatrix);                
-                                       
-                                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); chimeraFile.close(); pDataArray->m->mothurRemove(chimeraFileName); accnosFile.close(); pDataArray->m->mothurRemove(accnosFileName); return 0; }
-                                       
-                                       chimeraFile << j << '\t' << sequences[j].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << sequences[bestSingleIndex].seqName << '\t';
-                                       chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << sequences[leftParentBi].seqName << '\t' << sequences[rightParentBi].seqName << '\t';
-                                       chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t';
-                                       chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t';
-                                       
-                                       double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, pDataArray->alpha, pDataArray->beta);
-                                       
-                                       chimeraFile << probability << '\t';
-                                       
-                                       if(probability > pDataArray->cutoff){ 
-                                               chimeraFile << type << endl;
-                                               accnosFile << sequences[j].seqName << endl;
-                                               chimeras[j] = 1;
-                                               numChimeras++;
-                                       }
-                                       else{
-                                               chimeraFile << "good" << endl;
-                                       }
-                                       
-                               }
-                               else{
-                                       chimeraFile << j << '\t' << sequences[j].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
-                               }
-                               //report progress
-                               if((j+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(j+1) + "\n");               }
-                       }
-                       
-                       if((numSeqs) % 100 != 0){       pDataArray->m->mothurOut("Processing sequence: " + toString(numSeqs) + "\n");           }
-                       
-                       chimeraFile.close();
-                       accnosFile.close();
-                       ////////////////////////////////////////////////////////////////////////////////////////
-
-                       totalSeqs += numSeqs;
-                       
-                       //append files
-                       pDataArray->m->appendFiles(chimeraFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(chimeraFileName);
-                       pDataArray->m->appendFiles(accnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(accnosFileName);
-                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->groups[i] + ".");        pDataArray->m->mothurOutEndLine();                                      
-                       
-                       if (pDataArray->m->control_pressed) { delete parser; pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; }
-               }       
-               
-               pDataArray->count = totalSeqs;
-               delete parser;
-               return totalSeqs;
-               
-       }
-       catch(exception& e) {
-               pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyPerseusThreadFunction");
-               exit(1);
-       }
-} 
-/**************************************************************************************************/
-
-#endif
-
-#endif
-
-