+++ /dev/null
-#ifndef Mothur_pcrseqscommand_h
-#define Mothur_pcrseqscommand_h
-
-//
-// pcrseqscommand.h
-// Mothur
-//
-// Created by Sarah Westcott on 3/14/12.
-// Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-
-#include "command.hpp"
-#include "sequence.hpp"
-#include "trimoligos.h"
-#include "alignment.hpp"
-#include "needlemanoverlap.hpp"
-
-class PcrSeqsCommand : public Command {
-public:
- PcrSeqsCommand(string);
- PcrSeqsCommand();
- ~PcrSeqsCommand(){}
-
- vector<string> setParameters();
- string getCommandName() { return "pcr.seqs"; }
- string getCommandCategory() { return "Sequence Processing"; }
- string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
- string getDescription() { return "pcr.seqs"; }
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
-
- struct linePair {
- unsigned long long start;
- unsigned long long end;
- linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
- linePair() {}
- };
-
- vector<linePair> lines;
- bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
- bool abort, keepprimer, keepdots;
- string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch;
- int start, end, processors, length;
-
- vector<string> revPrimer, outputNames;
- vector<string> primers;
-
- int writeAccnos(set<string>);
- int readName(set<string>&);
- int readGroup(set<string>);
- int readTax(set<string>);
- bool readOligos();
- bool readEcoli();
- int driverPcr(string, string, string, set<string>&, linePair);
- int createProcesses(string, string, string, set<string>&);
- bool findForward(Sequence&, int&, int&);
- bool findReverse(Sequence&, int&, int&);
- bool isAligned(string, map<int, int>&);
- bool compareDNASeq(string, string);
- string reverseOligo(string);
-};
-
-/**************************************************************************************************/
-//custom data structure for threads to use.
-// This is passed by void pointer so it can be any data type
-// that can be passed using a single void pointer (LPVOID).
-struct pcrData {
- string filename;
- string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
- unsigned long long fstart;
- unsigned long long fend;
- int count, start, end, length;
- MothurOut* m;
- vector<string> primers;
- vector<string> revPrimer;
- set<string> badSeqNames;
- bool keepprimer, keepdots;
-
-
- pcrData(){}
- pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
- filename = f;
- goodFasta = gf;
- badFasta = bfn;
- m = mout;
- oligosfile = ol;
- ecolifile = ec;
- primers = pr;
- revPrimer = rpr;
- nomatch = nm;
- keepprimer = kp;
- keepdots = kd;
- start = st;
- end = en;
- length = l;
- fstart = fst;
- fend = fen;
- count = 0;
- }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
- pcrData* pDataArray;
- pDataArray = (pcrData*)lpParam;
-
- try {
- ofstream goodFile;
- pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
-
- ofstream badFile;
- pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
-
- ifstream inFASTA;
- pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
-
- //print header if you are process 0
- if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
- inFASTA.seekg(0);
- }else { //this accounts for the difference in line endings.
- inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA);
- }
-
- set<int> lengths;
- pDataArray->count = pDataArray->fend;
- for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
-
- if (pDataArray->m->control_pressed) { break; }
-
- Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
-
- string trashCode = "";
- if (currSeq.getName() != "") {
-
- bool goodSeq = true;
- if (pDataArray->oligosfile != "") {
- map<int, int> mapAligned;
- //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
- ///////////////////////////////////////////////////////////////
- bool aligned = false;
- string seq = currSeq.getAligned();
- int countBases = 0;
- for (int k = 0; k < seq.length(); k++) {
- if (!isalpha(seq[k])) { aligned = true; }
- else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
- } //ie. the 3rd base may be at spot 10 in the alignment
- //later when we trim we want to trim from spot 10.
- ///////////////////////////////////////////////////////////////
-
- //process primers
- if (pDataArray->primers.size() != 0) {
- int primerStart = 0; int primerEnd = 0;
- //bool good = findForward(currSeq, primerStart, primerEnd);
- ///////////////////////////////////////////////////////////////
- bool good = false;
- string rawSequence = currSeq.getUnaligned();
-
- for(int j=0;j<pDataArray->primers.size();j++){
- string oligo = pDataArray->primers[j];
-
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
-
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int l = 0; l < rawSequence.length()-olength; l++){
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
- string rawChunk = rawSequence.substr(l, olength);
- //compareDNASeq(oligo, rawChunk)
- ////////////////////////////////////////////////////////
- bool success = 1;
- for(int k=0;k<olength;k++){
-
- if(oligo[k] != rawChunk[k]){
- if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C') { success = 0; }
- else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N')) { success = 0; }
- else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T')) { success = 0; }
- else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C')) { success = 0; }
- else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- if(success) {
- primerStart = j;
- primerEnd = primerStart + olength;
- good = true; break;
- }
- }
- if (good) { break; }
- }
-
- if (!good) { primerStart = 0; primerEnd = 0; }
- ///////////////////////////////////////////////////////////////
-
-
- if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
- else{
- //are you aligned
- if (aligned) {
- if (!pDataArray->keepprimer) {
- if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
- }
- else {
- if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
- }
- }else {
- if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
- else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
- }
- }
- }
-
- //process reverse primers
- if (pDataArray->revPrimer.size() != 0) {
- int primerStart = 0; int primerEnd = 0;
- bool good = false;
- //findReverse(currSeq, primerStart, primerEnd);
- ///////////////////////////////////////////////////////////////
- string rawSequence = currSeq.getUnaligned();
-
- for(int j=0;j<pDataArray->revPrimer.size();j++){
- string oligo = pDataArray->revPrimer[j];
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int l = rawSequence.length()-olength; l >= 0; l--){
-
- string rawChunk = rawSequence.substr(l, olength);
- //compareDNASeq(oligo, rawChunk)
- ////////////////////////////////////////////////////////
- bool success = 1;
- for(int k=0;k<olength;k++){
-
- if(oligo[k] != rawChunk[k]){
- if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C') { success = 0; }
- else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N')) { success = 0; }
- else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T')) { success = 0; }
- else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C')) { success = 0; }
- else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- if(success) {
- primerStart = j;
- primerEnd = primerStart + olength;
- good = true; break;
- }
- }
- if (good) { break; }
- }
-
- if (!good) { primerStart = 0; primerEnd = 0; }
-
- ///////////////////////////////////////////////////////////////
- if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
- else{
- //are you aligned
- if (aligned) {
- if (!pDataArray->keepprimer) {
- if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
- }
- else {
- if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
- } }
- else {
- if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); }
- else { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd)); }
- }
- }
- }
- }else if (pDataArray->ecolifile != "") {
- //make sure the seqs are aligned
- lengths.insert(currSeq.getAligned().length());
- if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
- else if (currSeq.getAligned().length() != pDataArray->length) {
- pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break;
- }else {
- if (pDataArray->keepdots) {
- currSeq.filterToPos(pDataArray->start);
- currSeq.filterFromPos(pDataArray->end);
- }else {
- string seqString = currSeq.getAligned().substr(0, pDataArray->end);
- seqString = seqString.substr(pDataArray->start);
- currSeq.setAligned(seqString);
- }
- }
- }else{ //using start and end to trim
- //make sure the seqs are aligned
- lengths.insert(currSeq.getAligned().length());
- if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
- else {
- if (pDataArray->end != -1) {
- if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
- else {
- if (pDataArray->keepdots) { currSeq.filterFromPos(pDataArray->end); }
- else {
- string seqString = currSeq.getAligned().substr(0, pDataArray->end);
- currSeq.setAligned(seqString);
- }
- }
- }
- if (pDataArray->start != -1) {
- if (pDataArray->keepdots) { currSeq.filterToPos(pDataArray->start); }
- else {
- string seqString = currSeq.getAligned().substr(pDataArray->start);
- currSeq.setAligned(seqString);
- }
- }
-
- }
- }
-
- if(goodSeq == 1) { currSeq.printSequence(goodFile); }
- else {
- pDataArray->badSeqNames.insert(currSeq.getName());
- currSeq.setName(currSeq.getName() + '|' + trashCode);
- currSeq.printSequence(badFile);
- }
- }
-
- //report progress
- if((i+1) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine(); }
- }
- //report progress
- if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
-
- goodFile.close();
- inFASTA.close();
- badFile.close();
-
- return 0;
-
- }
- catch(exception& e) {
- pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
- exit(1);
- }
-}
-
-#endif
-
-/**************************************************************************************************/
-
-
-
-#endif