From: Sarah Westcott Date: Wed, 28 Mar 2012 17:23:50 +0000 (-0400) Subject: minor changes to pcr.seqs. Added filterToPos and filterFromPos to sequence class... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=a33a385cc5b7481488f92f794425f01fbf40a543 minor changes to pcr.seqs. Added filterToPos and filterFromPos to sequence class. started work on create.database command. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 2550873..f904a8b 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -51,6 +51,7 @@ A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; }; A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; }; A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; }; + A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; }; A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; }; A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; }; A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; }; @@ -471,6 +472,8 @@ A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = ""; }; A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = ""; }; A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = ""; }; + A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = ""; }; + A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = createdatabasecommand.cpp; sourceTree = ""; }; A79234D513C74BF6002B08E2 /* mothurfisher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurfisher.h; sourceTree = ""; }; A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurfisher.cpp; sourceTree = ""; }; A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countgroupscommand.h; sourceTree = ""; }; @@ -1354,6 +1357,8 @@ A795840C13F13CD900F201D5 /* countgroupscommand.cpp */, A7730EFD13967241007433A3 /* countseqscommand.h */, A7730EFE13967241007433A3 /* countseqscommand.cpp */, + A77EBD2C1523707F00ED407C /* createdatabasecommand.h */, + A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */, A7E9B6C412D37EC400DA6239 /* deconvolutecommand.h */, A7E9B6C312D37EC400DA6239 /* deconvolutecommand.cpp */, A7E9B6C612D37EC400DA6239 /* degapseqscommand.h */, @@ -2295,6 +2300,7 @@ A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */, A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */, A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */, + A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/classifyseqscommand.h b/classifyseqscommand.h index d410558..acee70c 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -10,7 +10,7 @@ * */ -#include "mothur.h" + #include "command.hpp" #include "classify.h" #include "referencedb.h" diff --git a/commandfactory.cpp b/commandfactory.cpp index 579abe8..49dcd2c 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -313,6 +313,7 @@ CommandFactory::~CommandFactory(){ //This function calls the appropriate command fucntions based on user input. Command* CommandFactory::getCommand(string commandName, string optionString){ try { + delete command; //delete the old command //user has opted to redirect output from dir where input files are located to some other place diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp new file mode 100644 index 0000000..e4a911f --- /dev/null +++ b/createdatabasecommand.cpp @@ -0,0 +1,54 @@ +// +// createdatabasecommand.cpp +// Mothur +// +// Created by Sarah Westcott on 3/28/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "createdatabasecommand.h" +#include "sequence.hpp" +#include "inputdata.h" + +//********************************************************************************************************************** +vector CreateDatabaseCommand::setParameters(){ + try { + CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); + CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname); + CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy); + CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist); + CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "CreateDatabaseCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string CreateDatabaseCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n"; + helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required. \n"; + + helpString += "The create.database command should be in the following format: \n"; + helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n"; + helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n"; + helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "CreateDatabaseCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** + + diff --git a/createdatabasecommand.h b/createdatabasecommand.h new file mode 100644 index 0000000..9fc84d7 --- /dev/null +++ b/createdatabasecommand.h @@ -0,0 +1,49 @@ +#ifndef Mothur_createdatabasecommand_h +#define Mothur_createdatabasecommand_h + +// +// createdatabasecommand.h +// Mothur +// +// Created by Sarah Westcott on 3/28/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "command.hpp" +#include "listvector.hpp" + +class CreateDatabaseCommand : public Command { +public: + CreateDatabaseCommand(string); + CreateDatabaseCommand(); + ~CreateDatabaseCommand(){} + + vector setParameters(); + string getCommandName() { return "create.database"; } + string getCommandCategory() { return "OTU-Based Approaches"; } + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Create.database"; } + string getDescription() { return "creates database file that includes, abundances across groups, representative sequences, and taxonomy for each OTU"; } + + + int execute() {}; + void help() { m->mothurOut(getHelpString()); } + +private: + + bool abort; + string listfile, groupfile, repfastafile, repnamesfile, constaxonomyfile, label, outputDir; + + vector outputNames; + + int readFasta(); + int readNames(); + int readTax(); + int processList(ListVector*&); + +}; + + + + +#endif diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index 7f34a03..d950b48 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -223,7 +223,7 @@ PcrSeqsCommand::PcrSeqsCommand(string option) { m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true; } - if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end != -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; } + if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; } if ((ecolifile != "") && (start != -1) && (end != -1)) { m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend. When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true; @@ -516,8 +516,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { currSeq.padToPos(mapAligned[primerEnd]); } - else { currSeq.padToPos(mapAligned[primerStart]); } + if (!keepprimer) { currSeq.filterToPos(mapAligned[primerEnd]); } + else { currSeq.filterToPos(mapAligned[primerStart]); } }else { if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } @@ -533,8 +533,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { currSeq.padFromPos(mapAligned[primerStart]); } - else { currSeq.padFromPos(mapAligned[primerEnd]); } + if (!keepprimer) { currSeq.filterFromPos(mapAligned[primerStart]); } + else { currSeq.filterFromPos(mapAligned[primerEnd]); } } else { if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); } @@ -549,19 +549,19 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else if (currSeq.getAligned().length() != length) { m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; }else { - currSeq.padToPos(start); - currSeq.padFromPos(end); + currSeq.filterToPos(start); + currSeq.filterFromPos(end); } }else{ //using start and end to trim //make sure the seqs are aligned lengths.insert(currSeq.getAligned().length()); if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } else { - if (start != -1) { currSeq.padToPos(start); } + if (start != -1) { currSeq.filterToPos(start); } if (end != -1) { if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; } else { - currSeq.padFromPos(end); + currSeq.filterFromPos(end); } } } diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 9bda05a..0dc15f5 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -682,7 +682,8 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName, if(numParentSeqs > 1 && ignoreChimeras == 1) { ignoreSeq = 1; } else { ignoreSeq = 0; } - Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]); + Compare minCompare; + getErrors(query, referenceSeqs[closestRefIndex], minCompare); if(namesFileName != ""){ it = weights.find(query.getName()); @@ -839,7 +840,7 @@ void SeqErrorCommand::getReferences(){ //*************************************************************************************************************** -Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){ +int SeqErrorCommand::getErrors(Sequence query, Sequence reference, Compare& errors){ try { if(query.getAlignLength() != reference.getAlignLength()){ m->mothurOut("Warning: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n"); @@ -850,7 +851,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){ string r = reference.getAligned(); int started = 0; - Compare errors; + //Compare errors; for(int i=0;ierrorOut(e, "SeqErrorCommand", "getErrors"); diff --git a/seqerrorcommand.h b/seqerrorcommand.h index cc904ec..e7c97e1 100644 --- a/seqerrorcommand.h +++ b/seqerrorcommand.h @@ -10,41 +10,16 @@ * */ -#include "mothur.h" #include "command.hpp" #include "sequence.hpp" #include "referencedb.h" -struct Compare { - int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC; - string refName, queryName, sequence; - double errorRate; - int weight, matches, mismatches, total; - - Compare(){ - AA=0; AT=0; AG=0; AC=0; - TA=0; TT=0; TG=0; TC=0; - GA=0; GT=0; GG=0; GC=0; - CA=0; CT=0; CG=0; CC=0; - NA=0; NT=0; NG=0; NC=0; - Ai=0; Ti=0; Gi=0; Ci=0; Ni=0; - dA=0; dT=0; dG=0; dC=0; - refName = ""; - queryName = ""; - weight = 1; - matches = 0; - mismatches = 0; - total = 0; - errorRate = 1.0000; - sequence = ""; - } -}; class SeqErrorCommand : public Command { public: SeqErrorCommand(string); SeqErrorCommand(); - ~SeqErrorCommand() {} + ~SeqErrorCommand(){} vector setParameters(); string getCommandName() { return "seq.error"; } @@ -65,8 +40,35 @@ private: unsigned long long start; unsigned long long end; linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} + ~linePair(){} }; + struct Compare { + int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC; + string refName, queryName, sequence; + double errorRate; + int weight, matches, mismatches, total; + + Compare(){ + AA=0; AT=0; AG=0; AC=0; + TA=0; TT=0; TG=0; TC=0; + GA=0; GT=0; GG=0; GC=0; + CA=0; CT=0; CG=0; CC=0; + NA=0; NT=0; NG=0; NC=0; + Ai=0; Ti=0; Gi=0; Ci=0; Ni=0; + dA=0; dT=0; dG=0; dC=0; + refName = ""; + queryName = ""; + weight = 1; + matches = 0; + mismatches = 0; + total = 0; + errorRate = 1.0000; + sequence = ""; + } + ~Compare(){}; + }; + vector processIDS; //processid vector lines; vector qLines; @@ -74,7 +76,7 @@ private: void getReferences(); map getWeights(); - Compare getErrors(Sequence, Sequence); + int getErrors(Sequence, Sequence, Compare&); void printErrorHeader(ofstream&); void printErrorData(Compare, int, ofstream&, ofstream&); void printSubMatrix(); diff --git a/sequence.cpp b/sequence.cpp index 090ee14..cfd8ec2 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -571,7 +571,35 @@ void Sequence::padToPos(int start){ startPos = start; } +//******************************************************************************************************************** + +int Sequence::filterToPos(int start){ + + if (start > aligned.length()) { start = aligned.length(); m->mothurOut("[ERROR]: start to large.\n"); } + + for(int j = 0; j < start-1; j++) { + aligned[j] = '.'; + } + + setUnaligned(aligned); + + return 0; + +} +//******************************************************************************************************************** +int Sequence::filterFromPos(int end){ + + if (end > aligned.length()) { end = aligned.length(); m->mothurOut("[ERROR]: end to large.\n"); } + + for(int j = end; j < aligned.length(); j++) { + aligned[j] = '.'; + } + + setUnaligned(aligned); + + return 0; +} //******************************************************************************************************************** int Sequence::getEndPos(){ @@ -591,7 +619,7 @@ int Sequence::getEndPos(){ //******************************************************************************************************************** void Sequence::padFromPos(int end){ - + cout << end << '\t' << endPos << endl; for(int j = end; j < endPos; j++) { aligned[j] = '.'; } diff --git a/sequence.hpp b/sequence.hpp index af0cba8..0a62e0c 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -52,6 +52,8 @@ public: int getEndPos(); void padToPos(int); void padFromPos(int); + int filterToPos(int); //any character before the pos is changed to . and aligned and unaligned strings changed + int filterFromPos(int); //any character after the pos is changed to . and aligned and unaligned strings changed int getAlignLength(); int getAmbigBases(); void removeAmbigBases(); diff --git a/shhhercommand.cpp b/shhhercommand.cpp index f22975e..6ef9532 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -3069,10 +3069,9 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filenam int j=4; //need to get past the first four bases while(qualities[i][j] != -1){ - //cout << i << '\t' << j << '\t' << qualities[i][j] << endl; qualityFile << qualities[i][j] << ' '; + if (j > qualities[i].size()) { break; } j++; - } qualityFile << endl; }