]> git.donarmstrong.com Git - mothur.git/commitdiff
minor changes to pcr.seqs. Added filterToPos and filterFromPos to sequence class...
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 28 Mar 2012 17:23:50 +0000 (13:23 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 28 Mar 2012 17:23:50 +0000 (13:23 -0400)
Mothur.xcodeproj/project.pbxproj
classifyseqscommand.h
commandfactory.cpp
createdatabasecommand.cpp [new file with mode: 0644]
createdatabasecommand.h [new file with mode: 0644]
prcseqscommand.cpp
seqerrorcommand.cpp
seqerrorcommand.h
sequence.cpp
sequence.hpp
shhhercommand.cpp

index 2550873ae612a60d93d937c21908389e515a306b..f904a8b33650f30c1432f5866481b22a749ee2a9 100644 (file)
@@ -51,6 +51,7 @@
                A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
                A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
+               A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; };
                A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
                A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
                A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
                A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
                A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
                A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
+               A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = "<group>"; };
+               A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = createdatabasecommand.cpp; sourceTree = "<group>"; };
                A79234D513C74BF6002B08E2 /* mothurfisher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurfisher.h; sourceTree = "<group>"; };
                A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurfisher.cpp; sourceTree = "<group>"; };
                A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countgroupscommand.h; sourceTree = "<group>"; };
                                A795840C13F13CD900F201D5 /* countgroupscommand.cpp */,
                                A7730EFD13967241007433A3 /* countseqscommand.h */,
                                A7730EFE13967241007433A3 /* countseqscommand.cpp */,
+                               A77EBD2C1523707F00ED407C /* createdatabasecommand.h */,
+                               A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */,
                                A7E9B6C412D37EC400DA6239 /* deconvolutecommand.h */,
                                A7E9B6C312D37EC400DA6239 /* deconvolutecommand.cpp */,
                                A7E9B6C612D37EC400DA6239 /* degapseqscommand.h */,
                                A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */,
                                A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */,
                                A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */,
+                               A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index d410558785a4dd00a2df9bb9df71cfb8527093d5..acee70c5cb474dee4e25eea4a6dcf6b5bd55fa06 100644 (file)
@@ -10,7 +10,7 @@
  *
  */
 
-#include "mothur.h"
+
 #include "command.hpp"
 #include "classify.h"
 #include "referencedb.h"
index 579abe82cd4019ab265dcce4220d58aea6924e67..49dcd2cb6f42edfc7208738966a119147192b415 100644 (file)
@@ -313,6 +313,7 @@ CommandFactory::~CommandFactory(){
 //This function calls the appropriate command fucntions based on user input.
 Command* CommandFactory::getCommand(string commandName, string optionString){
        try {
+        
                delete command;   //delete the old command
                
                //user has opted to redirect output from dir where input files are located to some other place
diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp
new file mode 100644 (file)
index 0000000..e4a911f
--- /dev/null
@@ -0,0 +1,54 @@
+//
+//  createdatabasecommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 3/28/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "createdatabasecommand.h"
+#include "sequence.hpp"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> CreateDatabaseCommand::setParameters(){ 
+       try {
+               CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+               CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
+               CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string CreateDatabaseCommand::getHelpString(){ 
+       try {
+               string helpString = "";
+               helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
+               helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.  \n";
+        
+        helpString += "The create.database command should be in the following format: \n";
+               helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";       
+               helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
+               helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n";       
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/createdatabasecommand.h b/createdatabasecommand.h
new file mode 100644 (file)
index 0000000..9fc84d7
--- /dev/null
@@ -0,0 +1,49 @@
+#ifndef Mothur_createdatabasecommand_h
+#define Mothur_createdatabasecommand_h
+
+//
+//  createdatabasecommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 3/28/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+#include "listvector.hpp"
+
+class CreateDatabaseCommand : public Command {
+public:
+       CreateDatabaseCommand(string);
+       CreateDatabaseCommand();
+       ~CreateDatabaseCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "create.database";             }
+       string getCommandCategory()             { return "OTU-Based Approaches"; }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Create.database"; }
+       string getDescription()         { return "creates database file that includes, abundances across groups, representative sequences, and taxonomy for each OTU"; }
+    
+       
+       int execute() {}; 
+       void help() { m->mothurOut(getHelpString()); }  
+       
+private:
+       
+       bool abort;
+       string listfile, groupfile, repfastafile, repnamesfile, constaxonomyfile, label, outputDir;
+       
+       vector<string> outputNames;
+               
+       int readFasta();
+       int readNames();
+    int readTax();
+       int processList(ListVector*&);
+       
+};
+
+
+
+
+#endif
index 7f34a03d353da5a6aa2facec462693521fdda3f4..d950b48eea4ec778371a439c1df89272fec03573 100644 (file)
@@ -223,7 +223,7 @@ PcrSeqsCommand::PcrSeqsCommand(string option)  {
                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
             }
             
-            if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end != -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
+            if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
             
             if ((ecolifile != "") && (start != -1) && (end != -1)) {
                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
@@ -516,8 +516,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                         else{
                             //are you aligned
                             if (aligned) { 
-                                if (!keepprimer)    {  currSeq.padToPos(mapAligned[primerEnd]); } 
-                                else                {  currSeq.padToPos(mapAligned[primerStart]); }
+                                if (!keepprimer)    {  currSeq.filterToPos(mapAligned[primerEnd]); } 
+                                else                {  currSeq.filterToPos(mapAligned[primerStart]); }
                             }else { 
                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -533,8 +533,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                         else{ 
                             //are you aligned
                             if (aligned) { 
-                                if (!keepprimer)    {  currSeq.padFromPos(mapAligned[primerStart]); } 
-                                else                {  currSeq.padFromPos(mapAligned[primerEnd]); } 
+                                if (!keepprimer)    {  currSeq.filterFromPos(mapAligned[primerStart]); } 
+                                else                {  currSeq.filterFromPos(mapAligned[primerEnd]); } 
                             }
                             else { 
                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
@@ -549,19 +549,19 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                     else if (currSeq.getAligned().length() != length) {
                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
                     }else {
-                        currSeq.padToPos(start); 
-                        currSeq.padFromPos(end);
+                        currSeq.filterToPos(start); 
+                        currSeq.filterFromPos(end);
                     }
                 }else{ //using start and end to trim
                     //make sure the seqs are aligned
                     lengths.insert(currSeq.getAligned().length());
                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
                     else {
-                        if (start != -1) { currSeq.padToPos(start); }
+                        if (start != -1) { currSeq.filterToPos(start); }
                         if (end != -1) {
                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
                             else {
-                                currSeq.padFromPos(end);
+                                currSeq.filterFromPos(end);
                             }
                         }
                     }
index 9bda05a912551eca035bdd776387954619f5a8d8..0dc15f59ee98d0ee2d93659c4c3ad1f033f8a193 100644 (file)
@@ -682,7 +682,8 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                        if(numParentSeqs > 1 && ignoreChimeras == 1)    {       ignoreSeq = 1;  }
                        else                                                                                    {       ignoreSeq = 0;  }
                        
-                       Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
+                       Compare minCompare;
+            getErrors(query, referenceSeqs[closestRefIndex], minCompare);
                        
                        if(namesFileName != ""){
                                it = weights.find(query.getName());
@@ -839,7 +840,7 @@ void SeqErrorCommand::getReferences(){
 
 //***************************************************************************************************************
 
-Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
+int SeqErrorCommand::getErrors(Sequence query, Sequence reference, Compare& errors){
        try {
                if(query.getAlignLength() != reference.getAlignLength()){
                        m->mothurOut("Warning: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n");
@@ -850,7 +851,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                string r = reference.getAligned();
 
                int started = 0;
-               Compare errors;
+               //Compare errors;
 
                for(int i=0;i<alignLength;i++){
 //                     cout << r[i] << '\t' << q[i] << '\t';
@@ -929,7 +930,8 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                errors.queryName = query.getName();
                errors.refName = reference.getName();
                
-               return errors;
+               //return errors;
+        return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "getErrors");
index cc904ec7f246aed91970b41cf52e39b7d32b896b..e7c97e152d718b8944b0e004b32398dd1f210e99 100644 (file)
  *
  */
 
-#include "mothur.h"
 #include "command.hpp"
 #include "sequence.hpp"
 #include "referencedb.h"
 
-struct Compare {
-       int AA, AT, AG, AC,     TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
-       string refName, queryName, sequence;
-       double errorRate;
-       int weight, matches, mismatches, total;
-       
-       Compare(){
-               AA=0; AT=0; AG=0; AC=0;
-               TA=0; TT=0; TG=0; TC=0;
-               GA=0; GT=0; GG=0; GC=0;
-               CA=0; CT=0; CG=0; CC=0;
-               NA=0; NT=0; NG=0; NC=0;
-               Ai=0; Ti=0; Gi=0; Ci=0; Ni=0;
-               dA=0; dT=0; dG=0; dC=0;
-               refName = "";
-               queryName = "";
-               weight = 1;
-               matches = 0;
-               mismatches = 0;
-               total = 0;
-               errorRate = 1.0000;
-               sequence = "";
-       }
-};
 
 class SeqErrorCommand : public Command {
 public:
        SeqErrorCommand(string);
        SeqErrorCommand();
-       ~SeqErrorCommand() {}
+       ~SeqErrorCommand(){}
        
        vector<string> setParameters();
        string getCommandName()                 { return "seq.error";                           }
@@ -65,8 +40,35 @@ private:
                unsigned long long start;
                unsigned long long end;
                linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+        ~linePair(){}
        };
        
+    struct Compare {
+        int AA, AT, AG, AC,    TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
+        string refName, queryName, sequence;
+        double errorRate;
+        int weight, matches, mismatches, total;
+        
+        Compare(){
+            AA=0; AT=0; AG=0; AC=0;
+            TA=0; TT=0; TG=0; TC=0;
+            GA=0; GT=0; GG=0; GC=0;
+            CA=0; CT=0; CG=0; CC=0;
+            NA=0; NT=0; NG=0; NC=0;
+            Ai=0; Ti=0; Gi=0; Ci=0; Ni=0;
+            dA=0; dT=0; dG=0; dC=0;
+            refName = "";
+            queryName = "";
+            weight = 1;
+            matches = 0;
+            mismatches = 0;
+            total = 0;
+            errorRate = 1.0000;
+            sequence = "";
+        }
+        ~Compare(){};
+    };
+
        vector<int> processIDS;   //processid
        vector<linePair> lines;
        vector<linePair> qLines;
@@ -74,7 +76,7 @@ private:
 
        void getReferences();
        map<string,int> getWeights();
-       Compare getErrors(Sequence, Sequence);
+       int getErrors(Sequence, Sequence, Compare&);
        void printErrorHeader(ofstream&);
        void printErrorData(Compare, int, ofstream&, ofstream&);
        void printSubMatrix();
index 090ee14c1c838098aaaeb1a831ec01e3272ed512..cfd8ec21498120614b94d3adbdade7275eb6a8b7 100644 (file)
@@ -571,7 +571,35 @@ void Sequence::padToPos(int start){
        startPos = start;
 
 }
+//********************************************************************************************************************
+
+int Sequence::filterToPos(int start){
+    
+    if (start > aligned.length()) { start = aligned.length(); m->mothurOut("[ERROR]: start to large.\n"); }
+    
+       for(int j = 0; j < start-1; j++) {
+               aligned[j] = '.';
+       }
+       
+    setUnaligned(aligned);
+    
+    return 0;
+    
+}
+//********************************************************************************************************************
 
+int Sequence::filterFromPos(int end){
+    
+    if (end > aligned.length()) { end = aligned.length(); m->mothurOut("[ERROR]: end to large.\n"); }
+    
+       for(int j = end; j < aligned.length(); j++) {
+               aligned[j] = '.';
+       }
+       
+    setUnaligned(aligned);
+    
+    return 0;
+}
 //********************************************************************************************************************
 
 int Sequence::getEndPos(){
@@ -591,7 +619,7 @@ int Sequence::getEndPos(){
 //********************************************************************************************************************
 
 void Sequence::padFromPos(int end){
-       
+       cout << end << '\t' << endPos << endl;
        for(int j = end; j < endPos; j++) {
                aligned[j] = '.';
        }
index af0cba814baece3ae9ce4abae02b3fa7370769cf..0a62e0c1ff6b60f20d58ed260847fb39eec44659 100644 (file)
@@ -52,6 +52,8 @@ public:
        int getEndPos();
        void padToPos(int);
        void padFromPos(int);
+    int filterToPos(int); //any character before the pos is changed to . and aligned and unaligned strings changed
+    int filterFromPos(int); //any character after the pos is changed to . and aligned and unaligned strings changed
        int getAlignLength();
        int getAmbigBases();
        void removeAmbigBases();
index f22975ec56ed288026ca4681a223cad66b206137..6ef9532790a5fc63905cb598f1e7bc4baa4c9871 100644 (file)
@@ -3069,10 +3069,9 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filenam
                        
                                int j=4;        //need to get past the first four bases
                                while(qualities[i][j] != -1){
-                    //cout << i << '\t' << j << '\t' << qualities[i][j] << endl;
                     qualityFile << qualities[i][j] << ' ';
+                    if (j > qualities[i].size()) { break; }
                     j++;
-                    
                                }
                                qualityFile << endl;
                        }