]> git.donarmstrong.com Git - mothur.git/commitdiff
mods to seq.errror
authorpschloss <pschloss>
Tue, 15 Feb 2011 11:17:04 +0000 (11:17 +0000)
committerpschloss <pschloss>
Tue, 15 Feb 2011 11:17:04 +0000 (11:17 +0000)
Mothur.xcodeproj/project.pbxproj
makefile
mothur.h
refchimeratest.cpp
seqerrorcommand.cpp
sequence.cpp
sequence.hpp

index 5b4e0768097ae7513eb03c2baee73eaaf8a82575..b95f673d028e793f407897a9028392403939cda0 100644 (file)
                        attributes = {
                                ORGANIZATIONNAME = "Schloss Lab";
                        };
-                       buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */;
+                       buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */;
                        compatibilityVersion = "Xcode 3.1";
                        developmentRegion = English;
                        hasScannedForEncodings = 1;
                        defaultConfigurationIsVisible = 0;
                        defaultConfigurationName = Release;
                };
-               1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */ = {
+               1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = {
                        isa = XCConfigurationList;
                        buildConfigurations = (
                                1DEB928A08733DD80010E9CD /* Debug */,
index 6ac7e31c49b396397c946010777800a1e29993c2..7cc71f0e9a4064a2163707f3f7bc08da36af2b5e 100644 (file)
--- a/makefile
+++ b/makefile
@@ -9,7 +9,7 @@
 # Macros
 #
 
-USEMPI ?= yes
+USEMPI ?= no
 64BIT_VERSION ?= yes
 USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
index 6e5833f7032aafba146f8b92cd30bacc149ab931..97a104398fb584d2d8371e0f9bca440d4aea1f8b 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -57,6 +57,7 @@
        #include <sys/wait.h>
        #include <sys/time.h>
        #include <sys/resource.h>
+       #include <sys/types.h>
        #include <sys/stat.h>
        #include <unistd.h>
        
@@ -70,7 +71,7 @@
        #include <direct.h> //get cwd
        #include <windows.h>
        #include <psapi.h>
-
+       #include <direct.h>
 #endif
 
 using namespace std;
index e540d8a255363cc980318b2edf1c97bd4adcd722..4d4a658fe471ba9d0970fbb7fa57151d93c53807 100644 (file)
@@ -30,7 +30,7 @@ RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileN
        
        alignLength = referenceSeqs[0].length();
 
-       chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents";
+       chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
 //     chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
 
 }
@@ -111,7 +111,8 @@ int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left,
                
                int lDiffs = 0;
                for(int l=0;l<alignLength;l++){
-                       if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+//                     if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+                       if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l]){
                                lDiffs++;
                        }
                        left[i][l] = lDiffs;
@@ -120,7 +121,8 @@ int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left,
                int rDiffs = 0;
                int index = 0;
                for(int l=alignLength-1;l>=0;l--){
-                       if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+//                     if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
+                       if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l]){
                                rDiffs++;
                        }                       
                        right[i][index++] = rDiffs;
@@ -254,7 +256,8 @@ double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq
        int mismatch = 0;
        
        for(int i=0;i<alignLength;i++){
-               if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
+//             if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
+               if(chimeraRefSeq[i] != '.'){
                        if(querySeq[i] == '-' && chimeraRefSeq[i] == '-'){      /*      do nothing      */      }
                        else if(querySeq[i] == chimeraRefSeq[i]){
                                match++;
index af044fec3f1b60b07da406894ebc454d96df21bd..b3a9a398db93ebf5df773e9e75604905a1892e4d 100644 (file)
@@ -222,9 +222,7 @@ void SeqErrorCommand::help(){
 
 //***************************************************************************************************************
 
-SeqErrorCommand::~SeqErrorCommand(){
-
-}
+SeqErrorCommand::~SeqErrorCommand(){   /*      void    */      }
 
 //***************************************************************************************************************
 
@@ -302,6 +300,8 @@ int SeqErrorCommand::execute(){
                RefChimeraTest chimeraTest(referenceSeqs, errorChimeraFileName);
                outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
                
+               vector<string> megaAlignVector(numRefs, "");
+                               
                int index = 0;
                bool ignoreSeq = 0;
                
@@ -327,7 +327,8 @@ int SeqErrorCommand::execute(){
                        else    {       minCompare.weight = 1;  }
 
                        printErrorData(minCompare, numParentSeqs);
-
+                       
+                       
                        if(!ignoreSeq){
                                for(int i=0;i<minCompare.total;i++){
                                        char letter = minCompare.sequence[i];
@@ -339,7 +340,7 @@ int SeqErrorCommand::execute(){
                        if(qualFileName != "" && reportFileName != ""){
                                report = ReportFile(reportFile);
                                
-                               int origLength = report.getQueryLength();
+//                             int origLength = report.getQueryLength();
                                int startBase = report.getQueryStart();
                                int endBase = report.getQueryEnd();
 
@@ -361,6 +362,9 @@ int SeqErrorCommand::execute(){
                                }                               
                                misMatchCounts[minCompare.mismatches] += minCompare.weight;
                                numSeqs++;
+                               
+                               
+                               megaAlignVector[closestRefIndex] += query.getInlineSeq() + '\n';
                        }
                        
                        index++;
@@ -396,6 +400,16 @@ int SeqErrorCommand::execute(){
 
                printSubMatrix();
                                
+               string megAlignmentFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".ref-query";
+               ofstream megAlignmentFile;
+               m->openOutputFile(megAlignmentFileName, megAlignmentFile);
+               
+               for(int i=0;i<numRefs;i++){
+                       megAlignmentFile << referenceSeqs[i].getInlineSeq() << endl;
+                       megAlignmentFile << megaAlignVector[i] << endl;
+               }
+               
+               
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
@@ -419,20 +433,34 @@ void SeqErrorCommand::getReferences(){
                
                int numAmbigSeqs = 0;
                
+               int maxStartPos = 0;
+               int minEndPos = 100000;
+               
                while(referenceFile){
                        Sequence currentSeq(referenceFile);
                        int numAmbigs = currentSeq.getAmbigBases();
                        if(numAmbigs > 0){      numAmbigSeqs++; }
+                       
+                       int startPos = currentSeq.getStartPos();
+                       if(startPos > maxStartPos)      {       maxStartPos = startPos; }
+
+                       int endPos = currentSeq.getEndPos();
+                       if(endPos < minEndPos)          {       minEndPos = endPos;             }
                        referenceSeqs.push_back(currentSeq);
                        m->gobble(referenceFile);
                }
                referenceFile.close();
+               numRefs = referenceSeqs.size();
+
+               for(int i=0;i<numRefs;i++){
+                       referenceSeqs[i].padToPos(maxStartPos);
+                       referenceSeqs[i].padFromPos(minEndPos);
+               }
                
                if(numAmbigSeqs != 0){
                        m->mothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n");
-               }
+               }               
                
-               numRefs = referenceSeqs.size();
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "getReferences");
@@ -507,7 +535,6 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                                if(started == 1){       break;  }
                        }
                        else if(q[i] != '.' && r[i] == '.'){            //      query extends beyond reference
-//                             m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ".  Ignoring the extra bases in the query\n");
                                if(started == 1){       break;  }
                        }
                        else if(q[i] == '.' && r[i] == '.'){            //      both are missing data
index 872a0f091d49fddac359d528c53c799a50ef3c66..6aa0c0f8ae2c46a32edde8a67276ccb966bc0003 100644 (file)
@@ -427,6 +427,13 @@ string Sequence::getAligned(){
        else                            {  return aligned;  }
 }
 
+//********************************************************************************************************************
+
+string Sequence::getInlineSeq(){
+       return name + '\t' + aligned;   
+}
+
+
 //********************************************************************************************************************
 
 string Sequence::getPairwise(){
@@ -514,7 +521,7 @@ int Sequence::getLongHomoPolymer(){
 //********************************************************************************************************************
 
 int Sequence::getStartPos(){
-       if(endPos == -1){
+       if(startPos == -1){
                for(int j = 0; j < alignmentLength; j++) {
                        if(aligned[j] != '.'){
                                startPos = j + 1;
@@ -529,6 +536,17 @@ int Sequence::getStartPos(){
 
 //********************************************************************************************************************
 
+void Sequence::padToPos(int start){
+
+       for(int j = startPos-1; j < start-1; j++) {
+               aligned[j] = '.';
+       }
+       startPos = start;
+
+}
+
+//********************************************************************************************************************
+
 int Sequence::getEndPos(){
        if(endPos == -1){
                for(int j=alignmentLength-1;j>=0;j--){
@@ -545,6 +563,17 @@ int Sequence::getEndPos(){
 
 //********************************************************************************************************************
 
+void Sequence::padFromPos(int end){
+       
+       for(int j = end; j < endPos; j++) {
+               aligned[j] = '.';
+       }
+       endPos = end;
+       
+}
+
+//********************************************************************************************************************
+
 bool Sequence::getIsAligned(){
        return isAligned;
 }
index 48287af367092715ab999b9ffabf02229c68cacc..94d7d29b6117da80862e9fc8b3d0855c4adb8edd 100644 (file)
@@ -44,9 +44,12 @@ public:
        string getAligned();
        string getPairwise();
        string getUnaligned();
+       string getInlineSeq();
        int getNumBases();
        int getStartPos();
        int getEndPos();
+       void padToPos(int);
+       void padFromPos(int);
        int getAlignLength();
        int getAmbigBases();
        void removeAmbigBases();