git.donarmstrong.com Git - mothur.git/commitdiff
changed reading of name file to use buffered reads. note the splitAtWhiteSpace functi...
author: Sarah Westcott <mothur.westcott@gmail.com>
Tue, 12 Jun 2012 15:27:51 +0000 (11:27 -0400)
committer: Sarah Westcott <mothur.westcott@gmail.com>
Tue, 12 Jun 2012 15:27:51 +0000 (11:27 -0400)
14 files changed:
aligncommand.cpp
aligncommand.h
classifyseqscommand.cpp
classifyseqscommand.h
consensusseqscommand.cpp
createdatabasecommand.cpp
createdatabasecommand.h
groupmap.cpp
mothurout.cpp
mothurout.h
screenseqscommand.h
subsamplecommand.cpp
trimseqscommand.cpp
trimseqscommand.h

index 8215de301b646f4d4571944de85dddb40478e7e5..75466f95b242ae08a71a88a40efec3a8fd230c86 100644 (file)
@@ -875,7 +875,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                        in.close(); m->mothurRemove(tempFile);
                        
-                       appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
+                       m->appendFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
                        m->mothurRemove((alignFileName + toString(processIDS[i]) + ".temp"));
                        
                        appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName);
@@ -892,7 +892,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                        rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
                        
                        for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                               appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+                               m->appendFiles(nonBlankAccnosFiles[h], accnosFName);
                                m->mothurRemove(nonBlankAccnosFiles[h]);
                        }
                }else { //recreate the accnosfile if needed
@@ -957,7 +957,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                else { m->mothurRemove(accnosFName); } //remove so other files can be renamed to it
                
                for (int i = 1; i < processors; i++) {
-                       appendAlignFiles((alignFileName + toString(i) + ".temp"), alignFileName);
+                       m->appendFiles((alignFileName + toString(i) + ".temp"), alignFileName);
                        m->mothurRemove((alignFileName + toString(i) + ".temp"));
                        
                        appendReportFiles((reportFileName + toString(i) + ".temp"), reportFileName);
@@ -973,7 +973,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                        rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
                        
                        for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                               appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+                               m->appendFiles(nonBlankAccnosFiles[h], accnosFName);
                                m->mothurRemove(nonBlankAccnosFiles[h]);
                        }
                }else { //recreate the accnosfile if needed
@@ -990,29 +990,6 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                exit(1);
        }
 }
-/**************************************************************************************************/
-
-void AlignCommand::appendAlignFiles(string temp, string filename) {
-       try{
-               
-               ofstream output;
-               ifstream input;
-               m->openOutputFileAppend(filename, output);
-               m->openInputFile(temp, input);
-               
-               while(char c = input.get()){
-                       if(input.eof())         {       break;                  }
-                       else                            {       output << c;    }
-               }
-               
-               input.close();
-               output.close();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignCommand", "appendAlignFiles");
-               exit(1);
-       }
-}
 //**********************************************************************************************************************
 
 void AlignCommand::appendReportFiles(string temp, string filename) {
@@ -1025,10 +1002,11 @@ void AlignCommand::appendReportFiles(string temp, string filename) {
 
                while (!input.eof())    {       char c = input.get(); if (c == 10 || c == 13){  break;  }       } // get header line
                                
-               while(char c = input.get()){
-                       if(input.eof())         {       break;                  }
-                       else                            {       output << c;    }
-               }
+        char buffer[4096];        
+        while (!input.eof()) {
+            input.read(buffer, 4096);
+            output.write(buffer, input.gcount());
+        }
                
                input.close();
                output.close();
index 7eeaa1eb7e7bc3e2dc28d144d646b2190eb34983..d4b7e78ce01ba2b9e6507a2b5a502bae9dbb8b68 100644 (file)
@@ -55,7 +55,6 @@ private:
        
        int driver(linePair*, string, string, string, string);
        int createProcesses(string, string, string, string);
-       void appendAlignFiles(string, string); 
        void appendReportFiles(string, string);
        
        #ifdef USE_MPI
index b6dc24fe5751867c6b79f71fb50e3b7b74ad75a6..158069e7ae596e3eebd6a9d2da5834ec3529dcdc 100644 (file)
@@ -919,8 +919,8 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                else { m->mothurRemove(accnos); } //remove so other files can be renamed to it
         
                for(int i=0;i<processIDS.size();i++){
-                       appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
-                       appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+                       m->appendFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
+                       m->appendFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
             if (!(m->isBlank(accnos + toString(processIDS[i]) + ".temp"))) {
                                nonBlankAccnosFiles.push_back(accnos + toString(processIDS[i]) + ".temp");
                        }else { m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));  }
@@ -934,7 +934,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                        rename(nonBlankAccnosFiles[0].c_str(), accnos.c_str());
                        
                        for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                               appendTaxFiles(nonBlankAccnosFiles[h], accnos);
+                               m->appendFiles(nonBlankAccnosFiles[h], accnos);
                                m->mothurRemove(nonBlankAccnosFiles[h]);
                        }
                }else { //recreate the accnosfile if needed
@@ -951,30 +951,6 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                exit(1);
        }
 }
-/**************************************************************************************************/
-
-void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
-       try{
-               
-               ofstream output;
-               ifstream input;
-               m->openOutputFileAppend(filename, output);
-               m->openInputFile(temp, input);
-               
-               while(char c = input.get()){
-                       if(input.eof())         {       break;                  }
-                       else                            {       output << c;    }
-               }
-               
-               input.close();
-               output.close();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifySeqsCommand", "appendTaxFiles");
-               exit(1);
-       }
-}
-
 //**********************************************************************************************************************
 
 int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
index acee70c5cb474dee4e25eea4a6dcf6b5bd55fa06..1316a250cc8418843e542dd446e021769d6f6ebd 100644 (file)
@@ -75,7 +75,6 @@ private:
        bool abort, probs, save, flip;
        
        int driver(linePair*, string, string, string, string);
-       void appendTaxFiles(string, string);
        int createProcesses(string, string, string, string); 
        string addUnclassifieds(string, int);
        
index 223e5dbb86600907fafaafdf5af48ee665adfc5b..55ec8023270cd17d38b830f519ad9f62518e635e 100644 (file)
@@ -653,38 +653,29 @@ int ConsensusSeqsCommand::readFasta(){
 
 int ConsensusSeqsCommand::readNames(){
         try{
-                
-                ifstream in;
-                m->openInputFile(namefile, in);
-                
-                string thisname, repnames;
-                map<string, string>::iterator it;
-                
-                bool error = false;
-                
-                while(!in.eof()){
-                        
-                        if (m->control_pressed) { break; }
-                        
-                        in >> thisname;                m->gobble(in);          //read from first column
-                        in >> repnames;                        //read from second column
-                        
-                        it = nameMap.find(thisname);
+         map<string, string> temp;
+         map<string, string>::iterator it;
+         bool error = false;
+         
+         m->readNames(namefile, temp); //use central buffered read
+         
+         for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end(); itTemp++) {
+             string thisname, repnames;
+             thisname = itTemp->first;
+             repnames = itTemp->second;
+             
+             it = nameMap.find(thisname);
                         if (it != nameMap.end()) { //then this sequence was in the fastafile
-                                
+                                nameFileMap[thisname] = repnames;      //for later when outputting the new namesFile if the list file is unique
+                 
                                 vector<string> splitRepNames;
                                 m->splitAtComma(repnames, splitRepNames);
                                 
-                                nameFileMap[thisname] = repnames;      //for later when outputting the new namesFile if the list file is unique
                                 for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; }
                                 
                         }else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; }
-                        
-                        m->gobble(in);
-                }
-                
-                in.close();
-                
+         }
+         
                 if (error) { m->control_pressed = true; }
  
                 return 0;
index 1da67e6d8694096da74ce8f855b27cf3b96d2f89..57d5264b8b151d4175b6754ab2ee75732e1e597e 100644 (file)
@@ -209,7 +209,7 @@ int CreateDatabaseCommand::execute(){
         
         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
         map<string, string> repNames;
-        int numUniqueNamesFile = readNames(repNames);
+        int numUniqueNamesFile = m->readNames(repnamesfile, repNames);
         
         //are there the same number of otus in the fasta and name files
         if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
@@ -394,32 +394,6 @@ vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
                exit(1);
        }
 }
-/**********************************************************************************************************************/
-int CreateDatabaseCommand::readNames(map<string, string>& nameMap) { 
-       try {
-               
-               //open input file
-               ifstream in;
-               m->openInputFile(repnamesfile, in);
-               
-               while (!in.eof()) {
-                       if (m->control_pressed) { break; }
-                       
-                       string firstCol, secondCol;
-                       in >> firstCol >> secondCol; m->gobble(in);
-                       
-                       nameMap[secondCol] = firstCol;
-               }
-               in.close();
-               
-               return nameMap.size();
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "CreateDatabaseCommand", "readNames");
-               exit(1);
-       }
-}
 //**********************************************************************************************************************
 ListVector* CreateDatabaseCommand::getList(){
        try {
index 643ff6ec9fc3691c4a9de5f146cbfb1fdcaff354..37e3013607f5f19585a39299a7c22b92a7e8cd24 100644 (file)
@@ -39,7 +39,6 @@ private:
                
        vector<int> readFasta(vector<Sequence>&);
     vector<int> readTax(vector<string>&);
-    int readNames(map<string, string>&); 
        ListVector* getList();
        
 };
index 92a43e965044c06c2fdcaca78d50779440b301ab..612b2364d617432d64819413b8d8eb03d9865a64 100644 (file)
 
 /************************************************************/
 int GroupMap::readMap() {
-               string seqName, seqGroup;
+    try {
+        string seqName, seqGroup;
                int error = 0;
-
-               while(fileHandle){
-                       fileHandle >> seqName;  m->gobble(fileHandle);          //read from first column
-                       fileHandle >> seqGroup;                 //read from second column
-                       
-                       if (m->control_pressed) {  fileHandle.close();  return 1; }
-       
-                       setNamesOfGroups(seqGroup);
-                       
-                       it = groupmap.find(seqName);
-                       
-                       if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
-                       else {
-                               groupmap[seqName] = seqGroup;   //store data in map
-                               seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
-                       }
-                       m->gobble(fileHandle);
-               }
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+    
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+        
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+        
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+            
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
+        }
                fileHandle.close();
+        
                m->setAllGroups(namesOfGroups);
                return error;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "readMap");
+               exit(1);
+       }
 }
 /************************************************************/
 int GroupMap::readDesignMap() {
-               string seqName, seqGroup;
+    try {
+        string seqName, seqGroup;
                int error = 0;
-
-               while(fileHandle){
-                       fileHandle >> seqName;  m->gobble(fileHandle);          //read from first column
-                       fileHandle >> seqGroup;                 //read from second column
-                       
-                       if (m->control_pressed) {  fileHandle.close();  return 1; }
-       
-                       setNamesOfGroups(seqGroup);
-                       
-                       it = groupmap.find(seqName);
-                       
-                       if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine();  }
-                       else {
-                               groupmap[seqName] = seqGroup;   //store data in map
-                               seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
-                       }
-                       m->gobble(fileHandle);
-               }
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+            
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
+        }
                fileHandle.close();
+        
                m->setAllGroups(namesOfGroups);
                return error;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "readDesignMap");
+               exit(1);
+       }
 }
 /************************************************************/
 int GroupMap::readDesignMap(string filename) {
-    groupFileName = filename;
-       m->openInputFile(filename, fileHandle);
-       index = 0;
-    string seqName, seqGroup;
-    int error = 0;
-    
-    while(fileHandle){
-        fileHandle >> seqName; m->gobble(fileHandle);          //read from first column
-        fileHandle >> seqGroup;                        //read from second column
-        
-        if (m->control_pressed) {  fileHandle.close();  return 1; }
-        
-        setNamesOfGroups(seqGroup);
-        
-        it = groupmap.find(seqName);
+    try {
+        groupFileName = filename;
+        m->openInputFile(filename, fileHandle);
+        index = 0;
+        string seqName, seqGroup;
+               int error = 0;
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
         
-        if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine();  }
-        else {
-            groupmap[seqName] = seqGroup;      //store data in map
-            seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+        while (!fileHandle.eof()) {
+            if (m->control_pressed) { fileHandle.close();  return 1; }
+            
+            fileHandle.read(buffer, 4096);
+            vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  seqName = pieces[i]; columnOne=false; }
+                else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    setNamesOfGroups(seqGroup);
+                    
+                    it = groupmap.find(seqName);
+                    
+                    if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
+                    else {
+                        groupmap[seqName] = seqGroup;  //store data in map
+                        seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
+                    }
+                    pairDone = false; 
+                } 
+            }
         }
-        m->gobble(fileHandle);
+               fileHandle.close();
+        
+               m->setAllGroups(namesOfGroups);
+               return error;
     }
-    fileHandle.close();
-    m->setAllGroups(namesOfGroups);
-    return error;
+       catch(exception& e) {
+               m->errorOut(e, "GroupMap", "readDesignMap");
+               exit(1);
+       }
 }
 /************************************************************/
 int GroupMap::getNumGroups() { return namesOfGroups.size();    }
index 6ecb86f94e5533165551ed5ae571f0c51f3c9ff9..f98bea8ca92fbb387ad0d3727dbfbc858da49583 100644 (file)
@@ -1092,11 +1092,14 @@ int MothurOut::appendFiles(string temp, string filename) {
                
                int numLines = 0;
                if (ableToOpen == 0) { //you opened it
-                       while(!input.eof()){
-                char c = input.get();
-                               if(input.eof())         {       break;                  }
-                               else                            {       output << c;    if (c == '\n') {numLines++;} }
-                       }
+            
+            char buffer[4096];        
+            while (!input.eof()) {
+                input.read(buffer, 4096);
+                output.write(buffer, input.gcount());
+                //count number of lines
+                for (int i = 0; i < input.gcount(); i++) {  if (buffer[i] == '\n') {numLines++;} }
+            }
                        input.close();
                }
                
@@ -1454,6 +1457,30 @@ float MothurOut::ceilDist(float dist, int precision){
                exit(1);
        }
 }
+/***********************************************************************/
+
+// Splits the first `size` characters of `buffer` into whitespace separated tokens.
+// `rest` is in/out: on entry it holds the partial token left over from the previous
+// chunk, on exit it holds the trailing token that has not yet been terminated by
+// whitespace (empty if the chunk ended in whitespace). Callers reading a file chunk
+// by chunk must process a non-empty `rest` themselves once the input is exhausted.
+// Returns the completed tokens in order; empty tokens are never returned.
+vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
+       try {
+        vector<string> pieces;
+        
+        for (int i = 0; i < size; i++) {
+            //isspace is undefined for negative char values, so go through unsigned char
+            if (!isspace(static_cast<unsigned char>(buffer[i])))  { rest += buffer[i];  }
+            //whitespace ends the current token. Skip when there is no token, a chunk can start with whitespace
+            //(leading blanks, or a run of whitespace such as \r\n split across two reads) and an empty piece would shift the columns
+            else if (rest != "") { pieces.push_back(rest);  rest = ""; }
+        }
+        
+        return pieces;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "splitWhiteSpace");
+               exit(1);
+       }
+}
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, map<string, string>& nameMap) { 
        try {
@@ -1461,14 +1488,25 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                //open input file
                ifstream in;
                openInputFile(namefile, in);
-               
+
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
                while (!in.eof()) {
                        if (control_pressed) { break; }
                        
-                       string firstCol, secondCol;
-                       in >> firstCol >> secondCol; gobble(in);
-                       
-                       nameMap[firstCol] = secondCol;
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+             
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+            }
                }
                in.close();
                
@@ -1488,21 +1526,33 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                ifstream in;
                openInputFile(namefile, in);
                
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
                while (!in.eof()) {
                        if (control_pressed) { break; }
                        
-                       string firstCol, secondCol;
-                       in >> firstCol >> secondCol; gobble(in);
-                       
-                       vector<string> temp;
-                       splitAtComma(secondCol, temp);
-                       
-                       nameMap[firstCol] = temp;
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    vector<string> temp;
+                    splitAtComma(secondCol, temp);
+                    nameMap[firstCol] = temp;
+                    pairDone = false;  
+                } 
+            }
                }
                in.close();
-               
+        
                return nameMap.size();
-               
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "readNames");
@@ -1519,18 +1569,30 @@ map<string, int> MothurOut::readNames(string namefile) {
                ifstream in;
                openInputFile(namefile, in);
                
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
                while (!in.eof()) {
                        if (control_pressed) { break; }
                        
-                       string firstCol, secondCol;
-                       in >> firstCol;  gobble(in);
-            in >> secondCol; gobble(in);
-                       
-                       int num = getNumNames(secondCol);
-                       
-                       nameMap[firstCol] = num;
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
                }
-               in.close();
+        in.close();
                
                return nameMap;
                
@@ -1549,27 +1611,41 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                ifstream in;
                openInputFile(namefile, in);
                
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
                while (!in.eof()) {
                        if (control_pressed) { break; }
                        
-                       string firstCol, secondCol;
-                       in >> firstCol >> secondCol; gobble(in);
-                       
-                       int num = getNumNames(secondCol);
-                       
-                       map<string, string>::iterator it = fastamap.find(firstCol);
-                       if (it == fastamap.end()) {
-                               error = 1;
-                               mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
-                       }else {
-                               seqPriorityNode temp(num, it->second, firstCol);
-                               nameVector.push_back(temp);
-                       }
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    int num = getNumNames(secondCol);
+                    
+                    map<string, string>::iterator it = fastamap.find(firstCol);
+                    if (it == fastamap.end()) {
+                        error = 1;
+                        mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+                    }else {
+                        seqPriorityNode temp(num, it->second, firstCol);
+                        nameVector.push_back(temp);
+                    }
+                    
+                    pairDone = false;  
+                } 
+            }
                }
-               in.close();
-               
+        in.close();
+        
                return error;
-               
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "readNames");
index cc8bfb6b101b13b0ebe45a2eb2f5472600d6e0fc..b19c05a68381c3ca96e19264e0eca931e22a0895 100644 (file)
@@ -97,6 +97,7 @@ class MothurOut {
                string getline(istringstream&);
                void gobble(istream&);
                void gobble(istringstream&);
+        vector<string> splitWhiteSpace(string& rest, char[], int);
                map<string, int> readNames(string);
                int readNames(string, map<string, string>&);
                int readNames(string, map<string, vector<string> >&);
index 291d8e6d1acd4605f4827159f10aa74269e1a771..54c8fbb76a9f8db6d200b02feb2e28ce106682d0 100644 (file)
@@ -60,7 +60,6 @@ private:
        vector<string> outputNames;
        vector<string> optimize;
        map<string, int> nameMap;
-       int readNames();
        
        int getSummary(vector<unsigned long long>&);
        int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
index 717b1d3231c20368a2d23e9b86d52a6707f03b12..aebba6bbc430a6608d5fe8bb0a31ef9ebd58c4f4 100644 (file)
@@ -639,34 +639,13 @@ int SubSampleCommand::getNames() {
 int SubSampleCommand::readNames() {
        try {
                
-               ifstream in;
-               m->openInputFile(namefile, in);
-               
-               string thisname, repnames;
-               map<string, vector<string> >::iterator it;
-               
-               while(!in.eof()){
-                       
-                       if (m->control_pressed) { in.close(); return 0; }
-                       
-                       in >> thisname;         m->gobble(in);          //read from first column
-                       in >> repnames;                 //read from second column
-                       
-                       it = nameMap.find(thisname);
-                       if (it == nameMap.end()) {
-                               
-                               vector<string> splitRepNames;
-                               m->splitAtComma(repnames, splitRepNames);
-                               
-                               nameMap[thisname] = splitRepNames;      
-                               for (int i = 0; i < splitRepNames.size(); i++) { names.push_back(splitRepNames[i]); }
-                               
-                       }else{  m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine();  }
-                       
-                       m->gobble(in);
-               }
-               in.close();     
-               
+        nameMap.clear();
+        m->readNames(namefile, nameMap);
+        
+        //save names of all sequences
+        map<string, vector<string> >::iterator it;
+        for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } }
+        
                return 0;
                
        }
index c019a70e4a2a7d35374192b3f8cca11787e7ecef..6f5bb979a4d45b9fc3d8201848aa9f6b2d2dddb7 100644 (file)
@@ -687,6 +687,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                currQual.printQScores(trimQualFile);
                                        }
                                        
+                    
                                        if(nameFile != ""){
                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
@@ -708,11 +709,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        
                                                        outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
                                                        
+                            int numRedundants = 0;
                                                        if (nameFile != "") {
                                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                                if (itName != nameMap.end()) { 
                                                                        vector<string> thisSeqsNames; 
                                                                        m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                    numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
                                                                        for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
                                                                                outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
                                                                        }
@@ -720,8 +723,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        }
                                                        
                                                        map<string, int>::iterator it = groupCounts.find(thisGroup);
-                                                       if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
-                                                       else { groupCounts[it->first]++; }
+                                                       if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
+                                                       else { groupCounts[it->first] += (1 + numRedundants); }
                                                                
                                                }
                                        }
index ba4e61411b8498820139e8ae2b1ed71cb37b0432..e280c8a1d4b50336f125dda5788d91d2af76c09a 100644 (file)
@@ -413,11 +413,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                        
                                                        outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
                                                        
+                            int numRedundants = 0;
                                                        if (pDataArray->nameFile != "") {
                                                                map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
                                                                if (itName != pDataArray->nameMap.end()) { 
                                                                        vector<string> thisSeqsNames; 
                                                                        pDataArray->m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                    numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
                                                                        for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
                                                                                outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
                                                                        }
@@ -425,8 +427,8 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                        }
                                                        
                                                        map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
-                                                       if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1; }
-                                                       else { pDataArray->groupCounts[it->first]++; }
+                                                       if (it == pDataArray->groupCounts.end()) {      pDataArray->groupCounts[thisGroup] = 1 + numRedundants; }
+                                                       else { pDataArray->groupCounts[it->first] += (1 + numRedundants); }
                             
                                                }
                                        }