git.donarmstrong.com Git - mothur.git/blobdiff - sffinfocommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / sffinfocommand.cpp
index c50255aeb2ae9c202074b78d89d460651cefdc43..c80ac2e51859331f17f02bcdf5abdd3f89bec6f3 100644 (file)
 //**********************************************************************************************************************
 vector<string> SffInfoCommand::setParameters(){        
        try {           
-               CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
-        CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
-               CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
-               CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt);
-               CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow);
-               CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
-               CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
-               CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile);
-        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
-               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
-               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
-        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);
+               CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
+               CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);
+               CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);
+               CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
+               CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
+               CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -70,32 +70,26 @@ string SffInfoCommand::getHelpString(){
                exit(1);
        }
 }
+
 //**********************************************************************************************************************
-string SffInfoCommand::getOutputFileNameTag(string type, string inputName=""){ 
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string SffInfoCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "fasta")            {   outputFileName =  "fasta";   }
-            else if (type == "flow")    {   outputFileName =  "flow";   }
-            else if (type == "sfftxt")        {   outputFileName =  "sff.txt";   }
-            else if (type == "sff")        {   outputFileName =  "sff";   }
-            else if (type == "qfile")       {   outputFileName =  "qual";   }
-             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }
+        else if (type == "flow")    {   pattern =  "[filename],flow";   }
+        else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }
+        else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }
+        else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SffInfoCommand", "getOutputPattern");
+        exit(1);
+    }
 }
-
-
 //**********************************************************************************************************************
 SffInfoCommand::SffInfoCommand(){      
        try {
@@ -504,16 +498,14 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
         string rootName = outputDir + m->getRootName(m->getSimpleName(input));
         if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
         
-               string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("sfftxt");
-               string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("flow");
-               if (trim) {
-                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("fasta");
-                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + getOutputFileNameTag("qfile");
-               }else{
-                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("fasta");
-                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "raw." + getOutputFileNameTag("qfile");
-               }
-               
+        map<string, string> variables; 
+               variables["[filename]"] = rootName;
+               string sfftxtFileName = getOutputFileName("sfftxt",variables);
+               string outFlowFileName = getOutputFileName("flow",variables);
+               if (!trim) { variables["[tag]"] = "raw"; }
+               outFastaFileName = getOutputFileName("fasta",variables);
+        outQualFileName = getOutputFileName("qfile",variables);
+        
                if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
                if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
                if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
@@ -654,7 +646,7 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
                        char buffer3 [4];
                        in.read(buffer3, 4);
                        header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
-                       
+            
                        //read num reads
                        char buffer4 [4];
                        in.read(buffer4, 4);
@@ -749,9 +741,19 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,8);
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
+                unsigned long long offset = 0;
+                char* thisbuffer = new char[8];
+                thisbuffer[0] = (offset >> 56) & 0xFF;
+                thisbuffer[1] = (offset >> 48) & 0xFF;
+                thisbuffer[2] = (offset >> 40) & 0xFF;
+                thisbuffer[3] = (offset >> 32) & 0xFF;
+                thisbuffer[4] = (offset >> 24) & 0xFF;
+                thisbuffer[5] = (offset >> 16) & 0xFF;
+                thisbuffer[6] = (offset >> 8) & 0xFF;
+                thisbuffer[7] = offset & 0xFF;
                 ofstream out;
                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
+                out.write(thisbuffer, 8);
                 out.close();
             }
         }
@@ -765,7 +767,13 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
                 ofstream out;
                 m->openOutputFileAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
+                int offset = 0;
+                char* thisbuffer = new char[4];
+                thisbuffer[0] = (offset >> 24) & 0xFF;
+                thisbuffer[1] = (offset >> 16) & 0xFF;
+                thisbuffer[2] = (offset >> 8) & 0xFF;
+                thisbuffer[3] = offset & 0xFF;
+                out.write(thisbuffer, 4);
                 out.close();
             }
         }
@@ -879,7 +887,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         for (int i = 0; i < filehandlesHeaders.size(); i++) { 
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
                 ofstream out;
-                m->openOutputFileAppend(filehandlesHeaders[i][j], out);
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
                 out.write(mybuffer, spot-spotInFile); 
                 out.close();
             }
@@ -928,7 +936,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H
                        char buffer5 [2];
                        in.read(buffer5, 2);
                        header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
-                       
+            
                        //read clipAdapterLeft
                        char buffer6 [2];
                        in.read(buffer6, 2);
@@ -997,7 +1005,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H
                 char * mybuffer;
                 mybuffer = new char [spot-startSpotInFile];
                 ifstream in2;
-                m->openInputFile(currentFileName, in2);
+                in2.open(currentFileName.c_str(), ios::binary);
                 in2.seekg(startSpotInFile);
                 in2.read(mybuffer,spot-startSpotInFile);
                 in2.close();
@@ -1007,7 +1015,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H
                                 
                 if(trashCodeLength == 0){
                     ofstream out;
-                    m->openOutputFileAppend(filehandles[barcodeIndex][primerIndex], out);
+                    m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);
                     out.write(mybuffer, in2.gcount()); 
                     out.close();
                     delete[] mybuffer;
@@ -1015,7 +1023,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H
                                }
                                else{
                                        ofstream out;
-                    m->openOutputFileAppend(noMatchFile, out);
+                    m->openOutputFileBinaryAppend(noMatchFile, out);
                     out.write(mybuffer, in2.gcount()); 
                     out.close();
                     delete[] mybuffer;
@@ -1047,7 +1055,11 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
         
         if (trim) {
             if(header.clipQualRight < header.clipQualLeft){
-                seq = "NNNN";
+                if (header.clipQualRight == 0) { //don't trim right
+                    seq = seq.substr(header.clipQualLeft-1);
+                }else {
+                    seq = "NNNN";
+                }
             }
             else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                 seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
@@ -1058,11 +1070,12 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
         }else{
             //if you wanted the sfftxt then you already converted the bases to the right case
             if (!sfftxt) {
+                int endValue = header.clipQualRight;
                 //make the bases you want to clip lowercase and the bases you want to keep upper case
-                if(header.clipQualRight == 0){ header.clipQualRight = seq.length();    }
+                if(endValue == 0){     endValue = seq.length();        }
                 for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
-                for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
-                for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
+                for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }
+                for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
             }
         }
         
@@ -1243,10 +1256,11 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea
                for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
                
                //make the bases you want to clip lowercase and the bases you want to keep upper case
-               if(header.clipQualRight == 0){  header.clipQualRight = read.bases.length();     }
+        int endValue = header.clipQualRight;
+               if(endValue == 0){      endValue = read.bases.length(); }
                for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
-               for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
-               for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
+               for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
+               for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
                
                out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
                for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
@@ -1268,7 +1282,11 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head
                
         if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
-                               seq = "NNNN";
+                               if (header.clipQualRight == 0) { //don't trim right
+                    seq = seq.substr(header.clipQualLeft-1);
+                }else {
+                    seq = "NNNN";
+                }
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                                seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
@@ -1279,11 +1297,12 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head
                }else{
                        //if you wanted the sfftxt then you already converted the bases to the right case
                        if (!sfftxt) {
+                int endValue = header.clipQualRight;
                                //make the bases you want to clip lowercase and the bases you want to keep upper case
-                               if(header.clipQualRight == 0){  header.clipQualRight = seq.length();    }
+                               if(endValue == 0){      endValue = seq.length();        }
                                for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
-                               for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++)  {   seq[i] = toupper(seq[i]);  }
-                               for (int i = (header.clipQualRight-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
+                               for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }
+                               for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
                        }
                }
                
@@ -1304,8 +1323,13 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
                
                if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
-                               out << ">" << header.name << " xy=" << header.xy << endl;
-                               out << "0\t0\t0\t0";
+                if (header.clipQualRight == 0) { //don't trim right
+                    out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
+                    for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';      }       
+                }else {
+                    out << ">" << header.name << " xy=" << header.xy << endl;
+                    out << "0\t0\t0\t0";
+                }
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
                                out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
@@ -1333,15 +1357,21 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
 //**********************************************************************************************************************
 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
-               if(header.clipQualRight > header.clipQualLeft){
-                       
-                       int rightIndex = 0;
-                       for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
-
-                       out << header.name << ' ' << rightIndex;
-                       for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
-                       out << endl;
-               }
+        
+        int endValue = header.clipQualRight;
+        if (header.clipQualRight == 0) {
+            endValue = read.flowIndex.size();
+            if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }
+        }
+        if(endValue > header.clipQualLeft){
+            
+            int rightIndex = 0;
+            for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];     }
+            
+            out << header.name << ' ' << rightIndex;
+            for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
+            out << endl;
+        }
                
                
                return 0;
@@ -1396,14 +1426,13 @@ int SffInfoCommand::parseSffTxt() {
                        fileRoot = m->getRootName(fileRoot);
                }
                
-               string outFlowFileName = outputDir + fileRoot + getOutputFileNameTag("flow");
-               if (trim) {
-                       outFastaFileName = outputDir + fileRoot + getOutputFileNameTag("fasta");
-                       outQualFileName = outputDir + fileRoot + getOutputFileNameTag("qfile");
-               }else{
-                       outFastaFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("fasta");
-                       outQualFileName = outputDir + fileRoot + "raw." + getOutputFileNameTag("qfile");
-               }
+        map<string, string> variables; 
+               variables["[filename]"] = fileRoot;
+               string sfftxtFileName = getOutputFileName("sfftxt",variables);
+               string outFlowFileName = getOutputFileName("flow",variables);
+               if (!trim) { variables["[tag]"] = "raw"; }
+               outFastaFileName = getOutputFileName("fasta",variables);
+        outQualFileName = getOutputFileName("qfile",variables);
                
                if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
                if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
@@ -1650,7 +1679,7 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                        // get rest of line in case there is a primer name
                                        while (!inOligos.eof()) {       
                                                char c = inOligos.get(); 
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        } 
@@ -1731,7 +1760,10 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                        }
                                        
                                        ofstream temp;
-                                       string thisFilename = outputDir + m->getRootName(m->getSimpleName(currentFileName)) + comboGroupName + "." + getOutputFileNameTag("sff");
+                    map<string, string> variables; 
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
+                    variables["[group]"] = comboGroupName;
+                                       string thisFilename = getOutputFileName("sff",variables);
                                        if (uniqueNames.count(thisFilename) == 0) {
                                                outputNames.push_back(thisFilename);
                                                outputTypes["sff"].push_back(thisFilename);
@@ -1746,7 +1778,10 @@ bool SffInfoCommand::readOligos(string oligoFile){
                numFPrimers = primers.size();
         numLinkers = linker.size();
         numSpacers = spacer.size();
-               noMatchFile = outputDir + m->getRootName(m->getSimpleName(currentFileName)) + "scrap." + getOutputFileNameTag("sff");
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
+        variables["[group]"] = "scrap";
+               noMatchFile = getOutputFileName("sff",variables);
         m->mothurRemove(noMatchFile);
         
                bool allBlank = true;