X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sffinfocommand.cpp;h=399f253570feaefcd39f979a0af2b7f3c5bb8632;hb=6b32d112bb60e9f7eb6d4407a4eed4c49b67bced;hp=a1b7066482b973bcbaeebefdc718c5c96d70aae9;hpb=1a5c2356c1b955c6ec024b2baf9f46377ee7c72e;p=mothur.git diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index a1b7066..399f253 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -559,6 +559,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ if (m->control_pressed) { count = 0; break; } if (count >= header.numReads) { break; } + //if (count >= 100) { break; } } //report progress @@ -646,11 +647,13 @@ int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){ char buffer3 [4]; in.read(buffer3, 4); header.indexLength = be_int4(*(unsigned int *)(&buffer3)); - + //read num reads char buffer4 [4]; in.read(buffer4, 4); header.numReads = be_int4(*(unsigned int *)(&buffer4)); + + if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); } //read header length char buffer5 [2]; @@ -716,7 +719,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -729,7 +732,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -741,9 +744,19 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ in.read(mybuffer,8); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { + unsigned long long offset = 0; + char* thisbuffer = new char[8]; + thisbuffer[0] = (offset >> 56) & 0xFF; + thisbuffer[1] = (offset >> 48) & 0xFF; + thisbuffer[2] = (offset >> 40) & 0xFF; + thisbuffer[3] = (offset >> 32) & 0xFF; + thisbuffer[4] = (offset >> 24) & 0xFF; + thisbuffer[5] = (offset >> 16) & 0xFF; + thisbuffer[6] = (offset >> 8) & 0xFF; + thisbuffer[7] = offset & 0xFF; ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); - out.write(mybuffer, in.gcount()); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + out.write(thisbuffer, 8); out.close(); } } @@ -756,8 +769,14 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); - out.write(mybuffer, in.gcount()); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); + unsigned int offset = 0; + char* thisbuffer = new char[4]; + thisbuffer[0] = (offset >> 24) & 0xFF; + thisbuffer[1] = (offset >> 16) & 0xFF; + thisbuffer[2] = (offset >> 8) & 0xFF; + thisbuffer[3] = offset & 0xFF; + out.write(thisbuffer, 4); out.close(); } } @@ -770,13 +789,20 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); //convert number of reads to 4 byte char* char* thisbuffer = new char[4]; - thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF; - thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF; - thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF; - thisbuffer[3] = numSplitReads[i][j] & 0xFF; + if ((m->findEdianness()) == "BIG_ENDIAN") { + thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF; + thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF; + thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF; + thisbuffer[3] = numSplitReads[i][j] & 0xFF; + }else { + thisbuffer[0] = numSplitReads[i][j] & 0xFF; + thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF; + thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF; + thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF; + } out.write(thisbuffer, 4); out.close(); delete[] thisbuffer; @@ -789,7 +815,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -802,7 +828,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -815,7 +841,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -828,7 +854,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -841,7 +867,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -854,7 +880,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, in.gcount()); out.close(); } @@ -871,7 +897,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){ for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; - m->openOutputFileAppend(filehandlesHeaders[i][j], out); + m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); out.write(mybuffer, spot-spotInFile); out.close(); } @@ -897,7 +923,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H //read header length char buffer [2]; - in.read(buffer, 2); + in.read(buffer, 2); header.headerLength = be_int2(*(unsigned short *)(&buffer)); //read name length @@ -909,33 +935,39 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H char buffer3 [4]; in.read(buffer3, 4); header.numBases = be_int4(*(unsigned int *)(&buffer3)); + //read clip qual left char buffer4 [2]; in.read(buffer4, 2); header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4)); - header.clipQualLeft = 5; + header.clipQualLeft = 5; + //read clip qual right char buffer5 [2]; in.read(buffer5, 2); header.clipQualRight = be_int2(*(unsigned short *)(&buffer5)); - + + //read clipAdapterLeft char buffer6 [2]; in.read(buffer6, 2); header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6)); + //read clipAdapterRight char buffer7 [2]; in.read(buffer7, 2); header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7)); + //read name char* tempBuffer = new char[header.nameLength]; in.read(&(*tempBuffer), header.nameLength); header.name = tempBuffer; if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); } + delete[] tempBuffer; //extract info from name @@ -989,7 +1021,7 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H char * mybuffer; mybuffer = new char [spot-startSpotInFile]; ifstream in2; - m->openInputFile(currentFileName, in2); + in2.open(currentFileName.c_str(), ios::binary); in2.seekg(startSpotInFile); in2.read(mybuffer,spot-startSpotInFile); in2.close(); @@ -998,21 +1030,21 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, H int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex); if(trashCodeLength == 0){ + //cout << header.name << " length = " << spot << '\t' << startSpotInFile << '\t' << in2.gcount() << endl; + ofstream out; - m->openOutputFileAppend(filehandles[barcodeIndex][primerIndex], out); + m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out); out.write(mybuffer, in2.gcount()); out.close(); - delete[] mybuffer; numSplitReads[barcodeIndex][primerIndex]++; } else{ ofstream out; - m->openOutputFileAppend(noMatchFile, out); + m->openOutputFileBinaryAppend(noMatchFile, out); out.write(mybuffer, in2.gcount()); out.close(); - delete[] mybuffer; } - + delete[] mybuffer; } }else{ m->mothurOut("Error reading."); m->mothurOutEndLine(); @@ -1039,7 +1071,11 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr if (trim) { if(header.clipQualRight < header.clipQualLeft){ - seq = "NNNN"; + if (header.clipQualRight == 0) { //don't trim right + seq = seq.substr(header.clipQualLeft-1); + }else { + seq = "NNNN"; + } } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); @@ -1050,11 +1086,12 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr }else{ //if you wanted the sfftxt then you already converted the bases to the right case if (!sfftxt) { + int endValue = header.clipQualRight; //make the bases you want to clip lowercase and the bases you want to keep upper case - if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } + if(endValue == 0){ endValue = seq.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } - for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } + for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); } + for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } } } @@ -1235,10 +1272,11 @@ int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& hea for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; } //make the bases you want to clip lowercase and the bases you want to keep upper case - if(header.clipQualRight == 0){ header.clipQualRight = read.bases.length(); } + int endValue = header.clipQualRight; + if(endValue == 0){ endValue = read.bases.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); } - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { read.bases[i] = toupper(read.bases[i]); } - for (int i = (header.clipQualRight-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } + for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { read.bases[i] = toupper(read.bases[i]); } + for (int i = (endValue-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); } out << endl << "Bases: " << read.bases << endl << "Quality Scores: "; for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } @@ -1260,7 +1298,11 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head if (trim) { if(header.clipQualRight < header.clipQualLeft){ - seq = "NNNN"; + if (header.clipQualRight == 0) { //don't trim right + seq = seq.substr(header.clipQualLeft-1); + }else { + seq = "NNNN"; + } } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft)); @@ -1271,11 +1313,12 @@ int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& head }else{ //if you wanted the sfftxt then you already converted the bases to the right case if (!sfftxt) { + int endValue = header.clipQualRight; //make the bases you want to clip lowercase and the bases you want to keep upper case - if(header.clipQualRight == 0){ header.clipQualRight = seq.length(); } + if(endValue == 0){ endValue = seq.length(); } for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } - for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { seq[i] = toupper(seq[i]); } - for (int i = (header.clipQualRight-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } + for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); } + for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } } } @@ -1296,8 +1339,13 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade if (trim) { if(header.clipQualRight < header.clipQualLeft){ - out << ">" << header.name << " xy=" << header.xy << endl; - out << "0\t0\t0\t0"; + if (header.clipQualRight == 0) { //don't trim right + out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl; + for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; } + }else { + out << ">" << header.name << " xy=" << header.xy << endl; + out << "0\t0\t0\t0"; + } } else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){ out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl; @@ -1325,15 +1373,21 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade //********************************************************************************************************************** int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { try { - if(header.clipQualRight > header.clipQualLeft){ - - int rightIndex = 0; - for (int i = 0; i < header.clipQualRight; i++) { rightIndex += read.flowIndex[i]; } - - out << header.name << ' ' << rightIndex; - for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); } - out << endl; - } + + int endValue = header.clipQualRight; + if (header.clipQualRight == 0) { + endValue = read.flowIndex.size(); + if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); } + } + if(endValue > header.clipQualLeft){ + + int rightIndex = 0; + for (int i = 0; i < endValue; i++) { rightIndex += read.flowIndex[i]; } + + out << header.name << ' ' << rightIndex; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); } + out << endl; + } return 0; @@ -1641,7 +1695,7 @@ bool SffInfoCommand::readOligos(string oligoFile){ // get rest of line in case there is a primer name while (!inOligos.eof()) { char c = inOligos.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1733,7 +1787,7 @@ bool SffInfoCommand::readOligos(string oligoFile){ } filehandles[itBar->second][itPrimer->second] = thisFilename; - m->openOutputFile(thisFilename, temp); temp.close(); + temp.open(thisFilename.c_str(), ios::binary); temp.close(); } } }