From: Sarah Westcott Date: Thu, 24 May 2012 16:50:47 +0000 (-0400) Subject: fixed bug in sffinfo when ~ was used in the sff filename. fixed issue in shhh.flows... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=a5d3e10e24e503483d8e85b54f0fd34b9308d31b Fixed a bug in sffinfo when ~ was used in the sff filename. Fixed an issue in shhh.flows: it was producing an output file called *.flow.fasta instead of *.fasta. Also, when outputdir was used with the file option, it put the shhh.fasta and shhh.names files in the wrong folder. Changed the format of the rarefaction.single output with groups to look more like that of the phylo.diversity command. --- diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 82ff7fa..b3d359c 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -474,17 +474,38 @@ vector RareFactCommand::createGroupFile(vector& outputNames, map //find different types of files map > typesFiles; + map > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci. 
+ vector groupNames; for (int i = 0; i < outputNames.size(); i++) { + string extension = m->getExtension(outputNames[i]); - + string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; + m->mothurRemove(combineFileName); //remove old file + ifstream in; m->openInputFile(outputNames[i], in); string labels = m->getline(in); - string newLine = labels.substr(0, labels.find_first_of('\t')); - - newLine += "\tGroup" + labels.substr(labels.find_first_of('\t')); + istringstream iss (labels,istringstream::in); + string newLabel = ""; vector theseLabels; + while(!iss.eof()) { iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); } + vector< vector > allLabels; + vector thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping + for (int j = 1; j < theseLabels.size()-1; j++) { + if (theseLabels[j+1] == "lci") { + thisSet.push_back(theseLabels[j]); + thisSet.push_back(theseLabels[j+1]); + thisSet.push_back(theseLabels[j+2]); + j++; j++; + }else{ //no lci or hci for this calc. 
+ thisSet.push_back(theseLabels[j]); + } + allLabels.push_back(thisSet); + thisSet.clear(); + } + fileLabels[combineFileName] = allLabels; + map >::iterator itfind = typesFiles.find(extension); if (itfind != typesFiles.end()) { (itfind->second)[outputNames[i]] = file2Group[i]; @@ -493,62 +514,57 @@ vector RareFactCommand::createGroupFile(vector& outputNames, map temp[outputNames[i]] = file2Group[i]; typesFiles[extension] = temp; } - - string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension; - - //print headers - ofstream out; - m->openOutputFile(combineFileName, out); - out << newLine << endl; - out.close(); - + if (!(m->inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); } } //for each type create a combo file - map lineToNumber; + for (map >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) { ofstream out; string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first; m->openOutputFileAppend(combineFileName, out); newFileNames.push_back(combineFileName); - map thisTypesFiles = it->second; - + map thisTypesFiles = it->second; //it->second maps filename to group + set numSampledSet; + //open each type summary file - map > files; //maps file name to lines in file + map > > > files; //maps file name to lines in file int maxLines = 0; - int numColumns = 0; for (map::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) { string thisfilename = itFileNameGroup->first; string group = itFileNameGroup->second; - + ifstream temp; m->openInputFile(thisfilename, temp); //read through first line - labels m->getline(temp); m->gobble(temp); - vector thisFilesLines; - - thisFilesLines.push_back(group); - int count = 1; + map > > thisFilesLines; while (!temp.eof()){ - - string thisLine = m->getline(temp); - - string numSampled = thisLine.substr(0, thisLine.find_first_of('\t')); - int num = 
0; - convert(numSampled, num); - numColumns = m->getNumChar(thisLine, '\t'); - lineToNumber[count] = num; - count++; - - thisFilesLines.push_back(thisLine); - m->gobble(temp); + int numSampled = 0; + temp >> numSampled; m->gobble(temp); + + vector< vector > theseReads; + vector thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear(); + for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A + vector reads; + string next = ""; + for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels + temp >> next; m->gobble(temp); + reads.push_back(next); + } + theseReads.push_back(reads); + } + thisFilesLines[numSampled] = theseReads; + m->gobble(temp); + + numSampledSet.insert(numSampled); } - files[thisfilename] = thisFilesLines; + files[group] = thisFilesLines; //save longest file for below if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); } @@ -557,34 +573,46 @@ vector RareFactCommand::createGroupFile(vector& outputNames, map m->mothurRemove(thisfilename); } - + //output new labels line + out << fileLabels[combineFileName][0][0] << '\t'; + for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A + for (int n = 0; n < groupNames.size(); n++) { // for each group + for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels + out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t'; + } + } + } + out << endl; + //for each label - for (int k = 1; k < maxLines; k++) { + for (set::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) { - //grab data for each group - for (map::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) { - - string thisfilename = itFileNameGroup->first; - map::iterator itLine = lineToNumber.find(k); - if (itLine != 
lineToNumber.end()) { - string output = toString(itLine->second); - if (k < files[thisfilename].size()) { - string line = files[thisfilename][k]; - output = line.substr(0, line.find_first_of('\t')); - output += '\t' + files[thisfilename][0] + '\t' + line.substr(line.find_first_of('\t')); - }else{ - output += '\t' + files[thisfilename][0] + '\t'; - for (int h = 0; h < numColumns; h++) { - output += "NA\t"; - } - } - out << output << endl; - }else { m->mothurOut("[ERROR]: parsing results, cant find " + toString(k)); m->mothurOutEndLine(); } - } + out << (*itNumSampled) << '\t'; + + if (m->control_pressed) { break; } + + for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk + //grab data for each group + for (map > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) { + + string group = itFileNameGroup->first; + + map > >::iterator itLine = files[group].find(*itNumSampled); + if (itLine != files[group].end()) { + for (int l = 0; l < (itLine->second)[k].size(); l++) { + out << (itLine->second)[k][l] << '\t'; + + } + }else { + for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { + out << "NA" << '\t'; + } + } + } + } + out << endl; } - out.close(); - } //return combine file name diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 20caead..f267ba4 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -306,6 +306,7 @@ int SffInfoCommand::execute(){ int start = time(NULL); + filenames[s] = m->getFullPathName(filenames[s]); m->mothurOut("Extracting info from " + filenames[s] + " ..." 
); m->mothurOutEndLine(); string accnos = ""; diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 5bcd7d8..be410d9 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -150,14 +150,21 @@ ShhherCommand::ShhherCommand(string option) { else{ ofstream temp; - string thisoutputDir = m->hasPath(flowFilesFileName); //if user entered a file with a path then preserve it + string thisoutputDir = outputDir; + if (outputDir == "") { thisoutputDir = m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it - //flow.files = 9 character offset - compositeFASTAFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.fasta"; + //we want to rip off .files, and also .flow if its there + string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName)); + if (fileroot[fileroot.length()-1] == '.') { fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot + string extension = m->getExtension(fileroot); + if (extension == ".flow") { fileroot = m->getRootName(fileroot); } + else { fileroot += "."; } //add back if needed + + compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta"; m->openOutputFile(compositeFASTAFileName, temp); temp.close(); - compositeNamesFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.names"; + compositeNamesFileName = thisoutputDir + fileroot + "shhh.names"; m->openOutputFile(compositeNamesFileName, temp); temp.close(); }