From 28bcfc4a41b8b82f66636587e0d4d355d07cbdd1 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Mon, 6 Aug 2012 08:38:00 -0400 Subject: [PATCH] changes while testing 1.27 --- clustercommand.cpp | 4 +- clusterdoturcommand.cpp | 11 +-- clustersplitcommand.cpp | 4 +- clustersplitcommand.h | 6 +- countseqscommand.cpp | 40 +++++++++++ flowdata.cpp | 16 +++-- groupmap.cpp | 67 +++++++++++++++++ hcluster.cpp | 1 - mgclustercommand.h | 1 - mothurout.cpp | 151 ++++++++++++++++++++++++++++++++++++--- readmatrix.hpp | 1 - sequenceparser.cpp | 74 ++++++++++++++++++- sharedcommand.cpp | 2 +- shhhercommand.cpp | 38 ++++++---- shhhercommand.h | 107 ++++++++++++++------------- sparsedistancematrix.cpp | 4 +- subsamplecommand.cpp | 2 +- treegroupscommand.h | 3 - treemap.cpp | 63 ++++++++++++++++ trimflowscommand.cpp | 2 - trimseqscommand.cpp | 1 - weightedlinkage.cpp | 1 - 22 files changed, 488 insertions(+), 111 deletions(-) diff --git a/clustercommand.cpp b/clustercommand.cpp index 19eaf85..5a46996 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -481,12 +481,12 @@ void ClusterCommand::printData(string label){ loops = 0; start = time(NULL); + oldRAbund.setLabel(label); if (countfile == "") { oldRAbund.print(rabundFile); oldRAbund.getSAbundVector().print(sabundFile); } - - oldRAbund.setLabel(label); + if (m->isTrue(showabund)) { oldRAbund.getSAbundVector().print(cout); } diff --git a/clusterdoturcommand.cpp b/clusterdoturcommand.cpp index 34d2e0e..2515b5c 100644 --- a/clusterdoturcommand.cpp +++ b/clusterdoturcommand.cpp @@ -352,11 +352,12 @@ int ClusterDoturCommand::execute(){ void ClusterDoturCommand::printData(string label){ try { - - oldRAbund.setLabel(label); - oldRAbund.print(rabundFile); - oldRAbund.getSAbundVector().print(sabundFile); - + oldRAbund.setLabel(label); + if (countfile == "") { + oldRAbund.print(rabundFile); + oldRAbund.getSAbundVector().print(sabundFile); + } + oldRAbund.getSAbundVector().print(cout); oldList.setLabel(label); diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 09be4aa..b3ce0f9 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -1035,7 +1035,7 @@ vector ClusterSplitCommand::createProcesses(vector< map //Above fork() will clone, so memory is separate, but that's not the case with windows, //Taking advantage of shared memory to allow both threads to add labels. ////////////////////////////////////////////////////////////////////////////////////////////////////// - + /* vector pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; @@ -1073,7 +1073,7 @@ vector ClusterSplitCommand::createProcesses(vector< map CloseHandle(hThreadArray[i]); delete pDataArray[i]; } - +*/ #endif return listFiles; diff --git a/clustersplitcommand.h b/clustersplitcommand.h index 29dc69a..e079197 100644 --- a/clustersplitcommand.h +++ b/clustersplitcommand.h @@ -76,7 +76,7 @@ private: // anything to do with mothur's use of copy constructors in many of our data structures. ie. listvector // is copied by nameassignment and passed to read which passes to the thread? -westcott 2-8-12 //////////////////////////////////////////////////////////////////////////////////////////////////// -/**************************************************************************************************/ +/************************************************************************************************** //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). @@ -106,7 +106,7 @@ struct clusterData { } }; -/**************************************************************************************************/ +/************************************************************************************************** #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){ @@ -258,7 +258,7 @@ static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){ } #endif - +*/ #endif diff --git a/countseqscommand.cpp b/countseqscommand.cpp index 210dd96..7b9aebf 100644 --- a/countseqscommand.cpp +++ b/countseqscommand.cpp @@ -450,6 +450,26 @@ map CountSeqsCommand::processNameFile(string name) { in.close(); out.close(); + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + m->splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; } + indexToNames[count] = firstCol; + pairDone = false; + count++; + } + } + + } + return indexToNames; } catch(exception& e) { @@ -502,6 +522,26 @@ map CountSeqsCommand::getGroupNames(string filename, set& n } in.close(); out.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + it = groupIndex.find(secondCol); + if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above + groupIndex[secondCol] = count; + count++; + } + out << firstCol << '\t' << groupIndex[secondCol] << endl; + namesOfGroups.insert(secondCol); + pairDone = false; + } + } + } for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; } diff --git a/flowdata.cpp b/flowdata.cpp index 1420f84..1fe7d7f 100644 --- a/flowdata.cpp +++ b/flowdata.cpp @@ -43,13 +43,15 @@ bool FlowData::getNext(ifstream& flowFile){ try { flowFile >> seqName >> endFlow; - //cout << "in Flowdata " + seqName << endl; - for(int i=0;i> flowData[i]; } - //cout << "in Flowdata read " << seqName + " done" << endl; - updateEndFlow(); - translateFlow(); - - m->gobble(flowFile); + if (seqName.length() != 0) { + //cout << "in Flowdata " + seqName << endl; + for(int i=0;i> flowData[i]; } + //cout << "in Flowdata read " << seqName + " done" << endl; + updateEndFlow(); + translateFlow(); + m->gobble(flowFile); + }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); } + if(flowFile){ return 1; } else { return 0; } } diff --git a/groupmap.cpp b/groupmap.cpp index 612b236..5b81210 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -57,6 +57,28 @@ int GroupMap::readMap() { } fileHandle.close(); + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + m->setAllGroups(namesOfGroups); return error; } @@ -101,6 +123,29 @@ int GroupMap::readDesignMap() { } fileHandle.close(); + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + + } + m->setAllGroups(namesOfGroups); return error; } @@ -148,6 +193,28 @@ int GroupMap::readDesignMap(string filename) { } fileHandle.close(); + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + m->setAllGroups(namesOfGroups); return error; } diff --git a/hcluster.cpp b/hcluster.cpp index 6cd4531..f8f4809 100644 --- a/hcluster.cpp +++ b/hcluster.cpp @@ -10,7 +10,6 @@ #include "hcluster.h" #include "rabundvector.hpp" #include "listvector.hpp" -#include "sparsematrix.hpp" /***********************************************************************/ HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(ms), distfile(d), nameMap(n), cutoff(c) { diff --git a/mgclustercommand.h b/mgclustercommand.h index c9c23c8..b5b295f 100644 --- a/mgclustercommand.h +++ b/mgclustercommand.h @@ -12,7 +12,6 @@ #include "command.hpp" #include "readblast.h" -#include "sparsematrix.hpp" #include "nameassignment.hpp" #include "cluster.hpp" #include "hcluster.h" diff --git a/mothurout.cpp b/mothurout.cpp index 9704464..1a3bf79 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1544,7 +1544,6 @@ vector MothurOut::splitWhiteSpace(string input){ //********************************************************************************************************************** int MothurOut::readTax(string namefile, map& taxMap) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1575,6 +1574,23 @@ int MothurOut::readTax(string namefile, map& taxMap) { } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //are there confidence scores, if so remove them + if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } + taxMap[firstCol] = secondCol; + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + pairDone = false; + } + } + } return taxMap.size(); @@ -1587,7 +1603,6 @@ int MothurOut::readTax(string namefile, map& taxMap) { /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, bool redund) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1618,6 +1633,23 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + pairDone = false; + } + } + } return nameMap.size(); @@ -1630,7 +1662,6 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, int flip) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1658,6 +1689,20 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + nameMap[secondCol] = firstCol; + pairDone = false; + } + } + } return nameMap.size(); @@ -1670,7 +1715,7 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, map& nameCount) { try { - nameMap.clear(); nameCount.clear(); + nameMap.clear(); nameCount.clear(); //open input file ifstream in; openInputFile(namefile, in); @@ -1703,6 +1748,24 @@ int MothurOut::readNames(string namefile, map& nameMap, map pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + nameCount[firstCol] = theseNames.size(); + pairDone = false; + } + } + + } return nameMap.size(); } @@ -1714,7 +1777,6 @@ int MothurOut::readNames(string namefile, map& nameMap, map& nameMap) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1739,6 +1801,17 @@ int MothurOut::readNames(string namefile, map& nameMap) { } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + } + } return nameMap.size(); @@ -1750,8 +1823,7 @@ int MothurOut::readNames(string namefile, map& nameMap) { } /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map >& nameMap) { - try { - + try { //open input file ifstream in; openInputFile(namefile, in); @@ -1782,6 +1854,22 @@ int MothurOut::readNames(string namefile, map >& nameMap) } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } + } + return nameMap.size(); } catch(exception& e) { @@ -1792,7 +1880,6 @@ int MothurOut::readNames(string namefile, map >& nameMap) /**********************************************************************************************************************/ map MothurOut::readNames(string namefile) { try { - map nameMap; //open input file @@ -1823,6 +1910,20 @@ map MothurOut::readNames(string namefile) { } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } return nameMap; @@ -1875,6 +1976,29 @@ int MothurOut::readNames(string namefile, vector& nameVector, m } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } + } return error; } catch(exception& e) { @@ -1885,7 +2009,7 @@ int MothurOut::readNames(string namefile, vector& nameVector, m //********************************************************************************************************************** set MothurOut::readAccnos(string accnosfile){ try { - set names; + set names; ifstream in; openInputFile(accnosfile, in); string name; @@ -1903,6 +2027,10 @@ set MothurOut::readAccnos(string accnosfile){ } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } + } return names; } catch(exception& e) { @@ -1930,6 +2058,11 @@ int MothurOut::readAccnos(string accnosfile, vector& names){ for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + } return 0; } diff --git a/readmatrix.hpp b/readmatrix.hpp index 90d5b43..bc3874e 100644 --- a/readmatrix.hpp +++ b/readmatrix.hpp @@ -16,7 +16,6 @@ #include "counttable.h" #include "sparsedistancematrix.h" -class SparseMatrix; class ReadMatrix { diff --git a/sequenceparser.cpp b/sequenceparser.cpp index 08e5ae8..2aff07e 100644 --- a/sequenceparser.cpp +++ b/sequenceparser.cpp @@ -59,7 +59,7 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi in.close(); if (error == 1) { m->control_pressed = true; } - + //read name file ifstream inName; m->openInputFile(nameFile, inName); @@ -148,6 +148,78 @@ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFi } } inName.close(); + + //in case file does not end in white space + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { //save one line + if (m->debug) { m->mothurOut("[DEBUG]: reading names: " + firstCol + '\t' + secondCol + ".\n"); } + vector names; + m->splitAtChar(secondCol, names, ','); + + //get aligned string for these seqs from the fasta file + string alignedString = ""; + map::iterator itAligned = seqName.find(names[0]); + if (itAligned == seqName.end()) { + error = 1; m->mothurOut("[ERROR]: " + names[0] + " is in your name file and not in your fasta file, please correct."); m->mothurOutEndLine(); + }else { + alignedString = itAligned->second; + } + + //separate by group - parse one line in name file + map splitMap; //group -> name1,name2,... + map::iterator it; + for (int i = 0; i < names.size(); i++) { + + string group = groupMap->getGroup(names[i]); + if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your name file and not in your groupfile, please correct."); m->mothurOutEndLine(); } + else { + + it = splitMap.find(group); + if (it != splitMap.end()) { //adding seqs to this group + (it->second) += "," + names[i]; + thisnames1.insert(names[i]); + countName++; + }else { //first sighting of this group + splitMap[group] = names[i]; + countName++; + thisnames1.insert(names[i]); + + //is this seq in the fasta file? + if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match + Sequence tempSeq(names[i], alignedString); //get the first guys sequence string since he's in the fasta file. + seqs[group].push_back(tempSeq); + } + } + } + + allSeqsMap[names[i]] = names[0]; + } + + + //fill nameMapPerGroup - holds all lines in namefile separated by group + for (it = splitMap.begin(); it != splitMap.end(); it++) { + //grab first name + string firstName = ""; + for(int i = 0; i < (it->second).length(); i++) { + if (((it->second)[i]) != ',') { + firstName += ((it->second)[i]); + }else { break; } + } + + //group1 -> seq1 -> seq1,seq2,seq3 + nameMapPerGroup[it->first][firstName] = it->second; + } + + pairDone = false; + } + } + } if (error == 1) { m->control_pressed = true; } diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 1150e53..e6e790d 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -825,7 +825,7 @@ int SharedCommand::createSharedFromListGroup(string filename) { int error = ListGroupSameSeqs(namesSeqs, SharedList); if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error - m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); + m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; out.close(); m->mothurRemove(filename); //remove blank shared file you made diff --git a/shhhercommand.cpp b/shhhercommand.cpp index c34f25d..c409639 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -1039,7 +1039,12 @@ void ShhherCommand::getFlowData(){ float intensity; - flowFile >> numFlowCells; + string numFlowTest; + flowFile >> numFlowTest; + + if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); } + else { convert(numFlowTest, numFlowCells); } + int index = 0;//pcluster while(!flowFile.eof()){ @@ -1376,17 +1381,17 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){ try { ReadMatrix* read = new ReadColumnMatrix(distFileName); - read->setCutoff(cutoff); - - NameAssignment* clusterNameMap = new NameAssignment(namesFileName); - clusterNameMap->readMap(); - read->read(clusterNameMap); - - ListVector* list = read->getListVector(); - SparseMatrix* matrix = read->getMatrix(); + read->setCutoff(cutoff); + + NameAssignment* clusterNameMap = new NameAssignment(namesFileName); + clusterNameMap->readMap(); + read->read(clusterNameMap); - delete read; - delete clusterNameMap; + ListVector* list = read->getListVector(); + SparseDistanceMatrix* matrix = read->getDMatrix(); + + delete read; + delete clusterNameMap; RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); @@ -2029,7 +2034,7 @@ int ShhherCommand::createProcesses(vector filenames){ //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, ////////////////////////////////////////////////////////////////////////////////////////////////////// - + /* vector pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; @@ -2060,7 +2065,7 @@ int ShhherCommand::createProcesses(vector filenames){ CloseHandle(hThreadArray[i]); delete pDataArray[i]; } - + */ #endif for (int i=0;i& thisSeqNameVecto thisFlowDataIntI.clear(); thisNameMap.clear(); - flowFile >> numFlowCells; + string numFlowTest; + flowFile >> numFlowTest; + + if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); } + else { convert(numFlowTest, numFlowCells); } + if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); } int index = 0;//pcluster while(!flowFile.eof()){ diff --git a/shhhercommand.h b/shhhercommand.h index 03f171e..9820cce 100644 --- a/shhhercommand.h +++ b/shhhercommand.h @@ -18,7 +18,6 @@ #include "sabundvector.hpp" #include "listvector.hpp" #include "cluster.hpp" -#include "sparsematrix.hpp" #include //********************************************************************************************************************** @@ -167,7 +166,7 @@ private: }; -/**************************************************************************************************/ +/************************************************************************************************** //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). @@ -203,7 +202,7 @@ struct shhhFlowsData { } }; -/**************************************************************************************************/ +/************************************************************************************************** #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ @@ -234,7 +233,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ int numFlowCells; //int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells); - /*****************************************************************************************************/ + /***************************************************************************************************** ifstream flowFile; // cout << "herethread " << flowFileName << '\t' << &flowFile << endl; @@ -279,13 +278,13 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } } // cout << "here" << endl; - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { return 0; } pDataArray->m->mothurOut("Identifying unique flowgrams...\n"); //int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI); - /*****************************************************************************************************/ + /***************************************************************************************************** int numUniques = 0; uniqueFlowgrams.assign(numFlowCells * numSeqs, -1); uniqueCount.assign(numSeqs, 0); // anWeights @@ -364,7 +363,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } } - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { return 0; } @@ -374,7 +373,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ double begClock = clock(); //flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI); - /*****************************************************************************************************/ + /***************************************************************************************************** ostringstream outStream; outStream.setf(ios::fixed, ios::floatfield); outStream.setf(ios::dec, ios::basefield); @@ -390,7 +389,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ for(int j=0;jm->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC)); pDataArray->m->mothurOutEndLine(); } - /*****************************************************************************************************/ + /***************************************************************************************************** pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n'); string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names"; //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq); - /*****************************************************************************************************/ + /***************************************************************************************************** vector duplicateNames(numUniques, ""); for(int i=0;im->control_pressed) { return 0; } pDataArray->m->mothurOut("\nClustering flowgrams...\n"); string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list"; //cluster(listFileName, distFileName, namesFileName); - /*****************************************************************************************************/ + /***************************************************************************************************** ReadMatrix* read = new ReadColumnMatrix(distFileName); read->setCutoff(pDataArray->cutoff); @@ -502,7 +501,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ listFileOut.close(); delete matrix; delete cluster; delete rabund; delete list; - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { return 0; } @@ -516,7 +515,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap); - /*****************************************************************************************************/ + /***************************************************************************************************** ifstream listFile; pDataArray->m->openInputFile(listFileName, listFile); string label; @@ -596,7 +595,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ seqIndex = seqNumber; listFile.close(); - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { return 0; } @@ -643,7 +642,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ double cycClock = clock(); unsigned long long cycTime = time(NULL); //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI); - /*****************************************************************************************************/ + /***************************************************************************************************** int indexFill = 0; for(int i=0;im->control_pressed) { break; } //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); - /*****************************************************************************************************/ + /***************************************************************************************************** for(int i=0;im->control_pressed) { break; } @@ -708,7 +707,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ for(int k=0;km->control_pressed) { break; } //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight); - /*****************************************************************************************************/ + /***************************************************************************************************** double maxChange = 0; for(int i=0;i maxChange){ maxChange = difference; } } maxDelta = maxChange; - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { break; } //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); - /*****************************************************************************************************/ + /***************************************************************************************************** vector P(numSeqs, 0); int effNumOTUs = 0; @@ -804,12 +803,12 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } nLL = nLL -(double)numSeqs * log(pDataArray->sigma); - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { break; } //checkCentroids(numOTUs, centroids, weight); - /*****************************************************************************************************/ + /***************************************************************************************************** vector unique(numOTUs, 1); for(int i=0;im->control_pressed) { break; } //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths); - /*****************************************************************************************************/ + /***************************************************************************************************** int total = 0; vector newTau(numOTUs,0); vector norms(numSeqs, 0); @@ -860,7 +859,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ if(weight[j] > MIN_WEIGHT && change[j] == 1){ //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells); - /*****************************************************************************************************/ + /***************************************************************************************************** int flowAValue = centroids[j] * numFlowCells; int flowBValue = i * numFlowCells; @@ -873,7 +872,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } dist[indexOffset + j] = distTemp / (double)lengths[i]; - /*****************************************************************************************************/ + /***************************************************************************************************** } @@ -917,7 +916,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } - /*****************************************************************************************************/ + /***************************************************************************************************** if (pDataArray->m->control_pressed) { break; } @@ -931,7 +930,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ pDataArray->m->mothurOut("\nFinalizing...\n"); //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI); - /*****************************************************************************************************/ + /***************************************************************************************************** int indexFill = 0; for(int i=0;im->control_pressed) { break; } //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI); - /*****************************************************************************************************/ + /***************************************************************************************************** vector bigTauMatrix(numOTUs * numSeqs, 0.0000); for(int i=0;im->control_pressed) { break; } @@ -1017,7 +1016,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ for(int i=0;im->control_pressed) { break; } @@ -1062,7 +1061,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ for(int k=0;km->control_pressed) { break; } //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (pDataArray->m->control_pressed) { break; } - /*****************************************************************************************************/ + /***************************************************************************************************** string thisOutputDir = pDataArray->outputDir; if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); } string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual"; @@ -1200,11 +1199,11 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } qualityFile.close(); pDataArray->outputNames.push_back(qualityFileName); - /*****************************************************************************************************/ + /***************************************************************************************************** // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids); if (pDataArray->m->control_pressed) { break; } - /*****************************************************************************************************/ + /***************************************************************************************************** thisOutputDir = pDataArray->outputDir; if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); } string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta"; @@ -1243,11 +1242,11 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName); } - /*****************************************************************************************************/ + /***************************************************************************************************** //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (pDataArray->m->control_pressed) { break; } - /*****************************************************************************************************/ + /***************************************************************************************************** thisOutputDir = pDataArray->outputDir; if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); } string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names"; @@ -1275,11 +1274,11 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ if(pDataArray->thisCompositeNameFileName != ""){ pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName); } - /*****************************************************************************************************/ + /***************************************************************************************************** //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (pDataArray->m->control_pressed) { break; } - /*****************************************************************************************************/ + /***************************************************************************************************** thisOutputDir = pDataArray->outputDir; if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); } string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts"; @@ -1327,12 +1326,12 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } } otuCountsFile.close(); - pDataArray->outputNames.push_back(otuCountsFileName); - /*****************************************************************************************************/ + pDataArray->outputNames.push_back(otuCountsFileName) + /***************************************************************************************************** //writeGroups(flowFileName, numSeqs, seqNameVector); if (pDataArray->m->control_pressed) { break; } - /*****************************************************************************************************/ + /***************************************************************************************************** thisOutputDir = pDataArray->outputDir; if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); } string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)); @@ -1346,7 +1345,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } groupFile.close(); pDataArray->outputNames.push_back(groupFileName); - /*****************************************************************************************************/ + /***************************************************************************************************** pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } @@ -1362,7 +1361,7 @@ static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ } } #endif - +*/ #endif diff --git a/sparsedistancematrix.cpp b/sparsedistancematrix.cpp index 7d50523..b315c48 100644 --- a/sparsedistancematrix.cpp +++ b/sparsedistancematrix.cpp @@ -126,7 +126,7 @@ ull SparseDistanceMatrix::getSmallestCell(ull& row){ return col; } catch(exception& e) { - m->errorOut(e, "SparseMatrix", "getSmallestCell"); + m->errorOut(e, "SparseDistanceMatrix", "getSmallestCell"); exit(1); } } @@ -141,7 +141,7 @@ int SparseDistanceMatrix::sortSeqVec(){ return 0; } catch(exception& e) { - m->errorOut(e, "SparseMatrix", "getSmallestCell"); + m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec"); exit(1); } } diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index f9cb1e6..8c5761d 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -808,7 +808,7 @@ int SubSampleCommand::processShared(vector& thislookup) { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + getOutputFileNameTag("shared", sharedfile); + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + "." +getOutputFileNameTag("shared", sharedfile); SubSample sample; vector subsampledLabels = sample.getSample(thislookup, size); diff --git a/treegroupscommand.h b/treegroupscommand.h index b0ae730..6790afb 100644 --- a/treegroupscommand.h +++ b/treegroupscommand.h @@ -19,7 +19,6 @@ #include "readmatrix.hpp" #include "readcolumn.h" #include "readphylip.h" -#include "sparsematrix.hpp" #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" @@ -69,8 +68,6 @@ They can also use as many or as few calculators as they wish. */ -typedef list::iterator MatData; - class TreeGroupCommand : public Command { public: diff --git a/treemap.cpp b/treemap.cpp index 42ec336..7b9fd32 100644 --- a/treemap.cpp +++ b/treemap.cpp @@ -13,6 +13,9 @@ TreeMap::TreeMap(string filename) { m = MothurOut::getInstance(); + ofstream out2; + m->openOutputFileAppend(filename, out2); + out2 << endl; out2.close(); groupFileName = filename; m->openInputFile(filename, fileHandle); } @@ -22,6 +25,10 @@ /************************************************************/ int TreeMap::readMap(string gf) { try { + ofstream out2; + m->openOutputFileAppend(gf, out2); + out2 << endl; out2.close(); + groupFileName = gf; m->openInputFile(gf, fileHandle); @@ -65,6 +72,34 @@ int TreeMap::readMap(string gf) { } fileHandle.close(); + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + pairDone = false; + } + } + } + return error; } catch(exception& e) { @@ -116,6 +151,34 @@ int TreeMap::readMap() { } fileHandle.close(); + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + pairDone = false; + } + } + } + return error; } catch(exception& e) { diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index d45f20c..6a3535f 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -423,11 +423,9 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN string trashCode = ""; flowData.getNext(flowFile); - //cout << "driver good bit " << flowFile.good() << endl; flowData.capFlows(maxFlows); Sequence currSeq = flowData.getSequence(); - if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 637a720..ae8dfb7 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -611,7 +611,6 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string QualityScores currQual; if(qFileName != ""){ currQual = QualityScores(qFile); m->gobble(qFile); - if ((m->debug)&&(count>15800)) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); m->mothurOut("[DEBUG]: " + toString(getpid()) + '\n'); } } string origSeq = currSeq.getUnaligned(); diff --git a/weightedlinkage.cpp b/weightedlinkage.cpp index 19c41ce..c1e4d51 100644 --- a/weightedlinkage.cpp +++ b/weightedlinkage.cpp @@ -5,7 +5,6 @@ #include "mothur.h" #include "cluster.hpp" #include "rabundvector.hpp" -#include "sparsematrix.hpp" /* This class implements the WPGMA, weighted average neighbor clustering algorithm */ -- 2.39.2