loops = 0;
start = time(NULL);
+ oldRAbund.setLabel(label);
if (countfile == "") {
oldRAbund.print(rabundFile);
oldRAbund.getSAbundVector().print(sabundFile);
}
-
- oldRAbund.setLabel(label);
+
if (m->isTrue(showabund)) {
oldRAbund.getSAbundVector().print(cout);
}
void ClusterDoturCommand::printData(string label){
try {
-
- oldRAbund.setLabel(label);
- oldRAbund.print(rabundFile);
- oldRAbund.getSAbundVector().print(sabundFile);
-
+ oldRAbund.setLabel(label);
+ if (countfile == "") {
+ oldRAbund.print(rabundFile);
+ oldRAbund.getSAbundVector().print(sabundFile);
+ }
+
oldRAbund.getSAbundVector().print(cout);
oldList.setLabel(label);
//Above fork() will clone, so memory is separate, but that's not the case with windows,
//Taking advantage of shared memory to allow both threads to add labels.
//////////////////////////////////////////////////////////////////////////////////////////////////////
-
+ /*
vector<clusterData*> pDataArray;
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
-
+*/
#endif
return listFiles;
// anything to do with mothur's use of copy constructors in many of our data structures. ie. listvector
// is copied by nameassignment and passed to read which passes to the thread? -westcott 2-8-12
////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************************************************************************/
+/**************************************************************************************************
//custom data structure for threads to use.
// This is passed by void pointer so it can be any data type
// that can be passed using a single void pointer (LPVOID).
}
};
-/**************************************************************************************************/
+/**************************************************************************************************
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){
}
#endif
-
+*/
#endif
in.close();
out.close();
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ m->splitAtComma(secondCol, theseNames);
+ for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; }
+ indexToNames[count] = firstCol;
+ pairDone = false;
+ count++;
+ }
+ }
+
+ }
+
return indexToNames;
}
catch(exception& e) {
}
in.close();
out.close();
+
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ it = groupIndex.find(secondCol);
+ if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
+ groupIndex[secondCol] = count;
+ count++;
+ }
+ out << firstCol << '\t' << groupIndex[secondCol] << endl;
+ namesOfGroups.insert(secondCol);
+ pairDone = false;
+ }
+ }
+ }
for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; }
try {
flowFile >> seqName >> endFlow;
- //cout << "in Flowdata " + seqName << endl;
- for(int i=0;i<numFlows;i++) { flowFile >> flowData[i]; }
- //cout << "in Flowdata read " << seqName + " done" << endl;
- updateEndFlow();
- translateFlow();
-
- m->gobble(flowFile);
+ if (seqName.length() != 0) {
+ //cout << "in Flowdata " + seqName << endl;
+ for(int i=0;i<numFlows;i++) { flowFile >> flowData[i]; }
+ //cout << "in Flowdata read " << seqName + " done" << endl;
+ updateEndFlow();
+ translateFlow();
+ m->gobble(flowFile);
+ }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+
if(flowFile){ return 1; }
else { return 0; }
}
}
fileHandle.close();
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ it = groupmap.find(seqName);
+
+ if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ groupmap[seqName] = seqGroup; //store data in map
+ seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+ }
+ pairDone = false;
+ }
+ }
+ }
+
m->setAllGroups(namesOfGroups);
return error;
}
}
fileHandle.close();
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ it = groupmap.find(seqName);
+
+ if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ groupmap[seqName] = seqGroup; //store data in map
+ seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+ }
+ pairDone = false;
+ }
+ }
+
+ }
+
m->setAllGroups(namesOfGroups);
return error;
}
}
fileHandle.close();
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ it = groupmap.find(seqName);
+
+ if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ groupmap[seqName] = seqGroup; //store data in map
+ seqsPerGroup[seqGroup]++; //increment number of seqs in that group
+ }
+ pairDone = false;
+ }
+ }
+ }
+
m->setAllGroups(namesOfGroups);
return error;
}
#include "hcluster.h"
#include "rabundvector.hpp"
#include "listvector.hpp"
-#include "sparsematrix.hpp"
/***********************************************************************/
HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(ms), distfile(d), nameMap(n), cutoff(c) {
#include "command.hpp"
#include "readblast.h"
-#include "sparsematrix.hpp"
#include "nameassignment.hpp"
#include "cluster.hpp"
#include "hcluster.h"
//**********************************************************************************************************************
int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
try {
-
//open input file
ifstream in;
openInputFile(namefile, in);
}
}
in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //are there confidence scores, if so remove them
+ if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
+ taxMap[firstCol] = secondCol;
+ if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
+ pairDone = false;
+ }
+ }
+ }
return taxMap.size();
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
try {
-
//open input file
ifstream in;
openInputFile(namefile, in);
}
}
in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ splitAtComma(secondCol, theseNames);
+ for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
+ pairDone = false;
+ }
+ }
+ }
return nameMap.size();
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
try {
-
//open input file
ifstream in;
openInputFile(namefile, in);
}
}
in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ nameMap[secondCol] = firstCol;
+ pairDone = false;
+ }
+ }
+ }
return nameMap.size();
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
try {
- nameMap.clear(); nameCount.clear();
+ nameMap.clear(); nameCount.clear();
//open input file
ifstream in;
openInputFile(namefile, in);
}
in.close();
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ splitAtComma(secondCol, theseNames);
+ for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
+ nameCount[firstCol] = theseNames.size();
+ pairDone = false;
+ }
+ }
+
+ }
return nameMap.size();
}
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
try {
-
//open input file
ifstream in;
openInputFile(namefile, in);
}
}
in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+ }
+ }
return nameMap.size();
}
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
- try {
-
+ try {
//open input file
ifstream in;
openInputFile(namefile, in);
}
in.close();
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ vector<string> temp;
+ splitAtComma(secondCol, temp);
+ nameMap[firstCol] = temp;
+ pairDone = false;
+ }
+ }
+ }
+
return nameMap.size();
}
catch(exception& e) {
/**********************************************************************************************************************/
map<string, int> MothurOut::readNames(string namefile) {
try {
-
map<string, int> nameMap;
//open input file
}
}
in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ int num = getNumNames(secondCol);
+ nameMap[firstCol] = num;
+ pairDone = false;
+ }
+ }
+ }
return nameMap;
}
in.close();
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ int num = getNumNames(secondCol);
+
+ map<string, string>::iterator it = fastamap.find(firstCol);
+ if (it == fastamap.end()) {
+ error = 1;
+ mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+ }else {
+ seqPriorityNode temp(num, it->second, firstCol);
+ nameVector.push_back(temp);
+ }
+
+ pairDone = false;
+ }
+ }
+ }
return error;
}
catch(exception& e) {
//**********************************************************************************************************************
set<string> MothurOut::readAccnos(string accnosfile){
try {
- set<string> names;
+ set<string> names;
ifstream in;
openInputFile(accnosfile, in);
string name;
}
in.close();
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+ for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
+ }
return names;
}
catch(exception& e) {
for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
}
in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+ for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
+ }
return 0;
}
#include "counttable.h"
#include "sparsedistancematrix.h"
-class SparseMatrix;
class ReadMatrix {
in.close();
if (error == 1) { m->control_pressed = true; }
-
+
//read name file
ifstream inName;
m->openInputFile(nameFile, inName);
}
}
inName.close();
+
+ //in case file does not end in white space
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) { //save one line
+ if (m->debug) { m->mothurOut("[DEBUG]: reading names: " + firstCol + '\t' + secondCol + ".\n"); }
+ vector<string> names;
+ m->splitAtChar(secondCol, names, ',');
+
+ //get aligned string for these seqs from the fasta file
+ string alignedString = "";
+ map<string, string>::iterator itAligned = seqName.find(names[0]);
+ if (itAligned == seqName.end()) {
+ error = 1; m->mothurOut("[ERROR]: " + names[0] + " is in your name file and not in your fasta file, please correct."); m->mothurOutEndLine();
+ }else {
+ alignedString = itAligned->second;
+ }
+
+ //separate by group - parse one line in name file
+ map<string, string> splitMap; //group -> name1,name2,...
+ map<string, string>::iterator it;
+ for (int i = 0; i < names.size(); i++) {
+
+ string group = groupMap->getGroup(names[i]);
+ if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your name file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
+ else {
+
+ it = splitMap.find(group);
+ if (it != splitMap.end()) { //adding seqs to this group
+ (it->second) += "," + names[i];
+ thisnames1.insert(names[i]);
+ countName++;
+ }else { //first sighting of this group
+ splitMap[group] = names[i];
+ countName++;
+ thisnames1.insert(names[i]);
+
+ //is this seq in the fasta file?
+ if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match
+ Sequence tempSeq(names[i], alignedString); //get the first guys sequence string since he's in the fasta file.
+ seqs[group].push_back(tempSeq);
+ }
+ }
+ }
+
+ allSeqsMap[names[i]] = names[0];
+ }
+
+
+ //fill nameMapPerGroup - holds all lines in namefile separated by group
+ for (it = splitMap.begin(); it != splitMap.end(); it++) {
+ //grab first name
+ string firstName = "";
+ for(int i = 0; i < (it->second).length(); i++) {
+ if (((it->second)[i]) != ',') {
+ firstName += ((it->second)[i]);
+ }else { break; }
+ }
+
+ //group1 -> seq1 -> seq1,seq2,seq3
+ nameMapPerGroup[it->first][firstName] = it->second;
+ }
+
+ pairDone = false;
+ }
+ }
+ }
if (error == 1) { m->control_pressed = true; }
int error = ListGroupSameSeqs(namesSeqs, SharedList);
if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
- m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
+ m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
out.close(); m->mothurRemove(filename); //remove blank shared file you made
float intensity;
- flowFile >> numFlowCells;
+ string numFlowTest;
+ flowFile >> numFlowTest;
+
+ if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
+ else { convert(numFlowTest, numFlowCells); }
+
int index = 0;//pcluster
while(!flowFile.eof()){
try {
ReadMatrix* read = new ReadColumnMatrix(distFileName);
- read->setCutoff(cutoff);
-
- NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
- clusterNameMap->readMap();
- read->read(clusterNameMap);
-
- ListVector* list = read->getListVector();
- SparseMatrix* matrix = read->getMatrix();
+ read->setCutoff(cutoff);
+
+ NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+ clusterNameMap->readMap();
+ read->read(clusterNameMap);
- delete read;
- delete clusterNameMap;
+ ListVector* list = read->getListVector();
+ SparseDistanceMatrix* matrix = read->getDMatrix();
+
+ delete read;
+ delete clusterNameMap;
RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
//Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct.
//Above fork() will clone, so memory is separate, but that's not the case with windows,
//////////////////////////////////////////////////////////////////////////////////////////////////////
-
+ /*
vector<shhhFlowsData*> pDataArray;
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
-
+ */
#endif
for (int i=0;i<processIDS.size();i++) {
thisFlowDataIntI.clear();
thisNameMap.clear();
- flowFile >> numFlowCells;
+ string numFlowTest;
+ flowFile >> numFlowTest;
+
+ if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
+ else { convert(numFlowTest, numFlowCells); }
+
if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); }
int index = 0;//pcluster
while(!flowFile.eof()){
#include "sabundvector.hpp"
#include "listvector.hpp"
#include "cluster.hpp"
-#include "sparsematrix.hpp"
#include <cfloat>
//**********************************************************************************************************************
};
-/**************************************************************************************************/
+/**************************************************************************************************
//custom data structure for threads to use.
// This is passed by void pointer so it can be any data type
// that can be passed using a single void pointer (LPVOID).
}
};
-/**************************************************************************************************/
+/**************************************************************************************************
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){
int numFlowCells;
//int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ifstream flowFile;
// cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
}
}
// cout << "here" << endl;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
//int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int numUniques = 0;
uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
uniqueCount.assign(numSeqs, 0); // anWeights
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
double begClock = clock();
//flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ostringstream outStream;
outStream.setf(ios::fixed, ios::floatfield);
outStream.setf(ios::dec, ios::basefield);
for(int j=0;j<i;j++){
//float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
int minLength = lengths[mapSeqToUnique[seqA]];
if(lengths[seqB] < minLength){ minLength = lengths[mapSeqToUnique[seqB]]; }
}
flowDistance /= (float) minLength;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if(flowDistance < 1e-6){
outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
pDataArray->m->mothurOutEndLine();
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
pDataArray->m->mothurOutEndLine();
pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
//createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<string> duplicateNames(numUniques, "");
for(int i=0;i<numSeqs;i++){
duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
}
nameFile.close();
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
pDataArray->m->mothurOut("\nClustering flowgrams...\n");
string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
//cluster(listFileName, distFileName, namesFileName);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ReadMatrix* read = new ReadColumnMatrix(distFileName);
read->setCutoff(pDataArray->cutoff);
listFileOut.close();
delete matrix; delete cluster; delete rabund; delete list;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
//int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
ifstream listFile;
pDataArray->m->openInputFile(listFileName, listFile);
string label;
seqIndex = seqNumber;
listFile.close();
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { return 0; }
double cycClock = clock();
unsigned long long cycTime = time(NULL);
//fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int indexFill = 0;
for(int i=0;i<numOTUs;i++){
indexFill++;
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
for(int i=0;i<numOTUs;i++){
if (pDataArray->m->control_pressed) { break; }
for(int k=0;k<position;k++){
// double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int flowAValue = anL[k] * numFlowCells;
int flowBValue = nI * numFlowCells;
}
dist = dist / (double)lengths[nI];
- /*****************************************************************************************************/
+ /*****************************************************************************************************
adF[k] += dist * tauValue;
}
}
centroids[i] = -1;
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
double maxChange = 0;
for(int i=0;i<numOTUs;i++){
if(difference > maxChange){ maxChange = difference; }
}
maxDelta = maxChange;
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<long double> P(numSeqs, 0);
int effNumOTUs = 0;
}
nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//checkCentroids(numOTUs, centroids, weight);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<int> unique(numOTUs, 1);
for(int i=0;i<numOTUs;i++){
}
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int total = 0;
vector<double> newTau(numOTUs,0);
vector<double> norms(numSeqs, 0);
if(weight[j] > MIN_WEIGHT && change[j] == 1){
//dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int flowAValue = centroids[j] * numFlowCells;
int flowBValue = i * numFlowCells;
}
dist[indexOffset + j] = distTemp / (double)lengths[i];
- /*****************************************************************************************************/
+ /*****************************************************************************************************
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
pDataArray->m->mothurOut("\nFinalizing...\n");
//fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int indexFill = 0;
for(int i=0;i<numOTUs;i++){
indexFill++;
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
for(int i=0;i<numOTUs;i++){
}
//fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
indexFill = 0;
for(int i=0;i<numOTUs;i++){
}
/*****************************************************************************************************/
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
for(int i=0;i<numSeqs;i++) { otuCounts[otuData[i]]++; }
//calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
for(int i=0;i<numOTUs;i++){
if (pDataArray->m->control_pressed) { break; }
for(int k=0;k<position;k++){
// double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
int flowAValue = anL[k] * numFlowCells;
int flowBValue = nI * numFlowCells;
}
dist = dist / (double)lengths[nI];
- /*****************************************************************************************************/
+ /*****************************************************************************************************
adF[k] += dist * tauValue;
}
}
}
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
if (pDataArray->m->control_pressed) { break; }
//writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
string thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
}
qualityFile.close();
pDataArray->outputNames.push_back(qualityFileName);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
// writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
//writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
if(pDataArray->thisCompositeNameFileName != ""){
pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
}
- /*****************************************************************************************************/
+ /*****************************************************************************************************
//writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
}
}
otuCountsFile.close();
- pDataArray->outputNames.push_back(otuCountsFileName);
- /*****************************************************************************************************/
+ pDataArray->outputNames.push_back(otuCountsFileName)
+ /*****************************************************************************************************
//writeGroups(flowFileName, numSeqs, seqNameVector);
if (pDataArray->m->control_pressed) { break; }
- /*****************************************************************************************************/
+ /*****************************************************************************************************
thisOutputDir = pDataArray->outputDir;
if (pDataArray->outputDir == "") { thisOutputDir += pDataArray->m->hasPath(flowFileName); }
string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
}
groupFile.close();
pDataArray->outputNames.push_back(groupFileName);
- /*****************************************************************************************************/
+ /*****************************************************************************************************
pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
}
}
#endif
-
+*/
#endif
return col;
}
catch(exception& e) {
- m->errorOut(e, "SparseMatrix", "getSmallestCell");
+ m->errorOut(e, "SparseDistanceMatrix", "getSmallestCell");
exit(1);
}
}
return 0;
}
catch(exception& e) {
- m->errorOut(e, "SparseMatrix", "getSmallestCell");
+ m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec");
exit(1);
}
}
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + getOutputFileNameTag("shared", sharedfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + "." +getOutputFileNameTag("shared", sharedfile);
SubSample sample;
vector<string> subsampledLabels = sample.getSample(thislookup, size);
#include "readmatrix.hpp"
#include "readcolumn.h"
#include "readphylip.h"
-#include "sparsematrix.hpp"
#include "sharedsobscollectsummary.h"
#include "sharedchao1.h"
#include "sharedace.h"
They can also use as many or as few calculators as they wish. */
-typedef list<PCell>::iterator MatData;
-
class TreeGroupCommand : public Command {
public:
TreeMap::TreeMap(string filename) {
m = MothurOut::getInstance();
+ ofstream out2;
+ m->openOutputFileAppend(filename, out2);
+ out2 << endl; out2.close();
groupFileName = filename;
m->openInputFile(filename, fileHandle);
}
/************************************************************/
int TreeMap::readMap(string gf) {
try {
+ ofstream out2;
+ m->openOutputFileAppend(gf, out2);
+ out2 << endl; out2.close();
+
groupFileName = gf;
m->openInputFile(gf, fileHandle);
}
fileHandle.close();
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
+ if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ namesOfSeqs.push_back(seqName);
+ treemap[seqName].groupname = seqGroup; //store data in map
+
+ it2 = seqsPerGroup.find(seqGroup);
+ if (it2 == seqsPerGroup.end()) { //if it's a new group
+ seqsPerGroup[seqGroup] = 1;
+ }else {//it's a group we already have
+ seqsPerGroup[seqGroup]++;
+ }
+ }
+ pairDone = false;
+ }
+ }
+ }
+
return error;
}
catch(exception& e) {
}
fileHandle.close();
+ if (rest != "") {
+ vector<string> pieces = m->splitWhiteSpace(rest);
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { seqName = pieces[i]; columnOne=false; }
+ else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ setNamesOfGroups(seqGroup);
+
+ map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
+ if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ namesOfSeqs.push_back(seqName);
+ treemap[seqName].groupname = seqGroup; //store data in map
+
+ it2 = seqsPerGroup.find(seqGroup);
+ if (it2 == seqsPerGroup.end()) { //if it's a new group
+ seqsPerGroup[seqGroup] = 1;
+ }else {//it's a group we already have
+ seqsPerGroup[seqGroup]++;
+ }
+ }
+ pairDone = false;
+ }
+ }
+ }
+
return error;
}
catch(exception& e) {
string trashCode = "";
flowData.getNext(flowFile);
- //cout << "driver good bit " << flowFile.good() << endl;
flowData.capFlows(maxFlows);
Sequence currSeq = flowData.getSequence();
-
if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows
success = 0;
trashCode += 'l';
QualityScores currQual;
if(qFileName != ""){
currQual = QualityScores(qFile); m->gobble(qFile);
- if ((m->debug)&&(count>15800)) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); m->mothurOut("[DEBUG]: " + toString(getpid()) + '\n'); }
}
string origSeq = currSeq.getUnaligned();
#include "mothur.h"
#include "cluster.hpp"
#include "rabundvector.hpp"
-#include "sparsematrix.hpp"
/* This class implements the WPGMA, weighted average neighbor clustering algorithm */