X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=mothurout.cpp;h=81436871d6fd14b8f40c7101b397c1637887bc96;hp=64f0bc88713d2c3f56cfb6f98cc1cbc387ee7966;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hpb=957d67f7d8bbadfd2930de061e89fd9b149270fd diff --git a/mothurout.cpp b/mothurout.cpp index 64f0bc8..8143687 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -23,6 +23,7 @@ set MothurOut::getCurrentTypes() { set types; types.insert("fasta"); + types.insert("summary"); types.insert("accnos"); types.insert("column"); types.insert("design"); @@ -43,6 +44,7 @@ set MothurOut::getCurrentTypes() { types.insert("tree"); types.insert("flow"); types.insert("biom"); + types.insert("count"); types.insert("processors"); return types; @@ -78,7 +80,9 @@ void MothurOut::printCurrentFiles() { if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); } if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); } if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); } + if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); } if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); } + if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); } } catch(exception& e) { @@ -112,6 +116,8 @@ bool MothurOut::hasCurrentFiles() { if (treefile != "") { return true; } if (flowfile != "") { return true; } if (biomfile != "") { return true; } + if (counttablefile != "") { return true; } + if (summaryfile != "") { return true; } if (processors != "1") { return true; } return hasCurrent; @@ -147,6 +153,8 @@ void MothurOut::clearCurrentFiles() { taxonomyfile = ""; flowfile = ""; biomfile = ""; + counttablefile = ""; + summaryfile = ""; processors = "1"; } catch(exception& e) { @@ -340,6 +348,27 @@ void MothurOut::mothurOut(string output) { } } /*********************************************************************************************/ +void MothurOut::mothurOutJustToScreen(string output) { + try { + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen +#endif + logger() << output; + +#ifdef USE_MPI + } +#endif + } + catch(exception& e) { + errorOut(e, "MothurOut", "MothurOut"); + exit(1); + } +} +/*********************************************************************************************/ void MothurOut::mothurOutEndLine() { try { #ifdef USE_MPI @@ -436,10 +465,24 @@ void MothurOut::errorOut(exception& e, string object, string function) { //double vm, rss; //mem_usage(vm, rss); - mothurOut("[ERROR]: "); - mothurOut(toString(e.what())); - mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); - mothurOutEndLine(); + string errorType = toString(e.what()); + + int pos = errorType.find("bad_alloc"); + mothurOut("[ERROR]: "); + mothurOut(errorType); + + if (pos == string::npos) { //not bad_alloc + mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); + mothurOutEndLine(); + }else { //bad alloc + if (object == "cluster"){ + mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); + }else if (object == "shhh.flows"){ + mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. "); + }else { + mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); + } + } } /*********************************************************************************************/ //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c @@ -545,6 +588,26 @@ int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){ exit(1); } } +/***********************************************************************/ +int MothurOut::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){ + try { + fileName = getFullPathName(fileName); + + fileHandle.open(fileName.c_str(), ios::app | ios::binary); + if(!fileHandle) { + mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine(); + return 1; + } + else { + return 0; + } + } + catch(exception& e) { + errorOut(e, "MothurOut", "openOutputFileAppend"); + exit(1); + } +} + /***********************************************************************/ void MothurOut::gobble(istream& f){ try { @@ -716,6 +779,8 @@ string MothurOut::getPathName(string longName){ bool MothurOut::dirCheck(string& dirName){ try { + if (dirName == "") { return false; } + string tag = ""; #ifdef USE_MPI int pid; @@ -734,7 +799,7 @@ bool MothurOut::dirCheck(string& dirName){ //test to make sure directory exists dirName = getFullPathName(dirName); - string outTemp = dirName + tag + "temp"; + string outTemp = dirName + tag + "temp"+ toString(time(NULL)); ofstream out; out.open(outTemp.c_str(), ios::trunc); if(!out) { @@ -753,6 +818,39 @@ bool MothurOut::dirCheck(string& dirName){ } } +//********************************************************************************************************************** + +map > MothurOut::parseClasses(string classes){ + try { + map > parts; + + //treatment-age + vector pieces; splitAtDash(classes, pieces); // -> treatment, age + + for (int i = 0; i < pieces.size(); i++) { + string category = ""; string value = ""; + bool foundOpen = false; + for (int j = 0; j < pieces[i].length(); j++) { + if (control_pressed) { return parts; } + + if (pieces[i][j] == '<') { foundOpen = true; } + else if (pieces[i][j] == '>') { j += pieces[i].length(); } + else { + if (!foundOpen) { category += pieces[i][j]; } + else { value += pieces[i][j]; } + } + } + vector values; splitAtChar(value, values, '|'); + parts[category] = values; + } + + return parts; + } + catch(exception& e) { + errorOut(e, "MothurOut", "parseClasses"); + exit(1); + } +} /***********************************************************************/ string MothurOut::hasPath(string longName){ @@ -1044,10 +1142,112 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ exit(1); } } +/***********************************************************************/ +int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle){ + try { + + //get full path name + string completeFileName = getFullPathName(fileName); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#ifdef USE_COMPRESSION + // check for gzipped or bzipped file + if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { + string tempName = string(tmpnam(0)); + mkfifo(tempName.c_str(), 0666); + int fork_result = fork(); + if (fork_result < 0) { + cerr << "Error forking.\n"; + exit(1); + } else if (fork_result == 0) { + string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName; + cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; + system(command.c_str()); + cerr << "Done decompressing " << completeFileName << "\n"; + mothurRemove(tempName); + exit(EXIT_SUCCESS); + } else { + cerr << "waiting on child process " << fork_result << "\n"; + completeFileName = tempName; + } + } +#endif +#endif + + fileHandle.open(completeFileName.c_str(), ios::binary); + if(!fileHandle) { + mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine(); + return 1; + } + else { + //check for blank file + gobble(fileHandle); + if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); } + + return 0; + } + } + catch(exception& e) { + errorOut(e, "MothurOut", "openInputFileBinary"); + exit(1); + } +} +/***********************************************************************/ +int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){ + try { + + //get full path name + string completeFileName = getFullPathName(fileName); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#ifdef USE_COMPRESSION + // check for gzipped or bzipped file + if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { + string tempName = string(tmpnam(0)); + mkfifo(tempName.c_str(), 0666); + int fork_result = fork(); + if (fork_result < 0) { + cerr << "Error forking.\n"; + exit(1); + } else if (fork_result == 0) { + string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName; + cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; + system(command.c_str()); + cerr << "Done decompressing " << completeFileName << "\n"; + mothurRemove(tempName); + exit(EXIT_SUCCESS); + } else { + cerr << "waiting on child process " << fork_result << "\n"; + completeFileName = tempName; + } + } +#endif +#endif + + fileHandle.open(completeFileName.c_str(), ios::binary); + if(!fileHandle) { + //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine(); + return 1; + } + else { + //check for blank file + gobble(fileHandle); + //if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); } + + return 0; + } + } + catch(exception& e) { + errorOut(e, "MothurOut", "openInputFileBinary - no error"); + exit(1); + } +} + /***********************************************************************/ int MothurOut::renameFile(string oldName, string newName){ try { + + if (oldName == newName) { return 0; } + ifstream inTest; int exist = openInputFile(newName, inTest, ""); inTest.close(); @@ -1115,7 +1315,48 @@ int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){ } } +/***********************************************************************/ +int MothurOut::openOutputFileBinary(string fileName, ofstream& fileHandle){ + try { + + string completeFileName = getFullPathName(fileName); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#ifdef USE_COMPRESSION + // check for gzipped file + if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) { + string tempName = string(tmpnam(0)); + mkfifo(tempName.c_str(), 0666); + cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n"; + int fork_result = fork(); + if (fork_result < 0) { + cerr << "Error forking.\n"; + exit(1); + } else if (fork_result == 0) { + string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName; + system(command.c_str()); + exit(0); + } else { + completeFileName = tempName; + } + } +#endif +#endif + fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary); + if(!fileHandle) { + mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine(); + return 1; + } + else { + return 0; + } + } + catch(exception& e) { + errorOut(e, "MothurOut", "openOutputFileBinary"); + exit(1); + } + +} /**************************************************************************************************/ int MothurOut::appendFiles(string temp, string filename) { try{ @@ -1123,8 +1364,8 @@ int MothurOut::appendFiles(string temp, string filename) { ifstream input; //open output file in append mode - openOutputFileAppend(filename, output); - int ableToOpen = openInputFile(temp, input, "no error"); + openOutputFileBinaryAppend(filename, output); + int ableToOpen = openInputFileBinary(temp, input, "no error"); //int ableToOpen = openInputFile(temp, input); int numLines = 0; @@ -1149,7 +1390,72 @@ int MothurOut::appendFiles(string temp, string filename) { exit(1); } } +/**************************************************************************************************/ +int MothurOut::appendBinaryFiles(string temp, string filename) { + try{ + ofstream output; + ifstream input; + + //open output file in append mode + openOutputFileBinaryAppend(filename, output); + int ableToOpen = openInputFileBinary(temp, input, "no error"); + + if (ableToOpen == 0) { //you opened it + + char buffer[4096]; + while (!input.eof()) { + input.read(buffer, 4096); + output.write(buffer, input.gcount()); + } + input.close(); + } + + output.close(); + + return ableToOpen; + } + catch(exception& e) { + errorOut(e, "MothurOut", "appendBinaryFiles"); + exit(1); + } +} +/**************************************************************************************************/ +int MothurOut::appendFilesWithoutHeaders(string temp, string filename) { + try{ + ofstream output; + ifstream input; + + //open output file in append mode + openOutputFileAppend(filename, output); + int ableToOpen = openInputFile(temp, input, "no error"); + //int ableToOpen = openInputFile(temp, input); + + int numLines = 0; + if (ableToOpen == 0) { //you opened it + + string headers = getline(input); gobble(input); + if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); } + + char buffer[4096]; + while (!input.eof()) { + input.read(buffer, 4096); + output.write(buffer, input.gcount()); + //count number of lines + for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} } + } + input.close(); + } + + output.close(); + + return numLines; + } + catch(exception& e) { + errorOut(e, "MothurOut", "appendFiles"); + exit(1); + } +} /**************************************************************************************************/ string MothurOut::sortFile(string distFile, string outputDir){ try { @@ -1176,7 +1482,7 @@ string MothurOut::sortFile(string distFile, string outputDir){ string firstName, secondName; float dist; - while (input) { + while (!input.eof()) { input >> firstName >> secondName >> dist; output << dist << '\t' << firstName << '\t' << secondName << endl; gobble(input); @@ -1192,16 +1498,17 @@ string MothurOut::sortFile(string distFile, string outputDir){ //read in sorted file and put distance at end again ifstream input2; + ofstream output2; openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); + openOutputFile(outfile, output2); - while (input2) { + while (!input2.eof()) { input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; + output2 << firstName << '\t' << secondName << '\t' << dist << endl; gobble(input2); } input2.close(); - output.close(); + output2.close(); //remove temp files mothurRemove(tempDistFile); @@ -1235,15 +1542,15 @@ vector MothurOut::setFilePosFasta(string filename, int& num) char c = inFASTA.get(); count++; if (c == '>') { positions.push_back(count-1); - //cout << count << endl; + if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); } } } inFASTA.close(); num = positions.size(); - - /*FILE * pFile; - long size; + if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); } + FILE * pFile; + unsigned long long size; //get num bytes in file pFile = fopen (filename.c_str(),"rb"); @@ -1252,9 +1559,9 @@ vector MothurOut::setFilePosFasta(string filename, int& num) fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); - }*/ + } - unsigned long long size = positions[(positions.size()-1)]; + /*unsigned long long size = positions[(positions.size()-1)]; ifstream in; openInputFile(filename, in); @@ -1264,8 +1571,10 @@ vector MothurOut::setFilePosFasta(string filename, int& num) if(in.eof()) { break; } else { size++; } } - in.close(); - + in.close();*/ + + if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); } + positions.push_back(size); positions[0] = 0; @@ -1276,6 +1585,67 @@ vector MothurOut::setFilePosFasta(string filename, int& num) exit(1); } } +//********************************************************************************************************************** +vector MothurOut::readConsTax(string inputfile){ + try { + + vector taxes; + + ifstream in; + openInputFile(inputfile, in); + + //read headers + getline(in); + + while (!in.eof()) { + + if (control_pressed) { break; } + + string otu = ""; string tax = "unknown"; + int size = 0; + + in >> otu >> size >> tax; gobble(in); + consTax temp(otu, tax, size); + taxes.push_back(temp); + } + in.close(); + + return taxes; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readConsTax"); + exit(1); + } +} +//********************************************************************************************************************** +int MothurOut::readConsTax(string inputfile, map& taxes){ + try { + ifstream in; + openInputFile(inputfile, in); + + //read headers + getline(in); + + while (!in.eof()) { + + if (control_pressed) { break; } + + string otu = ""; string tax = "unknown"; + int size = 0; + + in >> otu >> size >> tax; gobble(in); + consTax2 temp(tax, size); + taxes[otu] = temp; + } + in.close(); + + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readConsTax"); + exit(1); + } +} /**************************************************************************************************/ vector MothurOut::setFilePosEachLine(string filename, int& num) { try { @@ -1291,16 +1661,6 @@ vector MothurOut::setFilePosEachLine(string filename, int& n positions.push_back(0); while(!in.eof()){ - //unsigned long long lastpos = in.tellg(); - //input = getline(in); - //if (input.length() != 0) { - //unsigned long long pos = in.tellg(); - //if (pos != -1) { positions.push_back(pos - input.length() - 1); } - //else { positions.push_back(lastpos); } - //} - //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions - - //getline counting reads char d = in.get(); count++; while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) { @@ -1420,23 +1780,100 @@ vector MothurOut::divideFile(string filename, int& proc) { } } /**************************************************************************************************/ -int MothurOut::divideFile(string filename, int& proc, vector& files) { + +vector MothurOut::divideFilePerLine(string filename, int& proc) { try{ + vector filePos; + filePos.push_back(0); - vector filePos = divideFile(filename, proc); + FILE * pFile; + unsigned long long size; - for (int i = 0; i < (filePos.size()-1); i++) { - - //read file chunk - ifstream in; - openInputFile(filename, in); - in.seekg(filePos[i]); - unsigned long long size = filePos[(i+1)] - filePos[i]; - char* chunk = new char[size]; - in.read(chunk, size); - in.close(); - - //open new file + filename = getFullPathName(filename); + + //get num bytes in file + pFile = fopen (filename.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + //estimate file breaks + unsigned long long chunkSize = 0; + chunkSize = size / proc; + + //file to small to divide by processors + if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; } + + //for each process seekg to closest file break and search for next '>' char. make that the filebreak + for (int i = 0; i < proc; i++) { + unsigned long long spot = (i+1) * chunkSize; + + ifstream in; + openInputFile(filename, in); + in.seekg(spot); + + //look for next line break + unsigned long long newSpot = spot; + while (!in.eof()) { + char c = in.get(); + + if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; } + else if (int(c) == -1) { break; } + } + + //there was not another line before the end of the file + unsigned long long sanityPos = in.tellg(); + + if (sanityPos == -1) { break; } + else { filePos.push_back(newSpot); } + + in.close(); + } + + //save end pos + filePos.push_back(size); + + //sanity check filePos + for (int i = 0; i < (filePos.size()-1); i++) { + if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } + } + + proc = (filePos.size() - 1); +#else + mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine(); + proc=1; + filePos.push_back(size); +#endif + return filePos; + } + catch(exception& e) { + errorOut(e, "MothurOut", "divideFile"); + exit(1); + } +} +/**************************************************************************************************/ +int MothurOut::divideFile(string filename, int& proc, vector& files) { + try{ + + vector filePos = divideFile(filename, proc); + + for (int i = 0; i < (filePos.size()-1); i++) { + + //read file chunk + ifstream in; + openInputFile(filename, in); + in.seekg(filePos[i]); + unsigned long long size = filePos[(i+1)] - filePos[i]; + char* chunk = new char[size]; + in.read(chunk, size); + in.close(); + + //open new file string fileChunkName = filename + "." + toString(i) + ".tmp"; ofstream out; openOutputFile(fileChunkName, out); @@ -1503,7 +1940,7 @@ vector MothurOut::splitWhiteSpace(string& rest, char buffer[], int size) for (int i = 0; i < size; i++) { if (!isspace(buffer[i])) { rest += buffer[i]; } else { - pieces.push_back(rest); rest = ""; + if (rest != "") { pieces.push_back(rest); rest = ""; } while (i < size) { //gobble white space if (isspace(buffer[i])) { i++; } else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; @@ -1527,7 +1964,7 @@ vector MothurOut::splitWhiteSpace(string input){ for (int i = 0; i < input.length(); i++) { if (!isspace(input[i])) { rest += input[i]; } else { - pieces.push_back(rest); rest = ""; + if (rest != "") { pieces.push_back(rest); rest = ""; } while (i < input.length()) { //gobble white space if (isspace(input[i])) { i++; } else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; @@ -1544,10 +1981,49 @@ vector MothurOut::splitWhiteSpace(string input){ exit(1); } } +/***********************************************************************/ +vector MothurOut::splitWhiteSpaceWithQuotes(string input){ + try { + vector pieces; + string rest = ""; + + int pos = input.find('\''); + int pos2 = input.find('\"'); + + if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about + else { + for (int i = 0; i < input.length(); i++) { + if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or " + rest += input[i]; + for (int j = i+1; j < input.length(); j++) { + if ((input[j] == '\'') || (input[j] == '\"')) { //then quit + rest += input[j]; + i = j+1; + j+=input.length(); + }else { rest += input[j]; } + } + }else if (!isspace(input[i])) { rest += input[i]; } + else { + if (rest != "") { pieces.push_back(rest); rest = ""; } + while (i < input.length()) { //gobble white space + if (isspace(input[i])) { i++; } + else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl; + } + } + } + + if (rest != "") { pieces.push_back(rest); } + } + return pieces; + } + catch(exception& e) { + errorOut(e, "MothurOut", "splitWhiteSpace"); + exit(1); + } +} //********************************************************************************************************************** int MothurOut::readTax(string namefile, map& taxMap) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1557,6 +2033,7 @@ int MothurOut::readTax(string namefile, map& taxMap) { bool pairDone = false; bool columnOne = true; string firstCol, secondCol; + bool error = false; while (!in.eof()) { if (control_pressed) { break; } @@ -1569,16 +2046,56 @@ int MothurOut::readTax(string namefile, map& taxMap) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); //are there confidence scores, if so remove them if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } - taxMap[firstCol] = secondCol; - if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true; + } pairDone = false; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + //are there confidence scores, if so remove them + if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } + map::iterator itTax = taxMap.find(firstCol); + + if(itTax == taxMap.end()) { + bool ignore = false; + if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; } + } + if (!ignore) { taxMap[firstCol] = secondCol; } + if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); } + }else { + mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); error = true; + } + + pairDone = false; + } + } + } + if (error) { control_pressed = true; } + if (debug) { mothurOut("[DEBUG]: numSeqs saved = '" + toString(taxMap.size()) + "'\n"); } return taxMap.size(); } @@ -1590,7 +2107,6 @@ int MothurOut::readTax(string namefile, map& taxMap) { /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, bool redund) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1612,6 +2128,9 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); + //parse names into vector vector theseNames; splitAtComma(secondCol, theseNames); @@ -1621,6 +2140,26 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + pairDone = false; + } + } + } return nameMap.size(); @@ -1633,7 +2172,6 @@ int MothurOut::readNames(string namefile, map& nameMap, bool red /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, int flip) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1655,12 +2193,30 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); nameMap[secondCol] = firstCol; pairDone = false; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[secondCol] = firstCol; + pairDone = false; + } + } + } return nameMap.size(); @@ -1673,7 +2229,7 @@ int MothurOut::readNames(string namefile, map& nameMap, int flip /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map& nameMap, map& nameCount) { try { - nameMap.clear(); nameCount.clear(); + nameMap.clear(); nameCount.clear(); //open input file ifstream in; openInputFile(namefile, in); @@ -1695,6 +2251,8 @@ int MothurOut::readNames(string namefile, map& nameMap, map theseNames; splitAtComma(secondCol, theseNames); @@ -1706,6 +2264,26 @@ int MothurOut::readNames(string namefile, map& nameMap, map pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + //parse names into vector + vector theseNames; + splitAtComma(secondCol, theseNames); + for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; } + nameCount[firstCol] = theseNames.size(); + pairDone = false; + } + } + + } return nameMap.size(); } @@ -1717,7 +2295,6 @@ int MothurOut::readNames(string namefile, map& nameMap, map& nameMap) { try { - //open input file ifstream in; openInputFile(namefile, in); @@ -1738,10 +2315,27 @@ int MothurOut::readNames(string namefile, map& nameMap) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } - if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; } + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[firstCol] = secondCol; pairDone = false; } } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + nameMap[firstCol] = secondCol; pairDone = false; } + } + } return nameMap.size(); @@ -1753,8 +2347,7 @@ int MothurOut::readNames(string namefile, map& nameMap) { } /**********************************************************************************************************************/ int MothurOut::readNames(string namefile, map >& nameMap) { - try { - + try { //open input file ifstream in; openInputFile(namefile, in); @@ -1776,6 +2369,8 @@ int MothurOut::readNames(string namefile, map >& nameMap) else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); vector temp; splitAtComma(secondCol, temp); nameMap[firstCol] = temp; @@ -1785,6 +2380,24 @@ int MothurOut::readNames(string namefile, map >& nameMap) } in.close(); + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + vector temp; + splitAtComma(secondCol, temp); + nameMap[firstCol] = temp; + pairDone = false; + } + } + } + return nameMap.size(); } catch(exception& e) { @@ -1795,7 +2408,6 @@ int MothurOut::readNames(string namefile, map >& nameMap) /**********************************************************************************************************************/ map MothurOut::readNames(string namefile) { try { - map nameMap; //open input file @@ -1819,6 +2431,8 @@ map MothurOut::readNames(string namefile) { else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); nameMap[firstCol] = num; pairDone = false; @@ -1826,6 +2440,22 @@ map MothurOut::readNames(string namefile) { } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + } + } + } return nameMap; @@ -1836,9 +2466,10 @@ map MothurOut::readNames(string namefile) { } } /**********************************************************************************************************************/ -int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { +map MothurOut::readNames(string namefile, unsigned long int& numSeqs) { try { - int error = 0; + map nameMap; + numSeqs = 0; //open input file ifstream in; @@ -1861,40 +2492,71 @@ int MothurOut::readNames(string namefile, vector& nameVector, m else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { + checkName(firstCol); + checkName(secondCol); int num = getNumNames(secondCol); - - map::iterator it = fastamap.find(firstCol); - if (it == fastamap.end()) { - error = 1; - mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); - }else { - seqPriorityNode temp(num, it->second, firstCol); - nameVector.push_back(temp); - } - + nameMap[firstCol] = num; pairDone = false; + numSeqs += num; } } } in.close(); - return error; + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + nameMap[firstCol] = num; + pairDone = false; + numSeqs += num; + } + } + } + + return nameMap; + } catch(exception& e) { errorOut(e, "MothurOut", "readNames"); exit(1); } } -//********************************************************************************************************************** -set MothurOut::readAccnos(string accnosfile){ +/************************************************************/ +int MothurOut::checkName(string& name) { + try { + if (modifyNames) { + for (int i = 0; i < name.length(); i++) { + if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; } + } + } + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "checkName"); + exit(1); + } +} +/**********************************************************************************************************************/ +int MothurOut::readNames(string namefile, vector& nameVector, map& fastamap) { try { - set names; + int error = 0; + + //open input file ifstream in; - openInputFile(accnosfile, in); - string name; + openInputFile(namefile, in); string rest = ""; char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + string firstCol, secondCol; while (!in.eof()) { if (control_pressed) { break; } @@ -1902,12 +2564,91 @@ set MothurOut::readAccnos(string accnosfile){ in.read(buffer, 4096); vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); - for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); } - } - in.close(); - - return names; - } + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { firstCol = pieces[i]; columnOne=false; } + else { secondCol = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + checkName(firstCol); + checkName(secondCol); + int num = getNumNames(secondCol); + + map::iterator it = fastamap.find(firstCol); + if (it == fastamap.end()) { + error = 1; + mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine(); + }else { + seqPriorityNode temp(num, it->second, firstCol); + nameVector.push_back(temp); + } + + pairDone = false; + } + } + } + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "readNames"); + exit(1); + } +} +//********************************************************************************************************************** +set MothurOut::readAccnos(string accnosfile){ + try { + set names; + ifstream in; + openInputFile(accnosfile, in); + string name; + + string rest = ""; + char buffer[4096]; + + while (!in.eof()) { + if (control_pressed) { break; } + + in.read(buffer, 4096); + vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); + + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); + names.insert(pieces[i]); + } + } + in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); } + } + return names; + } catch(exception& e) { errorOut(e, "MothurOut", "readAccnos"); exit(1); @@ -1930,9 +2671,14 @@ int MothurOut::readAccnos(string accnosfile, vector& names){ in.read(buffer, 4096); vector pieces = splitWhiteSpace(rest, buffer, in.gcount()); - for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); } + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); } } in.close(); + + if (rest != "") { + vector pieces = splitWhiteSpace(rest); + for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); } + } return 0; } @@ -1985,6 +2731,118 @@ int MothurOut::getNumChar(string line, char c){ } } /***********************************************************************/ +string MothurOut::getSimpleLabel(string label){ + try { + string simple = ""; + + //remove OTU or phylo tag + string newLabel1 = ""; + for (int i = 0; i < label.length(); i++) { + if(label[i]>47 && label[i]<58) { //is a digit + newLabel1 += label[i]; + } + } + + int num1; + mothurConvert(newLabel1, num1); + + simple = toString(num1); + + return simple; + } + catch(exception& e) { + errorOut(e, "MothurOut", "isLabelEquivalent"); + exit(1); + } +} +/***********************************************************************/ +string MothurOut::mothurGetpid(int threadID){ + try { + + string pid = ""; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + pid += toString(getpid()); if(debug) { mothurOut("[DEBUG]: " + pid + "\n"); } + //remove any weird chars + string pid1 = ""; + for (int i = 0; i < pid.length(); i++) { + if(pid[i]>47 && pid[i]<58) { //is a digit + pid1 += pid[i]; + } + } + pid = pid1; +#else + pid += toString(threadID); +#endif + return pid; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurGetpid"); + exit(1); + } +} + +/***********************************************************************/ + +bool MothurOut::isLabelEquivalent(string label1, string label2){ + try { + bool same = false; + + //remove OTU or phylo tag + string newLabel1 = ""; + for (int i = 0; i < label1.length(); i++) { + if(label1[i]>47 && label1[i]<58) { //is a digit + newLabel1 += label1[i]; + } + } + + string newLabel2 = ""; + for (int i = 0; i < label2.length(); i++) { + if(label2[i]>47 && label2[i]<58) { //is a digit + newLabel2 += label2[i]; + } + } + + int num1, num2; + mothurConvert(newLabel1, num1); + mothurConvert(newLabel2, num2); + + if (num1 == num2) { same = true; } + + return same; + } + catch(exception& e) { + errorOut(e, "MothurOut", "isLabelEquivalent"); + exit(1); + } +} +//********************************************************************************************************************** +bool MothurOut::isSubset(vector bigset, vector subset) { + try { + + + if (subset.size() > bigset.size()) { return false; } + + //check if each guy in subset is also in bigset + for (int i = 0; i < subset.size(); i++) { + bool match = false; + for (int j = 0; j < bigset.size(); j++) { + if (subset[i] == bigset[j]) { match = true; break; } + } + + //you have a guy in subset that had no match in bigset + if (match == false) { return false; } + } + + return true; + + } + catch(exception& e) { + errorOut(e, "MothurOut", "isSubset"); + exit(1); + } +} +/***********************************************************************/ int MothurOut::mothurRemove(string filename){ try { filename = getFullPathName(filename); @@ -2023,6 +2881,28 @@ bool MothurOut::mothurConvert(string item, int& num){ exit(1); } } +/***********************************************************************/ +bool MothurOut::mothurConvert(string item, intDist& num){ + try { + bool error = false; + + if (isNumeric1(item)) { + convert(item, num); + }else { + num = 0; + error = true; + mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine(); + commandInputsConvertError = true; + } + + return error; + } + catch(exception& e) { + errorOut(e, "MothurOut", "mothurConvert"); + exit(1); + } +} + /***********************************************************************/ bool MothurOut::isNumeric1(string stringToCheck){ try { @@ -2201,7 +3081,50 @@ unsigned int MothurOut::fromBase36(string base36){ } } /***********************************************************************/ - +string MothurOut::findEdianness() { + try { + // find real endian type + unsigned char EndianTest[2] = {1,0}; + short x = *(short *)EndianTest; + + string endianType = "unknown"; + if(x == 1) { endianType = "BIG_ENDIAN"; } + else { endianType = "LITTLE_ENDIAN"; } + + return endianType; + } + catch(exception& e) { + errorOut(e, "MothurOut", "findEdianness"); + exit(1); + } +} +/***********************************************************************/ +double MothurOut::median(vector x) { + try { + double value = 0.0; + + if (x.size() == 0) { } //error + else { + //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2. + sort(x.begin(), x.end()); + //is x.size even? + if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints + int midIndex1 = (x.size()/2)-1; + int midIndex2 = (x.size()/2); + value = (x[midIndex1]+ x[midIndex2]) / 2.0; + }else { + int midIndex = (x.size()/2); + value = x[midIndex]; + } + } + return value; + } + catch(exception& e) { + errorOut(e, "MothurOut", "median"); + exit(1); + } +} +/***********************************************************************/ int MothurOut::factorial(int num){ try { int total = 1; @@ -2248,10 +3171,71 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){ } } /***********************************************************************/ +bool MothurOut::checkLocations(string& filename, string inputDir){ + try { + filename = getFullPathName(filename); + + int ableToOpen; + ifstream in; + ableToOpen = openInputFile(filename, in, "noerror"); + in.close(); + + //if you can't open it, try input location + if (ableToOpen == 1) { + if (inputDir != "") { //default path is set + string tryPath = inputDir + getSimpleName(filename); + mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath); mothurOutEndLine(); + ifstream in2; + ableToOpen = openInputFile(tryPath, in2, "noerror"); + in2.close(); + filename = tryPath; + } + } + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (getDefaultPath() != "") { //default path is set + string tryPath = getDefaultPath() + getSimpleName(filename); + mothurOut("Unable to open " + filename + ". Trying default " + tryPath); mothurOutEndLine(); + ifstream in2; + ableToOpen = openInputFile(tryPath, in2, "noerror"); + in2.close(); + filename = tryPath; + } + } + + //if you can't open it its not in current working directory or inputDir, try mothur excutable location + if (ableToOpen == 1) { + string exepath = argv; + string tempPath = exepath; + for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); } + exepath = exepath.substr(0, (tempPath.find_last_of('m'))); + + string tryPath = getFullPathName(exepath) + getSimpleName(filename); + mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath); mothurOutEndLine(); + ifstream in2; + ableToOpen = openInputFile(tryPath, in2, "noerror"); + in2.close(); + filename = tryPath; + } + + if (ableToOpen == 1) { mothurOut("Unable to open " + filename + "."); mothurOutEndLine(); return false; } + + return true; + } + catch(exception& e) { + errorOut(e, "MothurOut", "checkLocations"); + exit(1); + } +} +/***********************************************************************/ //This function parses the estimator options and puts them in a vector void MothurOut::splitAtChar(string& estim, vector& container, char symbol) { try { + + if (symbol == '-') { splitAtDash(estim, container); return; } + string individual = ""; int estimLength = estim.size(); for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); - for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; + /* for(int i=0;i& container) { //This function parses the line options and puts them in a set void MothurOut::splitAtDash(string& estim, set& container) { try { - string individual; + string individual = ""; int lineNum; - - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - convert(individual, lineNum); //convert the string to int - container.insert(lineNum); + int estimLength = estim.size(); + bool prevEscape = false; + /* + for(int i=0;i& container) { exit(1); } } + /***********************************************************************/ string MothurOut::makeList(vector& names) { try { @@ -2435,11 +3492,11 @@ void MothurOut::splitAtChar(string& prefix, string& suffix, char c){ string space = " "; while(suffix.at(0) == ' ') suffix = suffix.substr(1, suffix.length()); - } + }else { suffix = ""; } - } + } catch(exception& e) { - errorOut(e, "MothurOut", "splitAtComma"); + errorOut(e, "MothurOut", "splitAtChar"); exit(1); } } @@ -2455,7 +3512,7 @@ void MothurOut::splitAtComma(string& prefix, string& suffix){ string space = " "; while(suffix.at(0) == ' ') suffix = suffix.substr(1, suffix.length()); - } + }else { suffix = ""; } } catch(exception& e) { @@ -2498,6 +3555,35 @@ bool MothurOut::inUsersGroups(string groupname, vector Groups) { exit(1); } } +/**************************************************************************************************/ + +bool MothurOut::inUsersGroups(vector set, vector< vector > sets) { + try { + for (int i = 0; i < sets.size(); i++) { + if (set == sets[i]) { return true; } + } + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "inUsersGroups"); + exit(1); + } +} +/**************************************************************************************************/ + +bool MothurOut::inUsersGroups(int groupname, vector Groups) { + try { + for (int i = 0; i < Groups.size(); i++) { + if (groupname == Groups[i]) { return true; } + } + return false; + } + catch(exception& e) { + errorOut(e, "MothurOut", "inUsersGroups"); + exit(1); + } +} + /**************************************************************************************************/ //returns true if any of the strings in first vector are in second vector bool MothurOut::inUsersGroups(vector groupnames, vector Groups) { @@ -2513,6 +3599,26 @@ bool MothurOut::inUsersGroups(vector groupnames, vector Groups) exit(1); } } +/**************************************************************************************************/ +//removes entries that are only white space +int MothurOut::removeBlanks(vector& tempVector) { + try { + vector newVector; + for (int i = 0; i < tempVector.size(); i++) { + bool isBlank = true; + for (int j = 0; j < tempVector[i].length(); j++) { + if (!isspace(tempVector[i][j])) { isBlank = false; j+= tempVector[i].length(); } //contains non space chars, break out and save + } + if (!isBlank) { newVector.push_back(tempVector[i]); } + } + tempVector = newVector; + return 0; + } + catch(exception& e) { + errorOut(e, "MothurOut", "removeBlanks"); + exit(1); + } +} /***********************************************************************/ //this function determines if the user has given us labels that are smaller than the given label. //if so then it returns true so that the calling function can run the previous valid distance. @@ -2642,6 +3748,273 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) { exit(1); } } +/**************************************************************************************************/ +vector MothurOut::getAverages(vector< vector >& dists) { + try{ + vector averages; //averages.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { + for (int i = 0; i < dists[thisIter].size(); i++) { + averages[i] += dists[thisIter][i]; + } + } + + //finds average. + for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); } + + return averages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +double MothurOut::getAverage(vector dists) { + try{ + double average = 0; + + for (int i = 0; i < dists.size(); i++) { + average += dists[i]; + } + + //finds average. + average /= (double) dists.size(); + + return average; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverage"); + exit(1); + } +} + +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists) { + try{ + + vector averages = getAverages(dists); + + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector MothurOut::getStandardDeviation(vector< vector >& dists, vector& averages) { + try{ + //find standard deviation + vector stdDev; //stdDev.resize(numComp, 0.0); + for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); } + + for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int j = 0; j < dists[thisIter].size(); j++) { + stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j])); + } + } + for (int i = 0; i < stdDev.size(); i++) { + stdDev[i] /= (double) dists.size(); + stdDev[i] = sqrt(stdDev[i]); + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getStandardDeviation"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getAverages(vector< vector< vector > >& calcDistsTotals, string mode) { + try{ + + vector< vector > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + //calcAverages[i].resize(calcDistsTotals[0][i].size()); + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + calcAverages.push_back(temp); + } + + if (mode == "average") { + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) calcDistsTotals.size(); + } + } + }else { //find median + for (int i = 0; i < calcAverages.size(); i++) { //for each calc + for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison + vector dists; + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample + dists.push_back(calcDistsTotals[thisIter][i][j].dist); + } + sort(dists.begin(), dists.end()); + calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)]; + } + } + } + + return calcAverages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getAverages(vector< vector< vector > >& calcDistsTotals) { + try{ + + vector< vector > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + //calcAverages[i].resize(calcDistsTotals[0][i].size()); + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + calcAverages.push_back(temp); + } + + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) calcDistsTotals.size(); + } + } + + return calcAverages; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getStandardDeviation(vector< vector< vector > >& calcDistsTotals) { + try{ + + vector< vector > calcAverages = getAverages(calcDistsTotals); + + //find standard deviation + vector< vector > stdDev; + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + stdDev.push_back(temp); + } + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int i = 0; i < stdDev.size(); i++) { + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist)); + } + } + } + + for (int i = 0; i < stdDev.size(); i++) { //finds average. + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist /= (float) calcDistsTotals.size(); + stdDev[i][j].dist = sqrt(stdDev[i][j].dist); + } + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} +/**************************************************************************************************/ +vector< vector > MothurOut::getStandardDeviation(vector< vector< vector > >& calcDistsTotals, vector< vector >& calcAverages) { + try{ + //find standard deviation + vector< vector > stdDev; + for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero. + vector temp; + for (int j = 0; j < calcDistsTotals[0][i].size(); j++) { + seqDist tempDist; + tempDist.seq1 = calcDistsTotals[0][i][j].seq1; + tempDist.seq2 = calcDistsTotals[0][i][j].seq2; + tempDist.dist = 0.0; + temp.push_back(tempDist); + } + stdDev.push_back(temp); + } + + for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each + for (int i = 0; i < stdDev.size(); i++) { + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist)); + } + } + } + + for (int i = 0; i < stdDev.size(); i++) { //finds average. + for (int j = 0; j < stdDev[i].size(); j++) { + stdDev[i][j].dist /= (float) calcDistsTotals.size(); + stdDev[i][j].dist = sqrt(stdDev[i][j].dist); + } + } + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getAverages"); + exit(1); + } +} + /**************************************************************************************************/ bool MothurOut::isContainingOnlyDigits(string input) { try{ @@ -2700,8 +4073,92 @@ int MothurOut::removeConfidences(string& tax) { } } /**************************************************************************************************/ +string MothurOut::removeQuotes(string tax) { + try { + + string taxon; + string newTax = ""; + + for (int i = 0; i < tax.length(); i++) { + + if (control_pressed) { return newTax; } + + if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; } + + } + + return newTax; + } + catch(exception& e) { + errorOut(e, "MothurOut", "removeQuotes"); + exit(1); + } +} +/**************************************************************************************************/ +// function for calculating standard deviation +double MothurOut::getStandardDeviation(vector& featureVector){ + try { + //finds sum + double average = 0; + for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; } + average /= (double) featureVector.size(); + + //find standard deviation + double stdDev = 0; + for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each + stdDev += ((featureVector[i] - average) * (featureVector[i] - average)); + } + + stdDev /= (double) featureVector.size(); + stdDev = sqrt(stdDev); + + return stdDev; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getStandardDeviation"); + exit(1); + } +} +/**************************************************************************************************/ +// returns largest value in vector +double MothurOut::max(vector& featureVector){ + try { + if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; } + + //finds largest + double largest = featureVector[0]; + for (int i = 1; i < featureVector.size(); i++) { + if (featureVector[i] > largest) { largest = featureVector[i]; } + } + + return largest; + } + catch(exception& e) { + errorOut(e, "MothurOut", "max"); + exit(1); + } +} +/**************************************************************************************************/ +// returns smallest value in vector +double MothurOut::min(vector& featureVector){ + try { + if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; } + + //finds smallest + double smallest = featureVector[0]; + for (int i = 1; i < featureVector.size(); i++) { + if (featureVector[i] < smallest) { smallest = featureVector[i]; } + } + + return smallest; + } + catch(exception& e) { + errorOut(e, "MothurOut", "min"); + exit(1); + } +} - +/**************************************************************************************************/