5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
// Returns the MothurOut instance stored in _uniqueInstance, allocating it with
// new the first time (while _uniqueInstance is still 0).
// NOTE(review): no lock is visible around the check-then-allocate; confirm the
// first call cannot happen concurrently from two threads.
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Inserts a fixed list of type tags ("fasta", "summary", ... "processors") into
// `types`.
// NOTE(review): the declaration of `types` and the return statement are not among
// the lines shown; presumably `types` is the set<string> that is returned - confirm.
// The embedded numbering skips lines, so the tag list below may be incomplete as shown.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
48 types.insert("processors");
// Error path: hands the caught exception to errorOut with this class and function name.
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
// Prints one "tag=value" line, via mothurOut + mothurOutEndLine, for every
// current-file member that is not the empty string. Members left empty are skipped.
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
// processors is a string member; it is printed only when it differs from "1".
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
// Error path: hands the caught exception to errorOut with this class and function name.
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
// Returns true as soon as any current-file member is non-empty, or when
// processors differs from "1". The checks mirror the members printed by
// printCurrentFiles.
94 bool MothurOut::hasCurrentFiles() {
// NOTE(review): every check below returns directly; hasCurrent is presumably what
// the fall-through return (not shown in this listing) hands back - confirm.
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
// Error path: hands the caught exception to errorOut with this class and function name.
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): the statements of this function are not among the lines shown in
// this listing; only the signature and the exception handler are visible, so what
// exactly gets cleared cannot be documented from here.
133 void MothurOut::clearCurrentFiles() {
// Error path: hands the caught exception to errorOut with this class and function name.
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Searches the directories listed in the PATH environment variable for
// programName and builds a full path to it in pPath.
// Two strategies are visible: (1) pick the first PATH entry whose lower-cased text
// contains programName and append the name to it; (2) append the name to each PATH
// entry in turn and keep the first one that can be opened as a file.
// NOTE(review): the declarations of pPath/dirs/delim/in, the condition that selects
// between (1) and (2), and the return statement are on lines not shown here.
166 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv returns a null pointer when the variable is not set, and
// constructing a std::string from a null char* is undefined behavior - confirm PATH
// is always present or guard this.
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
// NOTE(review): only the directory text is lower-cased in the visible lines; a
// programName containing capitals would presumably never match - confirm callers
// pass a lower-case name.
195 //is this mothurs path?
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// The three-argument openInputFile is used so a failed open prints no error message.
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
// Error path: hands the caught exception to errorOut with this class and function name.
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Records filename as the log file name and opens it for output through
// openOutputFile on the member stream `out`.
// NOTE(review): the MPI rank lookup is presumably inside a USE_MPI conditional whose
// preprocessor lines are not shown in this listing - confirm.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
// Error path: hands the caught exception to errorOut with this class and function name.
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
// Stores pathname in defaultPath after making sure it ends with the platform's
// directory separator ("/" on the unix-like branch, "\\" on the other branch).
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
// NOTE(review): for an empty pathname, length()-1 wraps around and substr throws
// std::out_of_range, which would land in the handler below - confirm callers never
// pass "" or guard it.
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
// Error path: hands the caught exception to errorOut with this class and function name.
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores pathname in outputDir exactly as given; unlike setDefaultPath, no trailing
// separator is added in the visible lines.
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
// Error path: hands the caught exception to errorOut with this class and function name.
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// NOTE(review): only the MPI rank lookup and the rank-0 guard are visible in this
// listing; the statement that actually closes the log is on a line not shown.
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
// Error path: hands the caught exception to errorOut with this class and function name.
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. NOTE(review): its statements are not among the lines shown; only the
// exception handler is visible. The handler reports the function name as "MothurOut".
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Emits `output` through the program's normal output path; used throughout this file
// for user-facing and debug messages.
// NOTE(review): only the MPI rank lookup and rank-0 guard are visible in this listing;
// the statements that write the text (screen and/or log) are on lines not shown.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
// NOTE(review): the function name reported here is "MothurOut", not "mothurOut".
345 catch(exception& e) {
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// NOTE(review): only the MPI rank lookup and rank-0 guard are visible in this listing;
// presumably the hidden lines write `output` to the screen without logging it, as the
// name suggests - confirm.
351 void MothurOut::mothurOutJustToScreen(string output) {
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
// NOTE(review): the function name reported here is "MothurOut", not this function's name.
366 catch(exception& e) {
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Called after mothurOut throughout this file to finish a message line.
// NOTE(review): only the MPI rank lookup and rank-0 guard are visible in this listing;
// the statements that emit the line ending are on lines not shown.
372 void MothurOut::mothurOutEndLine() {
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
// Error path: hands the caught exception to errorOut with this class and function name.
388 catch(exception& e) {
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload that also streams `output` into the caller-supplied outputFile.
// NOTE(review): any additional writes (screen/log) are on lines not shown here.
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
406 outputFile << output;
// NOTE(review): the function name reported here is "MothurOut", not "mothurOut".
414 catch(exception& e) {
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload of mothurOutEndLine taking a caller-supplied output file stream.
// NOTE(review): only the MPI rank lookup and rank-0 guard are visible in this listing;
// the statements that emit the line ending are on lines not shown.
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
// Error path: hands the caught exception to errorOut with this class and function name.
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// NOTE(review): only the MPI rank lookup and rank-0 guard are visible in this listing;
// presumably the hidden lines write `output` to the log file only, as the name
// suggests - confirm.
443 void MothurOut::mothurOutJustToLog(string output) {
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
// Error path: hands the caught exception to errorOut with this class and function name.
458 catch(exception& e) {
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Central error reporter used by every catch block in this file.
// Prints "[ERROR]: " followed by e.what(), then a follow-up message that depends on
// whether the exception text contains "bad_alloc" and, if it does, on `object`.
//   e        - the caught exception; only e.what() is used in the visible lines
//   object   - name of the class/command reporting the error
//   function - name of the function reporting the error
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// NOTE(review): find returns string::size_type; the result is narrowed to int here and
// compared with string::npos below, which relies on -1 converting back to npos.
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
// Anything other than an allocation failure gets the generic "contact us" message.
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// Allocation failure (presumably the else branch; its line is not shown): the advice is
// tailored for "cluster" and "shhh.flows", with a general out-of-memory message otherwise.
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // process_mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// In this file the function is named mem_usage. On the unix-like branch it reads
// /proc/self/stat, stores the computed figures in vm_usage and resident_set, and prints
// them with mothurOut.
// NOTE(review): the int return value and the declarations of vsize/rss are on lines not
// shown in this listing.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
// NOTE(review): this condition also admits __APPLE__, but /proc/self/stat is presumably
// not available on macOS - confirm what the read yields there.
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
// The 22 placeholder strings above are read and discarded so that the next two
// whitespace-separated fields land in vsize and rss.
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
// vsize is divided by 1024 and rss is multiplied by the page size in KB; presumably
// vsize is in bytes and rss in pages, which puts both results in KB - confirm against proc(5).
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
// Opens fileName on fileHandle in append mode (ios::app) after expanding it with
// getFullPathName, and prints "[ERROR]: Could not open <name>" on failure.
// NOTE(review): the failure test and the int return values are on lines not shown here.
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
// Error path: hands the caught exception to errorOut with this class and function name.
586 catch(exception& e) {
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
// Skips leading whitespace on f: reads characters while isspace() accepts them, then
// puts the first non-whitespace character back unless the stream has reached EOF.
// NOTE(review): the declaration of d is on a line not shown here.
592 void MothurOut::gobble(istream& f){
596 while(isspace(d=f.get())) { ;}
597 if(!f.eof()) { f.putback(d); }
// Error path: hands the caught exception to errorOut with this class and function name.
599 catch(exception& e) {
600 errorOut(e, "MothurOut", "gobble");
604 /***********************************************************************/
// istringstream overload of gobble: consumes leading whitespace and puts the first
// non-whitespace character back unless the stream has reached EOF.
// NOTE(review): the declaration of d is on a line not shown here.
605 void MothurOut::gobble(istringstream& f){
608 while(isspace(d=f.get())) {;}
609 if(!f.eof()) { f.putback(d); }
// Error path: hands the caught exception to errorOut with this class and function name.
611 catch(exception& e) {
612 errorOut(e, "MothurOut", "gobble");
617 /***********************************************************************/
// Reads characters from fileHandle one at a time until a line terminator
// ('\n', '\r' or '\f') or end of stream.
// NOTE(review): the accumulation of characters into the result and the return
// statement are on lines not shown here.
619 string MothurOut::getline(istringstream& fileHandle) {
624 while (!fileHandle.eof()) {
// NOTE(review): eof() only becomes true after a read fails, so the last get() returns
// EOF; unlike the ifstream overload, the visible test below does not check for it -
// confirm a stray character is not appended to the line.
626 char c = fileHandle.get();
628 //are you at the end of the line
629 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
// Error path: hands the caught exception to errorOut with this class and function name.
636 catch(exception& e) {
637 errorOut(e, "MothurOut", "getline");
641 /***********************************************************************/
// ifstream overload of getline: reads characters one at a time and stops at
// '\n', '\r', '\f' or EOF.
// NOTE(review): the loop header, the accumulation of characters and the return
// statement are on lines not shown here.
643 string MothurOut::getline(ifstream& fileHandle) {
650 char c = fileHandle.get();
652 //are you at the end of the line
653 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
// Error path: hands the caught exception to errorOut with this class and function name.
660 catch(exception& e) {
661 errorOut(e, "MothurOut", "getline");
665 /***********************************************************************/
// Helper compiled only on unix-like platforms when USE_COMPRESSION is defined.
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
668 #ifdef USE_COMPRESSION
// Returns true when s is at least as long as suffix and its last strlen(suffix)
// characters compare equal to suffix.
669 inline bool endsWith(string s, const char * suffix){
670 size_t suffixLength = strlen(suffix);
671 return s.size() >= suffixLength && s.substr(s.size() - suffixLength, suffixLength).compare(suffix) == 0;
// Returns longName with everything after its last '.' removed; the dot itself is kept
// (substr(0, pos) with pos one past the dot). A name without a '.' is left unchanged
// by the visible lines.
// NOTE(review): the return statement is on a line not shown here.
676 string MothurOut::getRootName(string longName){
679 string rootName = longName;
681 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
682 #ifdef USE_COMPRESSION
// With compression support, a trailing ".gz" or ".bz2" is stripped first (dot
// included) and the shortening is reported on cerr.
683 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
684 int pos = rootName.find_last_of('.');
685 rootName = rootName.substr(0, pos);
686 cerr << "shortening " << longName << " to " << rootName << "\n";
690 if(rootName.find_last_of(".") != rootName.npos){
691 int pos = rootName.find_last_of('.')+1;
692 rootName = rootName.substr(0, pos);
// Error path: hands the caught exception to errorOut with this class and function name.
697 catch(exception& e) {
698 errorOut(e, "MothurOut", "getRootName");
702 /***********************************************************************/
// Strips any directory part: simpleName becomes the text after the last '/' or '\\'
// in longName, or longName itself when neither separator occurs.
// NOTE(review): the declaration of found and the return statement are on lines not shown.
704 string MothurOut::getSimpleName(string longName){
706 string simpleName = longName;
709 found=longName.find_last_of("/\\");
711 if(found != longName.npos){
712 simpleName = longName.substr(found+1);
// Error path: hands the caught exception to errorOut with this class and function name.
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getSimpleName");
723 /***********************************************************************/
// Draws a pseudo-random integer from rand() scaled by (highest+1)/(RAND_MAX+1) and
// truncated to int; presumably the result lies in [0, highest] - confirm, since the
// arithmetic goes through float and may round.
// NOTE(review): the return statement is on a line not shown here.
725 int MothurOut::getRandomIndex(int highest){
728 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
// Error path: hands the caught exception to errorOut with this class and function name.
732 catch(exception& e) {
733 errorOut(e, "MothurOut", "getRandomIndex");
738 /**********************************************************************/
// Extracts the directory part of longName: everything up to and including the last
// '/' or '\\'. When no separator occurs, rootPathName keeps its initial value, which
// is longName itself.
// NOTE(review): the return statement is on a line not shown here.
740 string MothurOut::getPathName(string longName){
742 string rootPathName = longName;
744 if(longName.find_last_of("/\\") != longName.npos){
745 int pos = longName.find_last_of("/\\")+1;
746 rootPathName = longName.substr(0, pos);
// Error path: hands the caught exception to errorOut with this class and function name.
751 catch(exception& e) {
752 errorOut(e, "MothurOut", "getPathName");
757 /***********************************************************************/
// Checks that dirName is a usable directory by trying to create a scratch file in it.
// dirName is taken by reference and modified: a trailing separator is appended if
// missing and the name is replaced by getFullPathName(dirName).
// NOTE(review): the declarations of tag/out, the test on the open result and the bool
// return values are on lines not shown here.
759 bool MothurOut::dirCheck(string& dirName){
765 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
770 //add / to name if needed
// NOTE(review): for an empty dirName, length()-1 wraps around and substr throws
// std::out_of_range, which would land in the handler below - confirm callers never
// pass "".
771 string lastChar = dirName.substr(dirName.length()-1);
772 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
773 if (lastChar != "/") { dirName += "/"; }
775 if (lastChar != "\\") { dirName += "\\"; }
778 //test to make sure directory exists
779 dirName = getFullPathName(dirName);
// The scratch file is named <dirName><tag>temp and is opened with ios::trunc.
780 string outTemp = dirName + tag + "temp";
782 out.open(outTemp.c_str(), ios::trunc);
784 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
// The scratch file is deleted again once the probe is done.
787 mothurRemove(outTemp);
// Error path: hands the caught exception to errorOut with this class and function name.
793 catch(exception& e) {
794 errorOut(e, "MothurOut", "dirCheck");
799 /***********************************************************************/
// Extracts the leading path portion of longName: everything up to and including the
// last '~', '/' or '\\'.
// NOTE(review): the declarations of path/found and the return statement are on lines
// not shown; getFullPathName treats an empty result as "no path", so path presumably
// starts out empty - confirm.
801 string MothurOut::hasPath(string longName){
806 found=longName.find_last_of("~/\\");
808 if(found != longName.npos){
809 path = longName.substr(0, found+1);
// Error path: hands the caught exception to errorOut with this class and function name.
814 catch(exception& e) {
815 errorOut(e, "MothurOut", "hasPath");
820 /***********************************************************************/
// Extracts the extension of longName: the text from the last '.' to the end, dot
// included. extension stays "" when longName contains no '.'.
// NOTE(review): the return statement is on a line not shown here.
822 string MothurOut::getExtension(string longName){
824 string extension = "";
826 if(longName.find_last_of('.') != longName.npos){
827 int pos = longName.find_last_of('.');
// The length argument exceeds what remains after pos; substr clamps it to the end of
// the string.
828 extension = longName.substr(pos, longName.length());
// Error path: hands the caught exception to errorOut with this class and function name.
833 catch(exception& e) {
834 errorOut(e, "MothurOut", "getExtension");
838 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) and returns true when the stream is
// already at EOF at the blank-file check.
// NOTE(review): the declaration of fileHandle, the open-failure test, whatever read
// happens before the eof() check, and the remaining return values are on lines not
// shown here.
839 bool MothurOut::isBlank(string fileName){
842 fileName = getFullPathName(fileName);
845 fileHandle.open(fileName.c_str());
847 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
850 //check for blank file
852 if (fileHandle.eof()) { fileHandle.close(); return true; }
// Error path: hands the caught exception to errorOut with this class and function name.
857 catch(exception& e) {
858 errorOut(e, "MothurOut", "isBlank");
862 /***********************************************************************/
// Expands a file name that starts with "~" or contains "./" / "../" components
// (".\\" / "..\\" on the non-unix branch) into a name based on the home directory or
// the current working directory. Names without a path part, and names whose path has
// no "./" component, are returned unchanged.
// NOTE(review): several declarations (newFileName, cwd, dirs, pos, homeDir, simpleCWD),
// the index decrement in the "../" case, the #else separating the two platform
// branches and the final return are on lines not shown in this listing.
864 string MothurOut::getFullPathName(string fileName){
867 string path = hasPath(fileName);
871 if (path == "") { return fileName; } //its a simple name
872 else { //we need to complete the pathname
873 // ex. ../../../filename
874 // cwd = /user/work/desktop
877 //get current working directory
878 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
880 if (path.find("~") != -1) { //go to home directory
// HOME is read defensively: a null getenv result yields an empty home directory.
883 char *homepath = NULL;
884 homepath = getenv ("HOME");
885 if ( homepath != NULL) { homeDir = homepath; }
886 else { homeDir = ""; }
// Everything after the first '~' in fileName is appended to the home directory.
888 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
891 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
892 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
894 //char* cwdpath = new char[1024];
896 //cwdpath=getcwd(cwdpath,size);
// NOTE(review): getcwd(NULL, 0) is an extension that returns a malloc'ed buffer; no
// free() is visible in this listing - confirm it is released.
899 char *cwdpath = NULL;
900 cwdpath = getcwd(NULL, 0); // or _getcwd
901 if ( cwdpath != NULL) { cwd = cwdpath; }
// Drop the first character of cwd (the leading '/' of an absolute path) before splitting.
907 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
909 //break apart the current working directory
911 while (simpleCWD.find_first_of('/') != string::npos) {
912 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
913 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
916 //get last one // ex. ../../../filename = /user/work/desktop/filename
917 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index marks the deepest cwd component still kept.
920 int index = dirs.size()-1;
// Consume the "./" and "../" components of path from right to left.
922 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
923 if (pos == 0) { break; //you are at the end
924 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
925 path = path.substr(0, pos-1);
927 if (index == 0) { break; }
928 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
929 path = path.substr(0, pos);
930 }else if (pos == 1) { break; //you are at the end
931 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// Rebuild the absolute name from the surviving cwd components plus the saved tail.
934 for (int i = index; i >= 0; i--) {
935 newFileName = dirs[i] + "/" + newFileName;
938 newFileName = "/" + newFileName;
// Non-unix branch (presumably Windows; the #else line is not shown): same approach
// with '\\' separators.
942 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): unlike the HOME lookup above, this getenv result is not checked for
// null before being turned into a string.
943 string homeDir = getenv ("HOMEPATH");
944 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
947 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
948 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
950 char *cwdpath = NULL;
951 cwdpath = getcwd(NULL, 0); // or _getcwd
952 if ( cwdpath != NULL) { cwd = cwdpath; }
955 //break apart the current working directory
957 while (cwd.find_first_of('\\') != -1) {
958 string dir = cwd.substr(0,cwd.find_first_of('\\'));
959 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
964 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
966 int index = dirs.size()-1;
968 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
969 if (pos == 0) { break; //you are at the end
970 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
971 path = path.substr(0, pos-1);
973 if (index == 0) { break; }
974 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
975 path = path.substr(0, pos);
976 }else if (pos == 1) { break; //you are at the end
977 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
980 for (int i = index; i >= 0; i--) {
981 newFileName = dirs[i] + "\\" + newFileName;
// Error path: hands the caught exception to errorOut with this class and function name.
990 catch(exception& e) {
991 errorOut(e, "MothurOut", "getFullPathName");
995 /***********************************************************************/
// Quiet variant of openInputFile: opens fileName (expanded with getFullPathName) on
// fileHandle; the "Could not open" message is commented out here. The third parameter
// m is not used in any visible line - it appears to exist only to select this overload.
// NOTE(review): the open-failure test and the int return values are on lines not shown;
// callers in this file treat 0 as "opened".
997 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1000 string completeFileName = getFullPathName(fileName);
1001 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1002 #ifdef USE_COMPRESSION
1003 // check for gzipped or bzipped file
// Compressed input is streamed through a named pipe: a forked child runs zcat/bzcat
// into the FIFO while the parent opens the FIFO in place of the original file.
// NOTE(review): tmpnam only returns a name, so another process could create that path
// before mkfifo does; the mkfifo result is not checked in the visible lines.
1004 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1005 string tempName = string(tmpnam(0));
1006 mkfifo(tempName.c_str(), 0666);
1007 int fork_result = fork();
1008 if (fork_result < 0) {
1009 cerr << "Error forking.\n";
1011 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command without quoting; names
// with spaces or shell metacharacters would break or be interpreted by the shell.
1012 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1013 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1014 system(command.c_str());
1015 cerr << "Done decompressing " << completeFileName << "\n";
1016 mothurRemove(tempName);
1019 cerr << "waiting on child process " << fork_result << "\n";
1020 completeFileName = tempName;
1025 fileHandle.open(completeFileName.c_str());
1027 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1030 //check for blank file
// Error path: reported under the name "openInputFile - no Error" so it can be told
// apart from the two-argument overload.
1035 catch(exception& e) {
1036 errorOut(e, "MothurOut", "openInputFile - no Error");
1040 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) on fileHandle, printing
// "[ERROR]: Could not open <name>" on failure and "[ERROR]: <name> is blank. Please
// correct." when the stream is at EOF at the blank-file check.
// NOTE(review): the open-failure test, whatever read happens before the eof() check,
// and the int return values are on lines not shown here.
1042 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1045 //get full path name
1046 string completeFileName = getFullPathName(fileName);
1047 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1048 #ifdef USE_COMPRESSION
1049 // check for gzipped or bzipped file
// Compressed input is streamed through a named pipe: a forked child runs zcat/bzcat
// into the FIFO while the parent opens the FIFO in place of the original file.
// NOTE(review): tmpnam only returns a name, so another process could create that path
// before mkfifo does; the mkfifo result is not checked in the visible lines.
1050 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1051 string tempName = string(tmpnam(0));
1052 mkfifo(tempName.c_str(), 0666);
1053 int fork_result = fork();
1054 if (fork_result < 0) {
1055 cerr << "Error forking.\n";
1057 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command without quoting.
1058 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1059 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1060 system(command.c_str());
1061 cerr << "Done decompressing " << completeFileName << "\n";
1062 mothurRemove(tempName);
1065 cerr << "waiting on child process " << fork_result << "\n";
1066 completeFileName = tempName;
1072 fileHandle.open(completeFileName.c_str());
1074 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1078 //check for blank file
1080 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
// Error path: hands the caught exception to errorOut with this class and function name.
1085 catch(exception& e) {
1086 errorOut(e, "MothurOut", "openInputFile");
1090 /***********************************************************************/
// Renames oldName to newName, replacing any existing newName. Returns 0 immediately
// when both names are equal.
// NOTE(review): the declaration of inTest, the #else between the two platform branches
// and the remaining return values are on lines not shown here.
1092 int MothurOut::renameFile(string oldName, string newName){
1095 if (oldName == newName) { return 0; }
// Probe quietly whether the target already exists (0 means it could be opened).
1098 int exist = openInputFile(newName, inTest, "");
1101 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// NOTE(review): both shell commands below paste the file names in without quoting;
// names with spaces or shell metacharacters would break or be interpreted by the
// shell, and the system() results are not checked in the visible lines.
1102 if (exist == 0) { //you could open it so you want to delete it
1103 string command = "rm " + newName;
1104 system(command.c_str());
1107 string command = "mv " + oldName + " " + newName;
1108 system(command.c_str());
// Presumably the non-unix branch: remove the target, then use the C library rename().
1110 mothurRemove(newName);
1111 int renameOk = rename(oldName.c_str(), newName.c_str());
// Error path: hands the caught exception to errorOut with this class and function name.
1116 catch(exception& e) {
1117 errorOut(e, "MothurOut", "renameFile");
1122 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) on fileHandle with ios::trunc, and
// prints "[ERROR]: Could not open <name>" on failure.
// NOTE(review): the open-failure test and the int return values are on lines not shown.
1124 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1127 string completeFileName = getFullPathName(fileName);
1128 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1129 #ifdef USE_COMPRESSION
1130 // check for gzipped file
// Compressed output goes through a named pipe: a forked child runs gzip/bzip2 reading
// from the FIFO and writing the real target, while the parent writes into the FIFO.
// NOTE(review): tmpnam only returns a name, so another process could create that path
// before mkfifo does; the file name is also pasted into the shell command unquoted.
1131 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1132 string tempName = string(tmpnam(0));
1133 mkfifo(tempName.c_str(), 0666);
1134 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1135 int fork_result = fork();
1136 if (fork_result < 0) {
1137 cerr << "Error forking.\n";
1139 } else if (fork_result == 0) {
1140 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1141 system(command.c_str());
1144 completeFileName = tempName;
1149 fileHandle.open(completeFileName.c_str(), ios::trunc);
1151 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
// Error path: hands the caught exception to errorOut with this class and function name.
1158 catch(exception& e) {
1159 errorOut(e, "MothurOut", "openOutputFile");
1165 /**************************************************************************************************/
// Appends the contents of file `temp` to file `filename`, copying in 4096-byte chunks
// and counting the '\n' characters copied in numLines.
// NOTE(review): the declarations of output/input/buffer/numLines, the stream closes
// and the return statement are on lines not shown; presumably numLines is returned -
// confirm.
1166 int MothurOut::appendFiles(string temp, string filename) {
1171 //open output file in append mode
1172 openOutputFileAppend(filename, output);
// The quiet overload is used, so a missing temp file produces no error message.
1173 int ableToOpen = openInputFile(temp, input, "no error");
1174 //int ableToOpen = openInputFile(temp, input);
1177 if (ableToOpen == 0) { //you opened it
1180 while (!input.eof()) {
// read() may deliver fewer than 4096 bytes on the last chunk; gcount() gives the
// number actually read, and only that many bytes are written and scanned.
1181 input.read(buffer, 4096);
1182 output.write(buffer, input.gcount());
1183 //count number of lines
1184 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
// Error path: hands the caught exception to errorOut with this class and function name.
1193 catch(exception& e) {
1194 errorOut(e, "MothurOut", "appendFiles");
1198 /**************************************************************************************************/
// Appends file `temp` to file `filename` after discarding the first line of
// `temp` (treated as a header line). The remainder is copied in reads of up to
// 4096 bytes while counting '\n' bytes in numLines (presumably the return
// value - confirm).
// NOTE(review): the catch handler reports the function as "appendFiles", which
// looks like a copy-paste leftover.
1199 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1204 //open output file in append mode
1205 openOutputFileAppend(filename, output);
1206 int ableToOpen = openInputFile(temp, input, "no error");
1207 //int ableToOpen = openInputFile(temp, input);
1210 if (ableToOpen == 0) { //you opened it
// consume the header line plus the whitespace after it so the copy starts at the first data line
1212 string headers = getline(input); gobble(input);
1213 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1216 while (!input.eof()) {
1217 input.read(buffer, 4096);
1218 output.write(buffer, input.gcount());
1219 //count number of lines
1220 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1229 catch(exception& e) {
1230 errorOut(e, "MothurOut", "appendFiles");
1234 /**************************************************************************************************/
// Sorts a three-column distance file (name, name, distance) by its distance
// column. The result is written to getRootName(distFile) + "sorted.dist".
// On Unix-like builds the work is delegated to the system `sort` command; the
// other branch moves the distance to the first column, runs the Windows `sort`,
// then moves the distance back to the last column and removes the two
// temporary files.
// NOTE(review): outputDir is only referenced by the commented-out line below.
// NOTE(review): file names are concatenated unquoted into the shell commands.
1235 string MothurOut::sortFile(string distFile, string outputDir){
1238 //if (outputDir == "") { outputDir += hasPath(distFile); }
1239 string outfile = getRootName(distFile) + "sorted.dist";
1242 //if you can, use the unix sort since its been optimized for years
1243 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1244 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1245 system(command.c_str());
1246 #else //you are stuck with my best attempt...
1247 //windows sort does not have a way to specify a column, only a character in the line
1248 //since we cannot assume that the distance will always be at the the same character location on each line
1249 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1251 //read in file line by file and put distance first
1252 string tempDistFile = distFile + ".temp";
1255 openInputFile(distFile, input);
1256 openOutputFile(tempDistFile, output);
1258 string firstName, secondName;
1260 while (!input.eof()) {
1261 input >> firstName >> secondName >> dist;
1262 output << dist << '\t' << firstName << '\t' << secondName << endl;
1269 //sort using windows sort
1270 string tempOutfile = outfile + ".temp";
1271 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1272 system(command.c_str());
1274 //read in sorted file and put distance at end again
1277 openInputFile(tempOutfile, input2);
1278 openOutputFile(outfile, output2);
1280 while (!input2.eof()) {
1281 input2 >> dist >> firstName >> secondName;
1282 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
// both intermediate files are only needed for the reorder-sort-reorder round trip
1289 mothurRemove(tempDistFile);
1290 mothurRemove(tempOutfile);
1295 catch(exception& e) {
1296 errorOut(e, "MothurOut", "sortFile");
1300 /**************************************************************************************************/
// Scans `filename` one character at a time in binary mode and records byte
// offsets in `positions` (presumably the offset of each '>' that starts a
// sequence - the test guarding the push_back is not visible here, confirm).
// `num` receives the number of offsets recorded by the scan; afterwards the
// total file size is appended as one extra, final entry.
1301 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1303 vector<unsigned long long> positions;
1305 //openInputFile(filename, inFASTA);
1306 inFASTA.open(filename.c_str(), ios::binary);
// count is the number of characters consumed so far, i.e. one past the offset of the last character read
1309 unsigned long long count = 0;
1310 while(!inFASTA.eof()){
1311 //input = getline(inFASTA);
1312 //cout << input << '\t' << inFASTA.tellg() << endl;
1313 //if (input.length() != 0) {
1314 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1316 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1317 char c = inFASTA.get(); count++;
1319 positions.push_back(count-1);
1320 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
1325 num = positions.size();
1326 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1328 unsigned long long size;
1330 //get num bytes in file
1331 pFile = fopen (filename.c_str(),"rb");
1332 if (pFile==NULL) perror ("Error opening file");
1334 fseek (pFile, 0, SEEK_END);
1339 /*unsigned long long size = positions[(positions.size()-1)];
1341 openInputFile(filename, in);
1346 if(in.eof()) { break; }
1351 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1353 positions.push_back(size);
1358 catch(exception& e) {
1359 errorOut(e, "MothurOut", "setFilePosFasta");
1363 //**********************************************************************************************************************
// Reads `inputfile` as whitespace-separated records of three fields - OTU
// label, size, taxonomy - and returns one consTax per record, in file order.
// Reading stops early when control_pressed is set.
1364 vector<consTax> MothurOut::readConsTax(string inputfile){
1367 vector<consTax> taxes;
1370 openInputFile(inputfile, in);
1377 if (control_pressed) { break; }
// defaults used when the extraction below does not overwrite them
1379 string otu = ""; string tax = "unknown";
1382 in >> otu >> size >> tax; gobble(in);
1383 consTax temp(otu, tax, size);
1384 taxes.push_back(temp);
1390 catch(exception& e) {
1391 errorOut(e, "MothurOut", "readConsTax");
1395 //**********************************************************************************************************************
// Map-filling overload: reads the same three fields per record (OTU label,
// size, taxonomy) and builds a consTax2 from the taxonomy and size.
// Presumably the entry is stored in `taxes` under the OTU label - the
// insertion is not visible here, confirm.
// Reading stops early when control_pressed is set.
1396 int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
1399 openInputFile(inputfile, in);
1406 if (control_pressed) { break; }
1408 string otu = ""; string tax = "unknown";
1411 in >> otu >> size >> tax; gobble(in);
1412 consTax2 temp(tax, size);
1419 catch(exception& e) {
1420 errorOut(e, "MothurOut", "readConsTax");
1424 /**************************************************************************************************/
// Records the byte offset at which each line of `filename` starts.
// positions[0] is 0; further offsets are pushed after each line terminator and
// the whitespace that follows it. `num` is set to positions.size()-1 and the
// last entry is finally overwritten with the file size in bytes.
// NOTE(review): `d != in.eof()` compares a character with a bool (0 or 1), so
// it does not actually test for end of file - confirm the intended check.
1425 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1427 filename = getFullPathName(filename);
1429 vector<unsigned long long> positions;
1431 //openInputFile(filename, in);
1432 in.open(filename.c_str(), ios::binary);
1435 unsigned long long count = 0;
1436 positions.push_back(0);
1439 //getline counting reads
1440 char d = in.get(); count++;
1441 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1442 //get next character
1448 d=in.get(); count++;
// skip the rest of the line ending / blank space so the recorded offset lands on the next line's first character
1449 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
1451 positions.push_back(count-1);
1452 //cout << count-1 << endl;
1456 num = positions.size()-1;
1459 unsigned long long size;
1461 //get num bytes in file
1462 pFile = fopen (filename.c_str(),"rb");
1463 if (pFile==NULL) perror ("Error opening file");
1465 fseek (pFile, 0, SEEK_END);
1470 positions[(positions.size()-1)] = size;
1474 catch(exception& e) {
1475 errorOut(e, "MothurOut", "setFilePosEachLine");
1479 /**************************************************************************************************/
// Splits `filename` into up to `proc` byte ranges whose boundaries fall on a
// '>' character, and returns the boundaries: first entry 0, last entry the
// file size.
// `proc` is an in/out parameter: it is reset to the number of ranges actually
// produced (1 when the file is smaller than `proc` bytes).
// On builds that are not Unix-like an error is logged and only the file size
// is appended after the leading 0.
1481 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1483 vector<unsigned long long> filePos;
1484 filePos.push_back(0);
1487 unsigned long long size;
1489 filename = getFullPathName(filename);
1491 //get num bytes in file
1492 pFile = fopen (filename.c_str(),"rb");
1493 if (pFile==NULL) perror ("Error opening file");
1495 fseek (pFile, 0, SEEK_END);
1500 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1502 //estimate file breaks
1503 unsigned long long chunkSize = 0;
1504 chunkSize = size / proc;
1506 //file to small to divide by processors
1507 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1509 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1510 for (int i = 0; i < proc; i++) {
1511 unsigned long long spot = (i+1) * chunkSize;
1514 openInputFile(filename, in);
1518 unsigned long long newSpot = spot;
// putback() returns the '>' to the stream so tellg() reports the offset of the '>' itself
1522 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1523 else if (int(c) == -1) { break; }
1527 //there was not another sequence before the end of the file
1528 unsigned long long sanityPos = in.tellg();
1530 if (sanityPos == -1) { break; }
1531 else { filePos.push_back(newSpot); }
1537 filePos.push_back(size);
1539 //sanity check filePos
// drop any boundary that does not advance past its predecessor; i-- re-examines the same index after the erase
1540 for (int i = 0; i < (filePos.size()-1); i++) {
1541 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1544 proc = (filePos.size() - 1);
1546 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1548 filePos.push_back(size);
1552 catch(exception& e) {
1553 errorOut(e, "MothurOut", "divideFile");
1557 /**************************************************************************************************/
// Splits `filename` into up to `proc` byte ranges whose boundaries fall at the
// start of a line, and returns the boundaries: first entry 0, last entry the
// file size.
// `proc` is an in/out parameter: it is reset to the number of ranges actually
// produced (1 when the file is smaller than `proc` bytes).
// NOTE(review): the "search for next '>'" comment, the Windows error text and
// the catch label all still say divideFile - they look copied from the
// '>'-based sibling.
1559 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1561 vector<unsigned long long> filePos;
1562 filePos.push_back(0);
1565 unsigned long long size;
1567 filename = getFullPathName(filename);
1569 //get num bytes in file
1570 pFile = fopen (filename.c_str(),"rb");
1571 if (pFile==NULL) perror ("Error opening file");
1573 fseek (pFile, 0, SEEK_END);
1578 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1580 //estimate file breaks
1581 unsigned long long chunkSize = 0;
1582 chunkSize = size / proc;
1584 //file to small to divide by processors
1585 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1587 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1588 for (int i = 0; i < proc; i++) {
1589 unsigned long long spot = (i+1) * chunkSize;
1592 openInputFile(filename, in);
1595 //look for next line break
1596 unsigned long long newSpot = spot;
// after a line terminator, gobble() is called before tellg() so the boundary is taken after it
1600 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1601 else if (int(c) == -1) { break; }
1604 //there was not another line before the end of the file
1605 unsigned long long sanityPos = in.tellg();
1607 if (sanityPos == -1) { break; }
1608 else { filePos.push_back(newSpot); }
1614 filePos.push_back(size);
1616 //sanity check filePos
1617 for (int i = 0; i < (filePos.size()-1); i++) {
1618 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1621 proc = (filePos.size() - 1);
1623 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1625 filePos.push_back(size);
1629 catch(exception& e) {
1630 errorOut(e, "MothurOut", "divideFile");
1634 /**************************************************************************************************/
1635 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1638 vector<unsigned long long> filePos = divideFile(filename, proc);
1640 for (int i = 0; i < (filePos.size()-1); i++) {
1644 openInputFile(filename, in);
1645 in.seekg(filePos[i]);
1646 unsigned long long size = filePos[(i+1)] - filePos[i];
1647 char* chunk = new char[size];
1648 in.read(chunk, size);
1652 string fileChunkName = filename + "." + toString(i) + ".tmp";
1654 openOutputFile(fileChunkName, out);
1656 out << chunk << endl;
1661 files.push_back(fileChunkName);
1666 catch(exception& e) {
1667 errorOut(e, "MothurOut", "divideFile");
1671 /***********************************************************************/
1673 bool MothurOut::isTrue(string f){
1676 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1678 if ((f == "TRUE") || (f == "T")) { return true; }
1679 else { return false; }
1681 catch(exception& e) {
1682 errorOut(e, "MothurOut", "isTrue");
1687 /***********************************************************************/
1689 float MothurOut::roundDist(float dist, int precision){
1691 return int(dist * precision + 0.5)/float(precision);
1693 catch(exception& e) {
1694 errorOut(e, "MothurOut", "roundDist");
1698 /***********************************************************************/
1700 float MothurOut::ceilDist(float dist, int precision){
1702 return int(ceil(dist * precision))/float(precision);
1704 catch(exception& e) {
1705 errorOut(e, "MothurOut", "ceilDist");
1709 /***********************************************************************/
// Splits the first `size` characters of `buffer` into whitespace-separated
// pieces, for callers that feed a file through in fixed-size reads.
// `rest` is in/out: on entry it holds the unfinished piece left over from the
// previous buffer; characters are appended to it until whitespace is reached,
// at which point the piece is pushed to the result and `rest` is restarted.
// Whatever is still in `rest` when the buffer runs out is not pushed here (no
// such push is visible), so the caller must handle the final piece.
1711 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1713 vector<string> pieces;
1715 for (int i = 0; i < size; i++) {
1716 if (!isspace(buffer[i])) { rest += buffer[i]; }
1718 if (rest != "") { pieces.push_back(rest); rest = ""; }
1719 while (i < size) { //gobble white space
1720 if (isspace(buffer[i])) { i++; }
// first non-space character after the gap starts the next piece
1721 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1728 catch(exception& e) {
1729 errorOut(e, "MothurOut", "splitWhiteSpace");
1733 /***********************************************************************/
// Splits `input` into its whitespace-separated pieces and returns them in
// order. Runs of whitespace act as a single separator, and a trailing piece
// with no whitespace after it is included.
1734 vector<string> MothurOut::splitWhiteSpace(string input){
1736 vector<string> pieces;
1739 for (int i = 0; i < input.length(); i++) {
1740 if (!isspace(input[i])) { rest += input[i]; }
1742 if (rest != "") { pieces.push_back(rest); rest = ""; }
1743 while (i < input.length()) { //gobble white space
1744 if (isspace(input[i])) { i++; }
1745 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// flush the last piece when the input does not end in whitespace
1750 if (rest != "") { pieces.push_back(rest); }
1754 catch(exception& e) {
1755 errorOut(e, "MothurOut", "splitWhiteSpace");
1759 /***********************************************************************/
// Like splitWhiteSpace(input), but text between quote characters (' or ") is
// collected as part of one piece, so quoted text may contain whitespace.
// Input without any quote character is handed straight to splitWhiteSpace().
// Either quote character closes a quoted run, whichever one opened it.
// NOTE(review): the catch handler reports the function as "splitWhiteSpace".
1760 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1762 vector<string> pieces;
1765 int pos = input.find('\'');
1766 int pos2 = input.find('\"');
1768 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1770 for (int i = 0; i < input.length(); i++) {
// `rest` holding a lone quote means the whitespace-gobbling loop below already consumed the opening quote
1771 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1773 for (int j = i+1; j < input.length(); j++) {
1774 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1778 }else { rest += input[j]; }
1780 }else if (!isspace(input[i])) { rest += input[i]; }
1782 if (rest != "") { pieces.push_back(rest); rest = ""; }
1783 while (i < input.length()) { //gobble white space
1784 if (isspace(input[i])) { i++; }
1785 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1790 if (rest != "") { pieces.push_back(rest); }
1794 catch(exception& e) {
1795 errorOut(e, "MothurOut", "splitWhiteSpace");
1799 //**********************************************************************************************************************
1800 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1804 openInputFile(namefile, in);
1808 bool pairDone = false;
1809 bool columnOne = true;
1810 string firstCol, secondCol;
1813 if (control_pressed) { break; }
1815 in.read(buffer, 4096);
1816 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1818 for (int i = 0; i < pieces.size(); i++) {
1819 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1820 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1823 checkName(firstCol);
1824 //are there confidence scores, if so remove them
1825 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1826 map<string, string>::iterator itTax = taxMap.find(firstCol);
1828 if(itTax == taxMap.end()) {
1829 bool ignore = false;
1830 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1832 if (!ignore) { taxMap[firstCol] = secondCol; }
1833 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1835 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1844 vector<string> pieces = splitWhiteSpace(rest);
1846 for (int i = 0; i < pieces.size(); i++) {
1847 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1848 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1851 checkName(firstCol);
1852 //are there confidence scores, if so remove them
1853 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1854 map<string, string>::iterator itTax = taxMap.find(firstCol);
1856 if(itTax == taxMap.end()) {
1857 bool ignore = false;
1858 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1860 if (!ignore) { taxMap[firstCol] = secondCol; }
1861 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1863 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1871 return taxMap.size();
1874 catch(exception& e) {
1875 errorOut(e, "MothurOut", "readTax");
1879 /**********************************************************************************************************************/
// Reads a two-column names file and fills nameMap so that every name in the
// comma-separated second column maps to the first-column name of its row.
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half of the function
// repeats the same processing on the text left over in `rest`.
// Returns nameMap.size().
// NOTE(review): `redund` is not referenced in the visible code - it may only
// serve to select this overload; confirm.
1880 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1884 openInputFile(namefile, in);
1888 bool pairDone = false;
1889 bool columnOne = true;
1890 string firstCol, secondCol;
1893 if (control_pressed) { break; }
1895 in.read(buffer, 4096);
1896 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the two columns; pairDone marks a completed row
1898 for (int i = 0; i < pieces.size(); i++) {
1899 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1900 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1903 checkName(firstCol);
1904 checkName(secondCol);
1906 //parse names into vector
1907 vector<string> theseNames;
1908 splitAtComma(secondCol, theseNames);
1909 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1917 vector<string> pieces = splitWhiteSpace(rest);
1919 for (int i = 0; i < pieces.size(); i++) {
1920 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1921 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1924 checkName(firstCol);
1925 checkName(secondCol);
1927 //parse names into vector
1928 vector<string> theseNames;
1929 splitAtComma(secondCol, theseNames);
1930 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1936 return nameMap.size();
1939 catch(exception& e) {
1940 errorOut(e, "MothurOut", "readNames");
1944 /**********************************************************************************************************************/
// Reads a two-column names file and stores each row reversed: the whole
// second column (not split at commas) becomes the key and the first column
// the value. Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
// Returns nameMap.size().
// NOTE(review): `flip` is not referenced in the visible code - it may only
// serve to select this overload; confirm.
1945 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
1949 openInputFile(namefile, in);
1953 bool pairDone = false;
1954 bool columnOne = true;
1955 string firstCol, secondCol;
1958 if (control_pressed) { break; }
1960 in.read(buffer, 4096);
1961 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1963 for (int i = 0; i < pieces.size(); i++) {
1964 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1965 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1968 checkName(firstCol);
1969 checkName(secondCol);
1970 nameMap[secondCol] = firstCol;
1978 vector<string> pieces = splitWhiteSpace(rest);
1980 for (int i = 0; i < pieces.size(); i++) {
1981 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1982 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1985 checkName(firstCol);
1986 checkName(secondCol);
1987 nameMap[secondCol] = firstCol;
1993 return nameMap.size();
1996 catch(exception& e) {
1997 errorOut(e, "MothurOut", "readNames");
2001 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, both cleared on entry:
//   nameMap   - every name of the comma-separated second column -> first column
//   nameCount - first column -> number of names in its second column
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
// Returns nameMap.size().
2002 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
2004 nameMap.clear(); nameCount.clear();
2007 openInputFile(namefile, in);
2011 bool pairDone = false;
2012 bool columnOne = true;
2013 string firstCol, secondCol;
2016 if (control_pressed) { break; }
2018 in.read(buffer, 4096);
2019 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2021 for (int i = 0; i < pieces.size(); i++) {
2022 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2023 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2026 checkName(firstCol);
2027 checkName(secondCol);
2028 //parse names into vector
2029 vector<string> theseNames;
2030 splitAtComma(secondCol, theseNames);
2031 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2032 nameCount[firstCol] = theseNames.size();
2040 vector<string> pieces = splitWhiteSpace(rest);
2042 for (int i = 0; i < pieces.size(); i++) {
2043 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2044 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2047 checkName(firstCol);
2048 checkName(secondCol);
2049 //parse names into vector
2050 vector<string> theseNames;
2051 splitAtComma(secondCol, theseNames);
2052 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2053 nameCount[firstCol] = theseNames.size();
2059 return nameMap.size();
2062 catch(exception& e) {
2063 errorOut(e, "MothurOut", "readNames");
2067 /**********************************************************************************************************************/
// Reads a two-column names file into nameMap as first column -> second column,
// keeping the second column as one unsplit string.
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
// Returns nameMap.size().
2068 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2072 openInputFile(namefile, in);
2076 bool pairDone = false;
2077 bool columnOne = true;
2078 string firstCol, secondCol;
2081 if (control_pressed) { break; }
2083 in.read(buffer, 4096);
2084 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2086 for (int i = 0; i < pieces.size(); i++) {
2087 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2088 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2091 checkName(firstCol);
2092 checkName(secondCol);
2093 nameMap[firstCol] = secondCol; pairDone = false; }
2099 vector<string> pieces = splitWhiteSpace(rest);
2101 for (int i = 0; i < pieces.size(); i++) {
2102 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2103 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2106 checkName(firstCol);
2107 checkName(secondCol);
2108 nameMap[firstCol] = secondCol; pairDone = false; }
2112 return nameMap.size();
2115 catch(exception& e) {
2116 errorOut(e, "MothurOut", "readNames");
2120 /**********************************************************************************************************************/
// Reads a two-column names file into nameMap as first column -> vector of the
// names obtained by splitting the second column at commas.
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
// Returns nameMap.size().
2121 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2125 openInputFile(namefile, in);
2129 bool pairDone = false;
2130 bool columnOne = true;
2131 string firstCol, secondCol;
2134 if (control_pressed) { break; }
2136 in.read(buffer, 4096);
2137 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2139 for (int i = 0; i < pieces.size(); i++) {
2140 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2141 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2144 checkName(firstCol);
2145 checkName(secondCol);
2146 vector<string> temp;
2147 splitAtComma(secondCol, temp);
2148 nameMap[firstCol] = temp;
2156 vector<string> pieces = splitWhiteSpace(rest);
2158 for (int i = 0; i < pieces.size(); i++) {
2159 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2160 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2163 checkName(firstCol);
2164 checkName(secondCol);
2165 vector<string> temp;
2166 splitAtComma(secondCol, temp);
2167 nameMap[firstCol] = temp;
2173 return nameMap.size();
2175 catch(exception& e) {
2176 errorOut(e, "MothurOut", "readNames");
2180 /**********************************************************************************************************************/
// Reads a two-column names file and builds a map of first column ->
// getNumNames(second column), i.e. a count per row.
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
2181 map<string, int> MothurOut::readNames(string namefile) {
2183 map<string, int> nameMap;
2187 openInputFile(namefile, in);
2191 bool pairDone = false;
2192 bool columnOne = true;
2193 string firstCol, secondCol;
2196 if (control_pressed) { break; }
2198 in.read(buffer, 4096);
2199 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2201 for (int i = 0; i < pieces.size(); i++) {
2202 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2203 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2206 checkName(firstCol);
2207 checkName(secondCol);
2208 int num = getNumNames(secondCol);
2209 nameMap[firstCol] = num;
2217 vector<string> pieces = splitWhiteSpace(rest);
2218 for (int i = 0; i < pieces.size(); i++) {
2219 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2220 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2223 checkName(firstCol);
2224 checkName(secondCol);
2225 int num = getNumNames(secondCol);
2226 nameMap[firstCol] = num;
2235 catch(exception& e) {
2236 errorOut(e, "MothurOut", "readNames");
2240 /**********************************************************************************************************************/
// Reads a two-column names file and builds a map of first column ->
// getNumNames(second column), i.e. a count per row.
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
// NOTE(review): no update of `numSeqs` is visible here; presumably it
// accumulates the per-row counts - confirm.
2241 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2243 map<string, int> nameMap;
2248 openInputFile(namefile, in);
2252 bool pairDone = false;
2253 bool columnOne = true;
2254 string firstCol, secondCol;
2257 if (control_pressed) { break; }
2259 in.read(buffer, 4096);
2260 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2262 for (int i = 0; i < pieces.size(); i++) {
2263 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2264 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2267 checkName(firstCol);
2268 checkName(secondCol);
2269 int num = getNumNames(secondCol);
2270 nameMap[firstCol] = num;
2279 vector<string> pieces = splitWhiteSpace(rest);
2280 for (int i = 0; i < pieces.size(); i++) {
2281 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2282 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2285 checkName(firstCol);
2286 checkName(secondCol);
2287 int num = getNumNames(secondCol);
2288 nameMap[firstCol] = num;
2298 catch(exception& e) {
2299 errorOut(e, "MothurOut", "readNames");
2303 /************************************************************/
// Replaces every ':' in `name` with '_' in place (the string is passed by
// reference). The member flag changedSeqNames is set to true whenever a
// replacement happens.
2304 int MothurOut::checkName(string& name) {
2307 for (int i = 0; i < name.length(); i++) {
2308 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2313 catch(exception& e) {
2314 errorOut(e, "MothurOut", "checkName");
2318 /**********************************************************************************************************************/
// Reads a two-column names file and, for each row whose first-column name is
// found in `fastamap`, appends a seqPriorityNode built from
// (getNumNames(second column), the mapped fastamap value, the name) to
// nameVector. A name missing from fastamap is reported with an [ERROR] message.
// Both columns are passed through checkName() first.
// The file is consumed in 4096-byte reads; the second half repeats the same
// processing on the text left over in `rest`.
2319 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2325 openInputFile(namefile, in);
2329 bool pairDone = false;
2330 bool columnOne = true;
2331 string firstCol, secondCol;
2334 if (control_pressed) { break; }
2336 in.read(buffer, 4096);
2337 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2339 for (int i = 0; i < pieces.size(); i++) {
2340 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2341 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2344 checkName(firstCol);
2345 checkName(secondCol);
2346 int num = getNumNames(secondCol);
2348 map<string, string>::iterator it = fastamap.find(firstCol);
2349 if (it == fastamap.end()) {
2351 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2353 seqPriorityNode temp(num, it->second, firstCol);
2354 nameVector.push_back(temp);
2364 vector<string> pieces = splitWhiteSpace(rest);
2366 for (int i = 0; i < pieces.size(); i++) {
2367 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2368 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2371 checkName(firstCol);
2372 checkName(secondCol);
2373 int num = getNumNames(secondCol);
2375 map<string, string>::iterator it = fastamap.find(firstCol);
2376 if (it == fastamap.end()) {
2378 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2380 seqPriorityNode temp(num, it->second, firstCol);
2381 nameVector.push_back(temp);
2390 catch(exception& e) {
2391 errorOut(e, "MothurOut", "readNames");
2395 //**********************************************************************************************************************
// Reads an accnos file (whitespace-separated names) into a set.
// Every name is passed through checkName() before insertion, so ':' becomes
// '_'. The file is consumed in 4096-byte reads and the text left over in
// `rest` is processed the same way afterwards.
// Reading stops early when control_pressed is set.
2396 set<string> MothurOut::readAccnos(string accnosfile){
2400 openInputFile(accnosfile, in);
2407 if (control_pressed) { break; }
2409 in.read(buffer, 4096);
2410 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2412 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
2413 names.insert(pieces[i]);
2419 vector<string> pieces = splitWhiteSpace(rest);
2420 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2424 catch(exception& e) {
2425 errorOut(e, "MothurOut", "readAccnos");
2429 //**********************************************************************************************************************
// Reads an accnos file (whitespace-separated names) and appends the names to
// `names` in file order; duplicates are kept because the target is a vector.
// Every name is passed through checkName() first. The file is consumed in
// 4096-byte reads and the text left over in `rest` is processed the same way
// afterwards. Reading stops early when control_pressed is set.
2430 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2434 openInputFile(accnosfile, in);
2441 if (control_pressed) { break; }
2443 in.read(buffer, 4096);
2444 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2446 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2451 vector<string> pieces = splitWhiteSpace(rest);
2452 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2457 catch(exception& e) {
2458 errorOut(e, "MothurOut", "readAccnos");
2462 /***********************************************************************/
// Scans `names` for ',' characters (presumably to return how many
// comma-separated names it holds - the counting statements are not visible
// here, confirm).
2464 int MothurOut::getNumNames(string names){
2470 for(int i=0;i<names.size();i++){
2471 if(names[i] == ','){
2479 catch(exception& e) {
2480 errorOut(e, "MothurOut", "getNumNames");
2484 /***********************************************************************/
// Walks every character of `line` (presumably counting the occurrences of `c`
// - the loop body is not visible here, confirm).
2486 int MothurOut::getNumChar(string line, char c){
2491 for(int i=0;i<line.size();i++){
2500 catch(exception& e) {
2501 errorOut(e, "MothurOut", "getNumChar");
2505 //**********************************************************************************************************************
2506 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2510 if (subset.size() > bigset.size()) { return false; }
2512 //check if each guy in suset is also in bigset
2513 for (int i = 0; i < subset.size(); i++) {
2515 for (int j = 0; j < bigset.size(); j++) {
2516 if (subset[i] == bigset[j]) { match = true; break; }
2519 //you have a guy in subset that had no match in bigset
2520 if (match == false) { return false; }
2526 catch(exception& e) {
2527 errorOut(e, "MothurOut", "isSubset");
2531 /***********************************************************************/
// Deletes `filename` after resolving it with getFullPathName().
// The result of remove() is captured in `error`; the reporting code that would
// have printed a message for failures other than "file does not exist" is
// commented out, so failures are not reported from the visible code.
2532 int MothurOut::mothurRemove(string filename){
2534 filename = getFullPathName(filename);
2535 int error = remove(filename.c_str());
2537 // if (errno != ENOENT) { //ENOENT == file does not exist
2538 // string message = "Error deleting file " + filename;
2539 // perror(message.c_str());
2544 catch(exception& e) {
2545 errorOut(e, "MothurOut", "mothurRemove");
2549 /***********************************************************************/
// Converts the text `item` into the int `num`.
// The text is first screened with isNumeric1(); on the failure path an [ERROR]
// message is logged and the member flag commandInputsConvertError is set.
// Presumably returns whether the conversion succeeded - the conversion and
// return statements are not visible here, confirm.
2550 bool MothurOut::mothurConvert(string item, int& num){
2554 if (isNumeric1(item)) {
2559 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2560 commandInputsConvertError = true;
2565 catch(exception& e) {
2566 errorOut(e, "MothurOut", "mothurConvert");
2570 /***********************************************************************/
// Converts the text `item` into the intDist `num`.
// The text is first screened with isNumeric1(); on the failure path an [ERROR]
// message is logged and the member flag commandInputsConvertError is set.
// Presumably returns whether the conversion succeeded - the conversion and
// return statements are not visible here, confirm.
2571 bool MothurOut::mothurConvert(string item, intDist& num){
2575 if (isNumeric1(item)) {
2580 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2581 commandInputsConvertError = true;
2586 catch(exception& e) {
2587 errorOut(e, "MothurOut", "mothurConvert");
2592 /***********************************************************************/
// isNumeric1 - character-class test for "looks like a number".
// `numeric` becomes true when `stringToCheck` contains nothing except the digits
// 0-9, '.' and '-'. The return statement is on a line not visible here.
// NOTE(review): this does not validate number syntax; strings such as "", "-",
// "1-2" or "1.2.3" also make find_first_not_of() return npos.
2593 bool MothurOut::isNumeric1(string stringToCheck){
2595 bool numeric = false;
// npos means no character outside the allowed set was found.
2597 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2601 catch(exception& e) {
2602 errorOut(e, "MothurOut", "isNumeric1");
2607 /***********************************************************************/
// mothurConvert (float overload) - tries to convert `item` into `num`.
// isNumeric1() screens the text; on rejection (presumably the else branch) an
// [ERROR] message is printed and commandInputsConvertError is set to true.
// The conversion itself and the return statements are on lines not visible here.
2608 bool MothurOut::mothurConvert(string item, float& num){
2612 if (isNumeric1(item)) {
2617 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
// Record the failure in the commandInputsConvertError flag.
2618 commandInputsConvertError = true;
2623 catch(exception& e) {
2624 errorOut(e, "MothurOut", "mothurConvert");
2628 /***********************************************************************/
// mothurConvert (double overload) - tries to convert `item` into `num`.
// isNumeric1() screens the text; on rejection (presumably the else branch) an
// [ERROR] message is printed and commandInputsConvertError is set to true.
// The conversion itself and the return statements are on lines not visible here.
2629 bool MothurOut::mothurConvert(string item, double& num){
2633 if (isNumeric1(item)) {
2638 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
// Record the failure in the commandInputsConvertError flag.
2639 commandInputsConvertError = true;
2644 catch(exception& e) {
2645 errorOut(e, "MothurOut", "mothurConvert");
2649 /**************************************************************************************************/
// binomial - builds a (maxOrder+1) x (maxOrder+1) table of doubles.
// Rows 2..maxOrder are filled with the recurrence
//     binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j]
// with entries above the diagonal (j > i) set to 0.
// Initialisation of the first rows and of column 0 is on lines not visible in
// this excerpt, as is the return statement.
2651 vector<vector<double> > MothurOut::binomial(int maxOrder){
2653 vector<vector<double> > binomial(maxOrder+1);
// Give every row maxOrder+1 columns (value-initialised to 0.0 by resize).
2655 for(int i=0;i<=maxOrder;i++){
2656 binomial[i].resize(maxOrder+1);
2665 for(int i=2;i<=maxOrder;i++){
2669 for(int i=2;i<=maxOrder;i++){
2670 for(int j=1;j<=maxOrder;j++){
// NOTE(review): the i==j test is not chained to the following if/else, so for
// i==j the final else branch also runs and reassigns the cell from the recurrence.
2671 if(i==j){ binomial[i][j]=1; }
2672 if(j>i) { binomial[i][j]=0; }
2673 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2680 catch(exception& e) {
2681 errorOut(e, "MothurOut", "binomial");
2685 /**************************************************************************************************/
// fromBase36 - decodes the base-36 text in `base36` into an unsigned int.
// `converts` maps a digit character to its numeric value; the table is filled on
// lines not visible in this excerpt. Each loop step folds one digit into the
// running value with num = 36*num + digit.
// NOTE(review): converts[c] uses map::operator[], so a character missing from the
// table contributes 0 (and is inserted) instead of being reported; no overflow
// check is visible.
2686 unsigned int MothurOut::fromBase36(string base36){
2688 unsigned int num = 0;
2690 map<char, int> converts;
// `i` and `c` are declared/advanced on lines not visible here.
2755 while (i < base36.length()) {
2757 num = 36 * num + converts[c];
2764 catch(exception& e) {
2765 errorOut(e, "MothurOut", "fromBase36");
2769 /***********************************************************************/
// factorial - iterates i from 1 to `num` inclusive; presumably multiplies a
// running product by i on each step (the accumulation and the return statement
// are on lines not visible here -- TODO confirm).
// NOTE(review): the return type is int, so if the product is held in an int the
// result overflows for num > 12 where int is 32 bits.
2771 int MothurOut::factorial(int num){
2775 for (int i = 1; i <= num; i++) {
2781 catch(exception& e) {
2782 errorOut(e, "MothurOut", "factorial");
2786 /***********************************************************************/
// getNumSeqs - counts the '>' characters remaining in `file`.
// The stream is consumed through istreambuf_iterator up to end-of-file, so the
// caller's read position is at the end afterwards.
// NOTE(review): every '>' byte is counted, not only those that start a line.
// The return statement is on a line not visible here (presumably numSeqs).
2788 int MothurOut::getNumSeqs(ifstream& file){
2790 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2794 catch(exception& e) {
2795 errorOut(e, "MothurOut", "getNumSeqs");
2799 /***********************************************************************/
// getNumSeqs (line-based variant) - reads `file` with getline() and increments
// the caller's `numSeqs` for each non-empty line whose first character is '>'.
// The loop construct and any initialisation of `numSeqs` are on lines not visible
// in this excerpt.
2800 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2805 input = getline(file);
// Guard against indexing an empty line before testing its first character.
2806 if (input.length() != 0) {
2807 if(input[0] == '>'){ numSeqs++; }
2811 catch(exception& e) {
2812 errorOut(e, "MothurOut", "getNumSeqs");
2816 /***********************************************************************/
2818 //This function parses the estimator options and puts them in a vector
// splitAtChar - splits `estim` on `symbol` and appends the pieces to `container`.
// A '-' separator is delegated to splitAtDash(), whose loop handles
// backslash-escaped dashes. For any other separator the text is scanned once:
// each separator pushes the piece collected so far, and the text after the last
// separator is pushed after the loop.
2819 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2822 if (symbol == '-') { splitAtDash(estim, container); return; }
2824 string individual = "";
2825 int estimLength = estim.size();
2826 for(int i=0;i<estimLength;i++){
2827 if(estim[i] == symbol){
// Separator reached: emit the current piece (resetting it happens on a line not visible here).
2828 container.push_back(individual);
2832 individual += estim[i];
// Emit whatever followed the last separator.
2835 container.push_back(individual);
2838 catch(exception& e) {
2839 errorOut(e, "MothurOut", "splitAtChar");
2844 /***********************************************************************/
2846 //This function parses the estimator options and puts them in a vector
// splitAtDash (vector overload) - splits `estim` on '-' and appends the pieces
// to `container`.
// Escaping: a backslash directly followed by a dash is dropped and the dash is
// kept as a literal character of the current piece; a backslash followed by
// anything else, or at the very end of the string, is kept as-is.
2847 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2849 string individual = "";
2850 int estimLength = estim.size();
2851 bool prevEscape = false;
// An earlier version of the loop is kept below inside a block comment; the line
// that terminates that comment is not visible in this excerpt.
2852 /*for(int i=0;i<estimLength;i++){
2854 individual += estim[i];
2858 if(estim[i] == '\\'){
2861 else if(estim[i] == '-'){
2862 container.push_back(individual);
2867 individual += estim[i];
2874 for(int i=0;i<estimLength;i++){
2875 if(estim[i] == '-'){
2876 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2878 container.push_back(individual);
2881 }else if(estim[i] == '\\'){
2882 if (i < estimLength-1) {
2883 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2884 else { individual += estim[i]; prevEscape = false; } //if no, add in
2885 }else { individual += estim[i]; }
2887 individual += estim[i];
2893 container.push_back(individual);
2895 catch(exception& e) {
2896 errorOut(e, "MothurOut", "splitAtDash");
2901 /***********************************************************************/
2902 //This function parses the label options and puts them in a set
// splitAtDash (set<string> overload) - splits `estim` on '-' and inserts the
// pieces into `container`; duplicates collapse because the target is a set.
// Escaping: a backslash directly followed by a dash is dropped and the dash is
// kept as a literal character; any other backslash is kept as-is.
2903 void MothurOut::splitAtDash(string& estim, set<string>& container) {
2905 string individual = "";
2906 int estimLength = estim.size();
2907 bool prevEscape = false;
// NOTE(review): the first loop below has the shape of the legacy loop that the
// vector overload keeps commented out; its comment delimiters are presumably on
// lines not visible in this excerpt -- confirm against the full file.
2909 for(int i=0;i<estimLength;i++){
2911 individual += estim[i];
2915 if(estim[i] == '\\'){
2918 else if(estim[i] == '-'){
2919 container.insert(individual);
2924 individual += estim[i];
// Escape-aware splitting loop.
2931 for(int i=0;i<estimLength;i++){
2932 if(estim[i] == '-'){
2933 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2935 container.insert(individual);
2938 }else if(estim[i] == '\\'){
2939 if (i < estimLength-1) {
2940 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2941 else { individual += estim[i]; prevEscape = false; } //if no, add in
2942 }else { individual += estim[i]; }
2944 individual += estim[i];
// Insert whatever followed the last unescaped dash.
2947 container.insert(individual);
2950 catch(exception& e) {
2951 errorOut(e, "MothurOut", "splitAtDash");
2955 /***********************************************************************/
2956 //This function parses the line options and puts them in a set
// splitAtDash (set<int> overload) - splits `estim` on '-', converts every piece
// to an int with convert() and inserts it into `container`.
// `lineNum` receives each converted value; its declaration is on a line not
// visible in this excerpt.
// Escaping follows the string overloads: a backslash directly before a dash is
// dropped and the dash is kept in the current piece.
2957 void MothurOut::splitAtDash(string& estim, set<int>& container) {
2959 string individual = "";
2961 int estimLength = estim.size();
2962 bool prevEscape = false;
// NOTE(review): the first loop below has the shape of the legacy loop that the
// vector overload keeps commented out; its comment delimiters are presumably on
// lines not visible in this excerpt -- confirm against the full file.
2964 for(int i=0;i<estimLength;i++){
2966 individual += estim[i];
2970 if(estim[i] == '\\'){
2973 else if(estim[i] == '-'){
2974 convert(individual, lineNum); //convert the string to int
2975 container.insert(lineNum);
2980 individual += estim[i];
// Escape-aware splitting loop.
2986 for(int i=0;i<estimLength;i++){
2987 if(estim[i] == '-'){
2988 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2990 convert(individual, lineNum); //convert the string to int
2991 container.insert(lineNum);
2994 }else if(estim[i] == '\\'){
2995 if (i < estimLength-1) {
2996 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2997 else { individual += estim[i]; prevEscape = false; } //if no, add in
2998 }else { individual += estim[i]; }
3000 individual += estim[i];
// Convert and insert whatever followed the last unescaped dash.
3004 convert(individual, lineNum); //convert the string to int
3005 container.insert(lineNum);
3007 catch(exception& e) {
3008 errorOut(e, "MothurOut", "splitAtDash");
3013 /***********************************************************************/
// makeList - joins `names` into one comma-separated string.
// An empty vector returns `list` as initialised (on a line not visible here).
// Every element except the last is appended followed by ','; the last element is
// appended without a trailing comma.
3014 string MothurOut::makeList(vector<string>& names) {
// The early return also keeps names.size()-1 below from wrapping around on an empty vector.
3018 if (names.size() == 0) { return list; }
3020 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
3023 list += names[names.size()-1];
3027 catch(exception& e) {
3028 errorOut(e, "MothurOut", "makeList");
3033 /***********************************************************************/
3034 //This function parses a string and puts the pieces in a vector
// splitAtComma (vector overload) - splits `estim` on ',' and appends the pieces
// to `container`. `estim` is scanned once and is not modified by the visible
// code; the text after the last comma is pushed after the loop.
// The commented-out lines further down are an older find_first_of()/substr()
// implementation that consumed `estim` while splitting.
3035 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
3037 string individual = "";
3038 int estimLength = estim.size();
3039 for(int i=0;i<estimLength;i++){
3040 if(estim[i] == ','){
// Comma reached: emit the current piece (resetting it happens on a line not visible here).
3041 container.push_back(individual);
3045 individual += estim[i];
// Emit whatever followed the last comma.
3048 container.push_back(individual);
3053 // string individual;
3055 // while (estim.find_first_of(',') != -1) {
3056 // individual = estim.substr(0,estim.find_first_of(','));
3057 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
3058 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
3059 // container.push_back(individual);
3063 // container.push_back(estim);
3065 catch(exception& e) {
3066 errorOut(e, "MothurOut", "splitAtComma");
3070 /***********************************************************************/
3071 //This function splits up the various option parameters
// splitAtChar (prefix/suffix overload) - peels the text before the first `c` off
// the front of `suffix`.
// On return `prefix` holds everything before the first `c`; `suffix` holds what
// followed it with leading spaces removed, or "" when nothing follows the
// separator.
// NOTE(review): when `c` does not occur, find_first_of() returns npos, `prefix`
// receives the whole string and the npos+2 arithmetic wraps around, so `suffix`
// is left holding the whole string as well -- callers should test for the
// separator first.
3072 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3074 prefix = suffix.substr(0,suffix.find_first_of(c));
3075 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have the separator at end of string
3076 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
// Strip leading blanks. The emptiness test is required: when the remainder
// consists only of spaces the loop empties the string, and at(0) on an empty
// string throws std::out_of_range.
3078 while(!suffix.empty() && suffix.at(0) == ' ')
3079 suffix = suffix.substr(1, suffix.length());
3080 }else { suffix = ""; }
3083 catch(exception& e) {
3084 errorOut(e, "MothurOut", "splitAtChar");
3089 /***********************************************************************/
3091 //This function splits up the various option parameters
// splitAtComma (prefix/suffix overload) - peels the text before the first ','
// off the front of `suffix`.
// On return `prefix` holds everything before the first comma; `suffix` holds
// what followed it with leading spaces removed, or "" when nothing follows the
// comma.
// NOTE(review): when no comma is present, find_first_of() returns npos, `prefix`
// receives the whole string and the npos+2 arithmetic wraps around, so `suffix`
// is left holding the whole string as well -- callers should test for a comma
// first.
3092 void MothurOut::splitAtComma(string& prefix, string& suffix){
3094 prefix = suffix.substr(0,suffix.find_first_of(','));
3095 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3096 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
// Strip leading blanks. The emptiness test is required: when the remainder
// consists only of spaces the loop empties the string, and at(0) on an empty
// string throws std::out_of_range.
3098 while(!suffix.empty() && suffix.at(0) == ' ')
3099 suffix = suffix.substr(1, suffix.length());
3100 }else { suffix = ""; }
3103 catch(exception& e) {
3104 errorOut(e, "MothurOut", "splitAtComma");
3108 /***********************************************************************/
3110 //This function separates the key value from the option value i.e. dist=96_...
// splitAtEquals - separates "key=value" text that arrives in `value`.
// When `value` contains '=', `key` receives the text before the first '=' and
// `value` is replaced by the text after it. What happens when there is no '='
// is on lines not visible in this excerpt.
3111 void MothurOut::splitAtEquals(string& key, string& value){
// find_first_of() yields string::npos when nothing is found; the comparison with
// -1 relies on -1 converting to that same unsigned value.
3113 if(value.find_first_of('=') != -1){
3114 key = value.substr(0,value.find_first_of('='));
3115 if ((value.find_first_of('=')+1) <= value.length()) {
3116 value = value.substr(value.find_first_of('=')+1, value.length());
3123 catch(exception& e) {
3124 errorOut(e, "MothurOut", "splitAtEquals");
3129 /**************************************************************************************************/
// inUsersGroups (string overload) - linear search for `groupname` in `Groups`.
// Returns true at the first exact match; the not-found return is on a line not
// visible here (presumably false). `Groups` is passed by value.
3131 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3133 for (int i = 0; i < Groups.size(); i++) {
3134 if (groupname == Groups[i]) { return true; }
3138 catch(exception& e) {
3139 errorOut(e, "MothurOut", "inUsersGroups");
3143 /**************************************************************************************************/
// inUsersGroups (vector<int> overload) - linear search for the vector `set`
// among the vectors in `sets`, using vector equality (same length and same
// elements in the same order).
// Returns true at the first match; the not-found return is on a line not visible
// here (presumably false). Both arguments are passed by value.
3145 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3147 for (int i = 0; i < sets.size(); i++) {
3148 if (set == sets[i]) { return true; }
3152 catch(exception& e) {
3153 errorOut(e, "MothurOut", "inUsersGroups");
3157 /**************************************************************************************************/
// inUsersGroups (int overload) - linear search for `groupname` in `Groups`.
// Returns true at the first equal element; the not-found return is on a line not
// visible here (presumably false). `Groups` is passed by value.
3159 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3161 for (int i = 0; i < Groups.size(); i++) {
3162 if (groupname == Groups[i]) { return true; }
3166 catch(exception& e) {
3167 errorOut(e, "MothurOut", "inUsersGroups");
3172 /**************************************************************************************************/
3173 //returns true if any of the strings in first vector are in second vector
// inUsersGroups (any-of overload) - returns true as soon as one entry of
// `groupnames` is found in `Groups`, delegating each lookup to the
// (string, vector<string>) overload. The not-found return is on a line not
// visible here (presumably false).
3174 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3177 for (int i = 0; i < groupnames.size(); i++) {
3178 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3182 catch(exception& e) {
3183 errorOut(e, "MothurOut", "inUsersGroups");
3187 /***********************************************************************/
3188 //this function determines if the user has given us labels that are smaller than the given label.
3189 //if so then it returns true so that the calling function can run the previous valid distance.
3190 //it's a "smart" distance function. It also checks for invalid labels.
// Parameters:
//   label      - the label currently being examined ("unique" or a number as text).
//   userLabels - labels requested by the user; invalid entries and entries smaller
//                than `label` are erased from this set.
//   errorOff   - when equal to "", diagnostic messages are printed; any other
//                value suppresses them.
// Returns false immediately for label == "unique". The other return statements
// (and where `smaller` is set) are on lines not visible in this excerpt.
3191 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3194 set<string>::iterator it;
3195 vector<float> orderFloat;
3196 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3197 map<string, float>::iterator it2;
3199 bool smaller = false;
3201 //unique is the smallest line
3202 if (label == "unique") { return false; }
// Test that the label is convertible before converting it for real.
3204 if (convertTestFloat(label, labelFloat)) {
3205 convert(label, labelFloat);
3206 }else { //cant convert
3211 //go through users set and make them floats
// The loop header has no increment: the iterator is advanced inside the body so
// that entries can be erased while iterating.
3212 for(it = userLabels.begin(); it != userLabels.end();) {
3215 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3217 orderFloat.push_back(temp);
// Remember the original spelling so it can be erased from userLabels later.
3218 userMap[*it] = temp;
3220 }else if (*it == "unique") {
// "unique" is represented as -1.0 so it sorts below every numeric label.
3221 orderFloat.push_back(-1.0);
3222 userMap["unique"] = -1.0;
3225 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
// Post-increment hands erase() the current position while `it` moves on first.
3226 userLabels.erase(it++);
3231 sort(orderFloat.begin(), orderFloat.end());
3233 /*************************************************/
3234 //is this label bigger than any of the users labels
3235 /*************************************************/
3237 //loop through order until you find a label greater than label
3238 for (int i = 0; i < orderFloat.size(); i++) {
3239 if (orderFloat[i] < labelFloat) {
// -1 is the stand-in stored above for "unique".
3241 if (orderFloat[i] == -1) {
3242 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3243 userLabels.erase("unique");
3246 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// Look up the user's original text for this numeric value.
3248 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3249 if (it2->second == orderFloat[i]) {
3251 //remove small labels
3252 userLabels.erase(s);
3256 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3258 //since they are sorted once you find a bigger one stop looking
3265 catch(exception& e) {
3266 errorOut(e, "MothurOut", "anyLabelsToProcess");
3271 /**************************************************************************************************/
// checkReleaseVersion - checks the version stamp on the first line of `file`
// against `version` (the running release, per the comment below).
// The first line must start with '#', followed by a dot-separated version. The
// file is rejected when that marker is missing, when the two versions have a
// different number of components, or when `version` is newer than the file's.
// On rejection the stream is closed; otherwise it is rewound with seekg(0).
// The return statement is on a line not visible here (presumably `good`).
3272 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3277 string line = getline(file);
3279 //before we added this check
3280 if (line[0] != '#') { good = false; }
// Drop the leading '#'.
3283 line = line.substr(1);
3285 vector<string> versionVector;
3286 splitAtChar(version, versionVector, '.');
3288 //check file version
3289 vector<string> linesVector;
3290 splitAtChar(line, linesVector, '.');
3292 if (versionVector.size() != linesVector.size()) { good = false; }
3294 for (int j = 0; j < versionVector.size(); j++) {
3296 convert(versionVector[j], num1);
3297 convert(linesVector[j], num2);
3299 //if mothurs version is newer than this files version, then we want to remake it
3300 if (num1 > num2) { good = false; break; }
// A more significant component already shows the file is newer, so less
// significant components must not be compared (e.g. 1.9 against 2.0).
else if (num1 < num2) { break; }
3306 if (!good) { file.close(); }
3307 else { file.seekg(0); }
3311 catch(exception& e) {
3312 errorOut(e, "MothurOut", "checkReleaseVersion");
3316 /**************************************************************************************************/
// getAverages (double overload) - column-wise mean of `dists`.
// dists[iter][i] is the i-th value of row `iter`. The result has one entry per
// column of dists[0]: the sum of that column over all rows divided by the number
// of rows. The return statement is on a line not visible here.
// NOTE(review): dists[0] is read unconditionally, so an empty `dists` is not
// supported; a row longer than dists[0] would index past the end of `averages`.
3317 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3319 vector<double> averages; //averages.resize(numComp, 0.0);
// One zeroed accumulator per column.
3320 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3322 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3323 for (int i = 0; i < dists[thisIter].size(); i++) {
3324 averages[i] += dists[thisIter][i];
// Turn the column sums into means.
3329 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3333 catch(exception& e) {
3334 errorOut(e, "MothurOut", "getAverages");
3338 /**************************************************************************************************/
// getAverage - arithmetic mean of the values in `dists`: the elements are summed
// into `average`, which is then divided by the element count.
// The declaration/initial value of `average` and the return statement are on
// lines not visible here.
// NOTE(review): no guard for an empty vector is visible; the division would then
// be by zero.
3339 double MothurOut::getAverage(vector<double> dists) {
3343 for (int i = 0; i < dists.size(); i++) {
3344 average += dists[i];
3348 average /= (double) dists.size();
3352 catch(exception& e) {
3353 errorOut(e, "MothurOut", "getAverage");
3358 /**************************************************************************************************/
// getStandardDeviation (double overload) - column-wise standard deviation of
// `dists`. The column means come from getAverages(dists); for each column the
// squared deviations are summed over all rows, divided by the row count N
// (not N-1) and square-rooted.
// The return statement is on a line not visible here (presumably stdDev).
// NOTE(review): dists[0] is read unconditionally, so an empty `dists` is not supported.
3359 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3362 vector<double> averages = getAverages(dists);
3364 //find standard deviation
3365 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
// One zeroed accumulator per column.
3366 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3368 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3369 for (int j = 0; j < dists[thisIter].size(); j++) {
3370 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
// Sum of squares -> variance -> standard deviation.
3373 for (int i = 0; i < stdDev.size(); i++) {
3374 stdDev[i] /= (double) dists.size();
3375 stdDev[i] = sqrt(stdDev[i]);
3380 catch(exception& e) {
// Report this function's own name (the message previously said "getAverages").
3381 errorOut(e, "MothurOut", "getStandardDeviation");
3385 /**************************************************************************************************/
// getStandardDeviation (double overload, precomputed means) - column-wise
// standard deviation of `dists` using the caller-supplied `averages`.
// For each column the squared deviations from averages[j] are summed over all
// rows, divided by the row count N (not N-1) and square-rooted.
// The return statement is on a line not visible here (presumably stdDev).
// NOTE(review): dists[0] is read unconditionally, and `averages` is assumed to
// have at least as many entries as each row -- neither is checked.
3386 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3388 //find standard deviation
3389 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
// One zeroed accumulator per column.
3390 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3392 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3393 for (int j = 0; j < dists[thisIter].size(); j++) {
3394 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
// Sum of squares -> variance -> standard deviation.
3397 for (int i = 0; i < stdDev.size(); i++) {
3398 stdDev[i] /= (double) dists.size();
3399 stdDev[i] = sqrt(stdDev[i]);
3404 catch(exception& e) {
// Report this function's own name (the message previously said "getAverages").
3405 errorOut(e, "MothurOut", "getStandardDeviation");
3409 /**************************************************************************************************/
// getAverages (seqDist overload with mode) - collapses the first dimension of
// calcDistsTotals[iter][calc][comparison] into one value per (calc, comparison).
// The result has the shape of calcDistsTotals[0] with seq1/seq2 copied from it.
//   mode == "average": dist is the mean over all iterations.
//   any other mode   : dist is the median, taken as element size/2 of the sorted
//                      values (the upper middle one when the count is even).
// NOTE(review): calcDistsTotals[0] is read unconditionally, and every iteration
// is assumed to have the same shape as the first -- neither is checked.
3410 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3413 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3414 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3415 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3416 vector<seqDist> temp;
3417 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
// tempDist is declared on a line not visible here; it carries the pair ids with a zero distance.
3419 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3420 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3421 tempDist.dist = 0.0;
3422 temp.push_back(tempDist);
3424 calcAverages.push_back(temp);
3427 if (mode == "average") {
3428 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3429 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3430 for (int j = 0; j < calcAverages[i].size(); j++) {
3431 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3436 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3437 for (int j = 0; j < calcAverages[i].size(); j++) {
3438 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3441 }else { //find median
3442 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3443 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3444 vector<double> dists;
3445 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3446 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3448 sort(dists.begin(), dists.end());
// Middle element of the sorted values; no averaging of the two middle values for even counts.
3449 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3454 return calcAverages;
3456 catch(exception& e) {
3457 errorOut(e, "MothurOut", "getAverages");
3461 /**************************************************************************************************/
// getAverages (seqDist overload) - mean over the first dimension of
// calcDistsTotals[iter][calc][comparison].
// The result has the shape of calcDistsTotals[0] with seq1/seq2 copied from it
// and dist set to the sum over all iterations divided by the iteration count.
// NOTE(review): calcDistsTotals[0] is read unconditionally, and every iteration
// is assumed to have the same shape as the first -- neither is checked.
3462 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3465 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3466 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3467 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3468 vector<seqDist> temp;
3469 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
// tempDist is declared on a line not visible here; it carries the pair ids with a zero distance.
3471 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3472 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3473 tempDist.dist = 0.0;
3474 temp.push_back(tempDist);
3476 calcAverages.push_back(temp);
3480 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3481 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3482 for (int j = 0; j < calcAverages[i].size(); j++) {
3483 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3488 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3489 for (int j = 0; j < calcAverages[i].size(); j++) {
3490 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3494 return calcAverages;
3496 catch(exception& e) {
3497 errorOut(e, "MothurOut", "getAverages");
3501 /**************************************************************************************************/
// getStandardDeviation (seqDist overload) - standard deviation over the first
// dimension of calcDistsTotals[iter][calc][comparison].
// Means come from getAverages(calcDistsTotals). The result has the shape of
// calcDistsTotals[0] with seq1/seq2 copied from it; each dist is the square root
// of (sum of squared deviations / iteration count), i.e. it divides by N, not N-1.
// The return statement is on a line not visible here (presumably stdDev).
// NOTE(review): calcDistsTotals[0] is read unconditionally; an empty input is not supported.
3502 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3505 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3507 //find standard deviation
3508 vector< vector<seqDist> > stdDev;
3509 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3510 vector<seqDist> temp;
3511 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
// tempDist is declared on a line not visible here; it carries the pair ids with a zero distance.
3513 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3514 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3515 tempDist.dist = 0.0;
3516 temp.push_back(tempDist);
3518 stdDev.push_back(temp);
3521 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3522 for (int i = 0; i < stdDev.size(); i++) {
3523 for (int j = 0; j < stdDev[i].size(); j++) {
3524 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3529 for (int i = 0; i < stdDev.size(); i++) { //sum of squares -> variance -> standard deviation
3530 for (int j = 0; j < stdDev[i].size(); j++) {
3531 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3532 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3538 catch(exception& e) {
// Report this function's own name (the message previously said "getAverages").
3539 errorOut(e, "MothurOut", "getStandardDeviation");
3543 /**************************************************************************************************/
// getStandardDeviation (seqDist overload, precomputed means) - standard
// deviation over the first dimension of calcDistsTotals[iter][calc][comparison]
// using the caller-supplied `calcAverages`.
// The result has the shape of calcDistsTotals[0] with seq1/seq2 copied from it;
// each dist is the square root of (sum of squared deviations / iteration count),
// i.e. it divides by N, not N-1.
// The return statement is on a line not visible here (presumably stdDev).
// NOTE(review): calcDistsTotals[0] is read unconditionally, and `calcAverages` is
// assumed to match its shape -- neither is checked.
3544 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3546 //find standard deviation
3547 vector< vector<seqDist> > stdDev;
3548 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3549 vector<seqDist> temp;
3550 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
// tempDist is declared on a line not visible here; it carries the pair ids with a zero distance.
3552 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3553 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3554 tempDist.dist = 0.0;
3555 temp.push_back(tempDist);
3557 stdDev.push_back(temp);
3560 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3561 for (int i = 0; i < stdDev.size(); i++) {
3562 for (int j = 0; j < stdDev[i].size(); j++) {
3563 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3568 for (int i = 0; i < stdDev.size(); i++) { //sum of squares -> variance -> standard deviation
3569 for (int j = 0; j < stdDev[i].size(); j++) {
3570 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3571 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3577 catch(exception& e) {
// Report this function's own name (the message previously said "getAverages").
3578 errorOut(e, "MothurOut", "getStandardDeviation");
3583 /**************************************************************************************************/
// isContainingOnlyDigits - checks that every character of `input` is an ASCII
// digit. Returns false at the first character outside '0'..'9'; the success
// return is on a line not visible here (presumably true, which would also cover
// the empty string -- TODO confirm).
3584 bool MothurOut::isContainingOnlyDigits(string input) {
3587 //are you a digit in ascii code
3588 for (int i = 0;i < input.length(); i++){
// 48..57 are the character codes of '0'..'9'.
3589 if( input[i]>47 && input[i]<58){}
3590 else { return false; }
3595 catch(exception& e) {
3596 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3600 /**************************************************************************************************/
// removeConfidences - walks the ';'-terminated taxa in `tax` and, for each taxon,
// cuts off a trailing "(score)" when the text between its last '(' and last ')'
// passes isNumeric1().
// Returns 0 early when control_pressed is set. How the cleaned taxa are
// reassembled into `tax`, and the normal return value, are on lines not visible
// in this excerpt.
3601 int MothurOut::removeConfidences(string& tax) {
// find_first_of() yields string::npos when no ';' remains; the comparison with -1
// relies on -1 converting to that same unsigned value.
3607 while (tax.find_first_of(';') != -1) {
3609 if (control_pressed) { return 0; }
// Next taxon: everything up to, but not including, the first ';'.
3612 taxon = tax.substr(0,tax.find_first_of(';'));
3614 int pos = taxon.find_last_of('(');
3617 int pos2 = taxon.find_last_of(')');
// Text strictly between the last '(' and the last ')'.
3619 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
3620 if (isNumeric1(confidenceScore)) {
3621 taxon = taxon.substr(0, pos); //rip off confidence
// Drop the taxon just handled, together with its ';', from the input.
3627 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
3635 catch(exception& e) {
3636 errorOut(e, "MothurOut", "removeConfidences");
3640 /**************************************************************************************************/
// removeQuotes - copies `tax` into `newTax`, skipping every single-quote and
// double-quote character.
// If control_pressed becomes set the partially built `newTax` is returned
// immediately. The normal return is on a line not visible here (presumably newTax).
3641 string MothurOut::removeQuotes(string tax) {
3647 for (int i = 0; i < tax.length(); i++) {
3649 if (control_pressed) { return newTax; }
// Keep the character unless it is ' or ".
3651 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3657 catch(exception& e) {
3658 errorOut(e, "MothurOut", "removeQuotes");
3662 /**************************************************************************************************/
3663 // function for calculating standard deviation
// getStandardDeviation (vector<int> overload) - standard deviation of the values
// in `featureVector`: the mean is computed first, then the squared deviations
// from it are summed, divided by the element count N (not N-1) and square-rooted.
// The declarations of `average`/`stdDev` and the return statement are on lines
// not visible here.
// NOTE(review): no guard for an empty vector is visible; both divisions would
// then be by zero.
3664 double MothurOut::getStandardDeviation(vector<int>& featureVector){
3668 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3669 average /= (double) featureVector.size();
3671 //find standard deviation
3673 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3674 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
// Sum of squares -> variance -> standard deviation.
3677 stdDev /= (double) featureVector.size();
3678 stdDev = sqrt(stdDev);
3682 catch(exception& e) {
3683 errorOut(e, "MothurOut", "getStandardDeviation");
3687 /**************************************************************************************************/