5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Collects parameter-type names into the set `types`.
// NOTE(review): the embedded line numbers show gaps (22-24, 31-32, 41, 44-46, 49-52), so the
// declaration of `types`, the return statement and possibly further insert() calls are not
// visible here — confirm against the complete file before editing this list.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
// "processors" is tracked alongside the file types.
48 types.insert("processors");
// Error report tagged with this class and function name (presumably inside a catch handler
// whose opening line is not visible — TODO confirm).
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
94 bool MothurOut::hasCurrentFiles() {
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): the statements between the signature and the handler (lines 134-159 of the
// original numbering) are not visible here; presumably they reset the current-file members —
// confirm against the complete file.
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
// Report the exception, tagged with this class and function name.
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Searches the directories listed in the PATH environment variable for `programName` and
// builds the path stored in `pPath`.
// NOTE(review): several lines are missing from this view (declarations of pPath, dirs, delim and
// `in`, the #else/#endif lines and the return), so the exact branch structure must be confirmed.
166 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv returns a null pointer when PATH is unset; constructing a string from it
// is undefined behaviour — consider checking the pointer first.
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
// First pass: look for a PATH entry whose text contains programName.
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
// NOTE(review): only the directory is lower-cased; a programName containing upper-case letters
// can never match here. The comparison with -1 relies on conversion to string::npos.
195 //is this mothurs path?
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
// Append the program name using the platform's separator ("/" here, "\\" in the other branch).
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
// Second pass: try to open <dir>/<programName> in each PATH entry and keep the first that opens.
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// Uses the three-argument openInputFile overload, whose "could not open" message is commented out.
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
// Report the exception, tagged with this class and function name.
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Records `filename` as the log file name and opens it for output on `out`.
// NOTE(review): the lines between the visible statements are missing from this view (presumably
// the USE_MPI / non-MPI preprocessor branches) — confirm against the complete file.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
// Ask MPI for this process's rank within MPI_COMM_WORLD.
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
// Report the exception, tagged with this class and function name.
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// NOTE(review): most of this function is missing from this view; only the MPI rank lookup, the
// rank-0 guard and the error handler are visible. Presumably the hidden lines close the log
// stream — confirm against the complete file.
294 void MothurOut::closeLog() {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
// Report the exception, tagged with this class and function name.
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. NOTE(review): the statements run before the handler are not visible in this view.
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
// Report the exception under the label "MothurOut" for both class and function.
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Emits `output`; under MPI only the process with rank 0 does so.
// NOTE(review): the statements that actually write the text are missing from this view.
328 void MothurOut::mothurOut(string output) {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
// NOTE(review): the function label passed here is "MothurOut", not "mothurOut" — looks like a
// copy/paste slip that makes error reports point at the wrong function.
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Variant of mothurOut that, judging by its name, writes only to the screen — TODO confirm,
// the writing statements are missing from this view.
351 void MothurOut::mothurOutJustToScreen(string output) {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
366 catch(exception& e) {
// NOTE(review): the function label is "MothurOut" rather than "mothurOutJustToScreen".
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Ends the current output line; under MPI only the process with rank 0 does so.
// NOTE(review): the statements that write the line ending are missing from this view.
372 void MothurOut::mothurOutEndLine() {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
388 catch(exception& e) {
// Report the exception, tagged with the label "MothurOutEndLine".
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload that also streams `output` into the caller-supplied `outputFile`.
// NOTE(review): the remaining output statements are missing from this view.
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
// Copy the text into the caller's file stream.
406 outputFile << output;
414 catch(exception& e) {
// NOTE(review): the function label is "MothurOut" rather than "mothurOut".
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload of mothurOutEndLine taking a caller-supplied `outputFile`.
// NOTE(review): the statements that write the line ending are missing from this view.
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
// Report the exception, tagged with the label "MothurOutEndLine".
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// Variant of mothurOut that, judging by its name, writes only to the log file — TODO confirm,
// the writing statements are missing from this view.
443 void MothurOut::mothurOutJustToLog(string output) {
// Ask MPI for this process's rank within MPI_COMM_WORLD.
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
458 catch(exception& e) {
// Report the exception, tagged with the label "MothurOutJustToLog".
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Prints "[ERROR]: " followed by e.what(), then an explanatory message that names the
// reporting `object` and `function`. The explanation depends on whether the exception text
// contains "bad_alloc", and for bad_alloc on whether `object` is "cluster" or "shhh.flows".
// NOTE(review): the else-lines joining these branches are missing from this view.
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// NOTE(review): find() returns string::size_type; storing it in an int and comparing with
// string::npos below only works through the implicit int -> size_type conversion.
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// Out-of-memory advice specific to the cluster command.
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// Out-of-memory advice specific to the shhh.flows command.
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
// Generic out-of-memory advice (presumably the final else branch — TODO confirm).
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // process_mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// Reads this process's virtual-memory size and resident-set size from /proc/self/stat, stores
// them (in KB) in the two reference parameters and prints them through mothurOut.
// NOTE(review): the declarations of `vsize` and `rss`, the return statement and the #else branch
// are missing from this view. The guard below also matches __APPLE__ — confirm that
// /proc/self/stat is actually available on every platform the guard admits.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
// Skip the first 22 whitespace-separated fields, then read fields 23 and 24 into vsize and rss.
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
// vsize is divided by 1024 and rss is multiplied by the page size in KB, so both results are KB.
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
// Opens `fileName` (after expansion by getFullPathName) on `fileHandle` in append mode and
// prints an error message when it cannot be opened.
// NOTE(review): the failure test and the return statements are missing from this view.
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
// ios::app: every write goes to the end of the existing file.
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
586 catch(exception& e) {
// Report the exception, tagged with this class and function name.
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
592 void MothurOut::gobble(istream& f){
596 while(isspace(d=f.get())) { ;}
597 if(!f.eof()) { f.putback(d); }
599 catch(exception& e) {
600 errorOut(e, "MothurOut", "gobble");
604 /***********************************************************************/
605 void MothurOut::gobble(istringstream& f){
608 while(isspace(d=f.get())) {;}
609 if(!f.eof()) { f.putback(d); }
611 catch(exception& e) {
612 errorOut(e, "MothurOut", "gobble");
617 /***********************************************************************/
619 string MothurOut::getline(istringstream& fileHandle) {
624 while (!fileHandle.eof()) {
626 char c = fileHandle.get();
628 //are you at the end of the line
629 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
636 catch(exception& e) {
637 errorOut(e, "MothurOut", "getline");
641 /***********************************************************************/
643 string MothurOut::getline(ifstream& fileHandle) {
650 char c = fileHandle.get();
652 //are you at the end of the line
653 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
660 catch(exception& e) {
661 errorOut(e, "MothurOut", "getline");
665 /***********************************************************************/
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
668 #ifdef USE_COMPRESSION
// Returns true when `s` ends with the NUL-terminated string `suffix`.
// An empty suffix matches every string, including the empty one.
//
// The comparison is done in place on the tail of `s`; the argument is taken by
// const reference so neither the string nor a temporary substring is copied.
inline bool endsWith(const std::string& s, const char * suffix){
	const size_t suffixLength = strlen(suffix);
	if (s.size() < suffixLength) { return false; }
	return s.compare(s.size() - suffixLength, suffixLength, suffix, suffixLength) == 0;
}
676 string MothurOut::getRootName(string longName){
679 string rootName = longName;
681 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
682 #ifdef USE_COMPRESSION
683 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
684 int pos = rootName.find_last_of('.');
685 rootName = rootName.substr(0, pos);
686 cerr << "shortening " << longName << " to " << rootName << "\n";
690 if(rootName.find_last_of(".") != rootName.npos){
691 int pos = rootName.find_last_of('.')+1;
692 rootName = rootName.substr(0, pos);
697 catch(exception& e) {
698 errorOut(e, "MothurOut", "getRootName");
702 /***********************************************************************/
704 string MothurOut::getSimpleName(string longName){
706 string simpleName = longName;
709 found=longName.find_last_of("/\\");
711 if(found != longName.npos){
712 simpleName = longName.substr(found+1);
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getSimpleName");
723 /***********************************************************************/
725 int MothurOut::getRandomIndex(int highest){
728 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
732 catch(exception& e) {
733 errorOut(e, "MothurOut", "getRandomIndex");
738 /**********************************************************************/
740 string MothurOut::getPathName(string longName){
742 string rootPathName = longName;
744 if(longName.find_last_of("/\\") != longName.npos){
745 int pos = longName.find_last_of("/\\")+1;
746 rootPathName = longName.substr(0, pos);
751 catch(exception& e) {
752 errorOut(e, "MothurOut", "getPathName");
757 /***********************************************************************/
// Normalises `dirName` in place (trailing separator added, expanded by getFullPathName) and
// probes it by opening a temporary file named <dirName><tag>temp, which is removed afterwards.
// NOTE(review): the declarations of `tag` and `out`, the failure test around the message below
// and the return statements are missing from this view — confirm against the complete file.
759 bool MothurOut::dirCheck(string& dirName){
765 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
// NOTE(review): for an empty dirName, length()-1 wraps around and substr throws
// std::out_of_range, which ends up in the handler at the bottom.
770 //add / to name if needed
771 string lastChar = dirName.substr(dirName.length()-1);
772 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
773 if (lastChar != "/") { dirName += "/"; }
775 if (lastChar != "\\") { dirName += "\\"; }
778 //test to make sure directory exists
779 dirName = getFullPathName(dirName);
780 string outTemp = dirName + tag + "temp";
// ios::trunc: the probe file is created, or emptied if it already exists.
782 out.open(outTemp.c_str(), ios::trunc);
784 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
// Clean up the probe file.
787 mothurRemove(outTemp);
793 catch(exception& e) {
// Report the exception, tagged with this class and function name.
794 errorOut(e, "MothurOut", "dirCheck");
799 /***********************************************************************/
801 string MothurOut::hasPath(string longName){
806 found=longName.find_last_of("~/\\");
808 if(found != longName.npos){
809 path = longName.substr(0, found+1);
814 catch(exception& e) {
815 errorOut(e, "MothurOut", "hasPath");
820 /***********************************************************************/
822 string MothurOut::getExtension(string longName){
824 string extension = "";
826 if(longName.find_last_of('.') != longName.npos){
827 int pos = longName.find_last_of('.');
828 extension = longName.substr(pos, longName.length());
833 catch(exception& e) {
834 errorOut(e, "MothurOut", "getExtension");
838 /***********************************************************************/
// Opens `fileName` (expanded by getFullPathName) and returns true when the stream is already
// at end-of-file at the check below.
// NOTE(review): the declaration of `fileHandle`, the open-failure test, whatever is read before
// the eof() check and the remaining return statements are missing from this view.
839 bool MothurOut::isBlank(string fileName){
842 fileName = getFullPathName(fileName);
845 fileHandle.open(fileName.c_str());
847 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
850 //check for blank file
// eof() is only set after a read has hit the end, so a read presumably precedes this line.
852 if (fileHandle.eof()) { fileHandle.close(); return true; }
857 catch(exception& e) {
// Report the exception, tagged with this class and function name.
858 errorOut(e, "MothurOut", "isBlank");
862 /***********************************************************************/
// Expands a file name that starts with "~", "./" or "../" components into a complete path,
// using the HOME/HOMEPATH environment variable and the current working directory. Names
// without any path component, or whose path contains no "./", are returned unchanged.
// NOTE(review): several short lines (declarations of newFileName, cwd, simpleCWD, dirs, pos,
// the #else/#endif lines, the statements at 926/972 and the final return) are missing from this
// view — confirm against the complete file before changing the logic.
864 string MothurOut::getFullPathName(string fileName){
867 string path = hasPath(fileName);
871 if (path == "") { return fileName; } //its a simple name
872 else { //we need to complete the pathname
873 // ex. ../../../filename
874 // cwd = /user/work/desktop
877 //get current working directory
878 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// Unix-like branch. The comparison with -1 relies on conversion to string::npos.
880 if (path.find("~") != -1) { //go to home directory
// HOME may be unset; in that case the home directory is treated as the empty string.
883 char *homepath = NULL;
884 homepath = getenv ("HOME");
885 if ( homepath != NULL) { homeDir = homepath; }
886 else { homeDir = ""; }
// Replace everything up to and including the first '~' with the home directory.
888 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
891 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
892 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
894 //char* cwdpath = new char[1024];
896 //cwdpath=getcwd(cwdpath,size);
// getcwd(NULL, 0) returns a buffer it allocated itself.
// NOTE(review): no matching free() is visible in this view — possible leak, confirm.
899 char *cwdpath = NULL;
900 cwdpath = getcwd(NULL, 0); // or _getcwd
901 if ( cwdpath != NULL) { cwd = cwdpath; }
// Drop the first character of cwd (the leading '/') before splitting.
907 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
909 //break apart the current working directory
911 while (simpleCWD.find_first_of('/') != string::npos) {
912 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
913 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
916 //get last one // ex. ../../../filename = /user/work/desktop/filename
917 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// `index` is the deepest cwd directory still kept in the result.
920 int index = dirs.size()-1;
// Peel "./" and "../" components off the right-hand end of `path`, one per iteration.
922 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
923 if (pos == 0) { break; //you are at the end
924 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
925 path = path.substr(0, pos-1);
// presumably `index` is decremented on the missing line 926 — TODO confirm
927 if (index == 0) { break; }
928 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
929 path = path.substr(0, pos);
930 }else if (pos == 1) { break; //you are at the end
931 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// Prepend the remaining cwd directories, innermost first, then the leading '/'.
934 for (int i = index; i >= 0; i--) {
935 newFileName = dirs[i] + "/" + newFileName;
938 newFileName = "/" + newFileName;
// Other-platform branch (backslash separators).
// NOTE(review): getenv may return a null pointer; constructing a string from it is undefined.
942 if (path.find("~") != string::npos) { //go to home directory
943 string homeDir = getenv ("HOMEPATH");
944 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
947 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
948 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
950 char *cwdpath = NULL;
951 cwdpath = getcwd(NULL, 0); // or _getcwd
952 if ( cwdpath != NULL) { cwd = cwdpath; }
955 //break apart the current working directory
957 while (cwd.find_first_of('\\') != -1) {
958 string dir = cwd.substr(0,cwd.find_first_of('\\'));
959 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
964 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
966 int index = dirs.size()-1;
// Same right-to-left peeling as in the Unix-like branch, with ".\\" components.
968 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
969 if (pos == 0) { break; //you are at the end
970 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
971 path = path.substr(0, pos-1);
973 if (index == 0) { break; }
974 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
975 path = path.substr(0, pos);
976 }else if (pos == 1) { break; //you are at the end
977 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
980 for (int i = index; i >= 0; i--) {
981 newFileName = dirs[i] + "\\" + newFileName;
990 catch(exception& e) {
// Report the exception, tagged with this class and function name.
991 errorOut(e, "MothurOut", "getFullPathName");
995 /***********************************************************************/
// Opens `fileName` (expanded by getFullPathName) for reading on `fileHandle`. Unlike the
// two-argument overload, the "Could not open" message is commented out here.
// On Unix-like builds with USE_COMPRESSION, a ".gz"/".bz2" file is decompressed by a forked
// child process through a temporary named pipe, and the pipe is opened instead.
// NOTE(review): parameter `m` is not used in any visible line. The open-failure test, the
// blank-file check and the return statements are missing from this view.
997 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1000 string completeFileName = getFullPathName(fileName);
1001 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1002 #ifdef USE_COMPRESSION
1003 // check for gzipped or bzipped file
1004 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process can create that path before
// mkfifo does. The mkfifo result is not checked.
1005 string tempName = string(tmpnam(0));
1006 mkfifo(tempName.c_str(), 0666);
1007 int fork_result = fork();
1008 if (fork_result < 0) {
1009 cerr << "Error forking.\n";
// Child process: run zcat/bzcat with its output redirected into the pipe.
// NOTE(review): the file name is pasted into the shell command unquoted.
1011 } else if (fork_result == 0) {
1012 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1013 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1014 system(command.c_str());
1015 cerr << "Done decompressing " << completeFileName << "\n";
1016 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1019 cerr << "waiting on child process " << fork_result << "\n";
1020 completeFileName = tempName;
1025 fileHandle.open(completeFileName.c_str());
1027 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1030 //check for blank file
1035 catch(exception& e) {
// Report the exception; this overload uses the label "openInputFile - no Error".
1036 errorOut(e, "MothurOut", "openInputFile - no Error");
1040 /***********************************************************************/
// Opens `fileName` (expanded by getFullPathName) for reading on `fileHandle`, printing an
// [ERROR] message when the file cannot be opened or is blank.
// On Unix-like builds with USE_COMPRESSION, a ".gz"/".bz2" file is decompressed by a forked
// child process through a temporary named pipe, and the pipe is opened instead.
// NOTE(review): the open-failure test, the statement preceding the eof() check and the return
// statements are missing from this view.
1042 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1045 //get full path name
1046 string completeFileName = getFullPathName(fileName);
1047 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1048 #ifdef USE_COMPRESSION
1049 // check for gzipped or bzipped file
1050 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process can create that path before
// mkfifo does. The mkfifo result is not checked.
1051 string tempName = string(tmpnam(0));
1052 mkfifo(tempName.c_str(), 0666);
1053 int fork_result = fork();
1054 if (fork_result < 0) {
1055 cerr << "Error forking.\n";
// Child process: run zcat/bzcat with its output redirected into the pipe.
// NOTE(review): the file name is pasted into the shell command unquoted.
1057 } else if (fork_result == 0) {
1058 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1059 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1060 system(command.c_str());
1061 cerr << "Done decompressing " << completeFileName << "\n";
1062 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1065 cerr << "waiting on child process " << fork_result << "\n";
1066 completeFileName = tempName;
1072 fileHandle.open(completeFileName.c_str());
1074 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1078 //check for blank file
1080 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1085 catch(exception& e) {
// Report the exception, tagged with this class and function name.
1086 errorOut(e, "MothurOut", "openInputFile");
1090 /***********************************************************************/
// Renames `oldName` to `newName`, first removing an existing `newName`. Identical names are
// a no-op that returns 0. Unix-like builds shell out to rm/mv; the other branch uses
// mothurRemove and rename().
// NOTE(review): the declaration of `inTest`, the #else/#endif lines and the final return are
// missing from this view.
1092 int MothurOut::renameFile(string oldName, string newName){
1095 if (oldName == newName) { return 0; }
// Probe whether the destination exists, using the overload with no "could not open" message.
1098 int exist = openInputFile(newName, inTest, "");
1101 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// NOTE(review): both commands paste the file names into a shell line unquoted, so names with
// spaces or shell metacharacters break (or alter) the command; the system() results are ignored.
1102 if (exist == 0) { //you could open it so you want to delete it
1103 string command = "rm " + newName;
1104 system(command.c_str());
1107 string command = "mv " + oldName + " " + newName;
1108 system(command.c_str());
1110 mothurRemove(newName);
// NOTE(review): renameOk is not used in any visible line.
1111 int renameOk = rename(oldName.c_str(), newName.c_str());
1116 catch(exception& e) {
// Report the exception, tagged with this class and function name.
1117 errorOut(e, "MothurOut", "renameFile");
1122 /***********************************************************************/
// Opens `fileName` (expanded by getFullPathName) for writing on `fileHandle`, truncating any
// existing content, and prints an [ERROR] message when it cannot be opened.
// On Unix-like builds with USE_COMPRESSION, output to a ".gz"/".bz2" name is written to a
// temporary named pipe that a forked child process compresses into the real file.
// NOTE(review): the open-failure test and the return statements are missing from this view.
1124 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1127 string completeFileName = getFullPathName(fileName);
1128 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1129 #ifdef USE_COMPRESSION
1130 // check for gzipped file
1131 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process can create that path before
// mkfifo does. The mkfifo result is not checked.
1132 string tempName = string(tmpnam(0));
1133 mkfifo(tempName.c_str(), 0666);
1134 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1135 int fork_result = fork();
1136 if (fork_result < 0) {
1137 cerr << "Error forking.\n";
// Child process: gzip/bzip2 reads the pipe and writes the compressed target file.
// NOTE(review): the file name is pasted into the shell command unquoted.
1139 } else if (fork_result == 0) {
1140 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1141 system(command.c_str());
// Parent process: write into the pipe instead of the target file.
1144 completeFileName = tempName;
1149 fileHandle.open(completeFileName.c_str(), ios::trunc);
1151 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1158 catch(exception& e) {
// Report the exception, tagged with this class and function name.
1159 errorOut(e, "MothurOut", "openOutputFile");
1165 /**************************************************************************************************/
// Appends the contents of file `temp` to the end of `filename`.
// The output is opened with openOutputFileAppend(); the input is opened with the "no error" mode string.
// When the input opens (result 0) it is copied in reads of up to 4096 bytes, and every '\n' copied
// increments numLines (its declaration and the function's return statement are not part of this listing).
1166 int MothurOut::appendFiles(string temp, string filename) {
1171 //open output file in append mode
1172 openOutputFileAppend(filename, output);
1173 int ableToOpen = openInputFile(temp, input, "no error");
1174 //int ableToOpen = openInputFile(temp, input);
1177 if (ableToOpen == 0) { //you opened it
1180 while (!input.eof()) {
1181 input.read(buffer, 4096);
// gcount() is the number of bytes the last read actually delivered, so a short final chunk is written correctly
1182 output.write(buffer, input.gcount());
1183 //count number of lines
1184 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1193 catch(exception& e) {
1194 errorOut(e, "MothurOut", "appendFiles");
1198 /**************************************************************************************************/
// Appends the contents of file `temp` to the end of `filename`, skipping the first line of `temp`.
// The first line is read with getline() and treated as a header row; it is only echoed in debug mode.
// The remainder is copied in reads of up to 4096 bytes, and every '\n' copied increments numLines
// (its declaration and the function's return statement are not part of this listing).
1199 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1204 //open output file in append mode
1205 openOutputFileAppend(filename, output);
1206 int ableToOpen = openInputFile(temp, input, "no error");
1207 //int ableToOpen = openInputFile(temp, input);
1210 if (ableToOpen == 0) { //you opened it
// drop the header line and any whitespace that follows it before copying
1212 string headers = getline(input); gobble(input);
1213 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1216 while (!input.eof()) {
1217 input.read(buffer, 4096);
// gcount() is the number of bytes the last read actually delivered, so a short final chunk is written correctly
1218 output.write(buffer, input.gcount());
1219 //count number of lines
1220 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1229 catch(exception& e) {
// report this function's own name (the handler previously said "appendFiles")
1230 errorOut(e, "MothurOut", "appendFilesWithoutHeaders");
1234 /**************************************************************************************************/
// Sorts a three-column distance file (name, name, distance) by its distance column.
// The sorted output is written to getRootName(distFile) + "sorted.dist"; outputDir is not used by any
// visible line (the only reference to it is commented out). The return statement is not part of this
// listing -- presumably the output file name is returned.
// Unix-like builds call the system `sort` on column 3. Other builds rewrite the file with the distance in
// the first column, run the Windows `sort`, rewrite the result with the distance last, and delete both temp files.
// NOTE(review): file names are concatenated unquoted into the shell commands, and `-k +3` is an unusual
// key specification -- confirm it is accepted by the sort implementations in use.
1235 string MothurOut::sortFile(string distFile, string outputDir){
1238 //if (outputDir == "") { outputDir += hasPath(distFile); }
1239 string outfile = getRootName(distFile) + "sorted.dist";
1242 //if you can, use the unix sort since its been optimized for years
1243 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1244 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1245 system(command.c_str());
1246 #else //you are stuck with my best attempt...
1247 //windows sort does not have a way to specify a column, only a character in the line
1248 //since we cannot assume that the distance will always be at the the same character location on each line
1249 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1251 //read in file line by file and put distance first
1252 string tempDistFile = distFile + ".temp";
1255 openInputFile(distFile, input);
1256 openOutputFile(tempDistFile, output);
1258 string firstName, secondName;
1260 while (!input.eof()) {
1261 input >> firstName >> secondName >> dist;
1262 output << dist << '\t' << firstName << '\t' << secondName << endl;
1269 //sort using windows sort
1270 string tempOutfile = outfile + ".temp";
1271 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1272 system(command.c_str());
1274 //read in sorted file and put distance at end again
1277 openInputFile(tempOutfile, input2);
1278 openOutputFile(outfile, output2);
1280 while (!input2.eof()) {
1281 input2 >> dist >> firstName >> secondName;
1282 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
1289 mothurRemove(tempDistFile);
1290 mothurRemove(tempOutfile);
1295 catch(exception& e) {
1296 errorOut(e, "MothurOut", "sortFile");
1300 /**************************************************************************************************/
// Builds a table of byte offsets for `filename`, which is opened in binary mode and read one character at a time.
// An offset (count-1, i.e. the position of the character just read) is appended to `positions`; the condition
// guarding that push_back is not part of this listing -- presumably the character being '>'.
// `num` receives the number of offsets found; afterwards the file size in bytes (obtained via fopen/fseek)
// is appended as one extra, final entry.
1301 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1303 vector<unsigned long long> positions;
1305 //openInputFile(filename, inFASTA);
1306 inFASTA.open(filename.c_str(), ios::binary);
1309 unsigned long long count = 0;
1310 while(!inFASTA.eof()){
1311 //input = getline(inFASTA);
1312 //cout << input << '\t' << inFASTA.tellg() << endl;
1313 //if (input.length() != 0) {
1314 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1316 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1317 char c = inFASTA.get(); count++;
1319 positions.push_back(count-1);
1320 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
// num is taken before the file size is appended, so it does not count the trailing end-of-file entry
1325 num = positions.size();
1326 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1328 unsigned long long size;
1330 //get num bytes in file
1331 pFile = fopen (filename.c_str(),"rb");
1332 if (pFile==NULL) perror ("Error opening file");
1334 fseek (pFile, 0, SEEK_END);
1339 /*unsigned long long size = positions[(positions.size()-1)];
1341 openInputFile(filename, in);
1346 if(in.eof()) { break; }
1351 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1353 positions.push_back(size);
1358 catch(exception& e) {
1359 errorOut(e, "MothurOut", "setFilePosFasta");
1363 /**************************************************************************************************/
// Builds a table of byte offsets marking the start of each line of `filename` (opened in binary mode).
// positions[0] is 0; after each line terminator ('\n', '\r' or '\f') and any whitespace that follows it,
// the offset count-1 is appended. `num` is set to positions.size()-1 and the final entry is overwritten
// with the file size in bytes obtained via fopen/fseek.
// End of file is detected with in.eof(). The previous test `d != in.eof()` compared the character with a
// bool, so it never recognised EOF and the inner loop could spin forever on a file without a trailing newline.
1364 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1366 filename = getFullPathName(filename);
1368 vector<unsigned long long> positions;
1370 //openInputFile(filename, in);
1371 in.open(filename.c_str(), ios::binary);
1374 unsigned long long count = 0;
1375 positions.push_back(0);
1378 //getline counting reads
1379 char d = in.get(); count++;
1380 while ((d != '\n') && (d != '\r') && (d != '\f') && (!in.eof())) {
1381 //get next character
// consume the terminator's partner and any further whitespace so the offset points at real content
1387 d=in.get(); count++;
1388 while(isspace(d) && (!in.eof())) { d=in.get(); count++;}
1390 positions.push_back(count-1);
1391 //cout << count-1 << endl;
1395 num = positions.size()-1;
1398 unsigned long long size;
1400 //get num bytes in file
1401 pFile = fopen (filename.c_str(),"rb");
1402 if (pFile==NULL) perror ("Error opening file");
1404 fseek (pFile, 0, SEEK_END);
// the last recorded offset is replaced by the true file size
1409 positions[(positions.size()-1)] = size;
1413 catch(exception& e) {
1414 errorOut(e, "MothurOut", "setFilePosEachLine");
1418 /**************************************************************************************************/
// Divides `filename` into at most `proc` byte ranges whose boundaries fall on a '>' character and returns
// the boundary offsets: the first entry is 0 and the last is the file size.
// `proc` is updated to the number of ranges actually produced (filePos.size() - 1); it becomes 1 when the
// file is smaller than `proc` bytes. Non-Unix builds only log an error and return {0, size}.
// NOTE(review): when fopen fails only a perror() call is visible here -- confirm `size` cannot be used uninitialized.
1420 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1422 vector<unsigned long long> filePos;
1423 filePos.push_back(0);
1426 unsigned long long size;
1428 filename = getFullPathName(filename);
1430 //get num bytes in file
1431 pFile = fopen (filename.c_str(),"rb");
1432 if (pFile==NULL) perror ("Error opening file");
1434 fseek (pFile, 0, SEEK_END);
1439 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1441 //estimate file breaks
1442 unsigned long long chunkSize = 0;
1443 chunkSize = size / proc;
1445 //file too small to divide by processors
1446 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1448 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1449 for (int i = 0; i < proc; i++) {
1450 unsigned long long spot = (i+1) * chunkSize;
1453 openInputFile(filename, in);
1457 unsigned long long newSpot = spot;
// putting the '>' back makes tellg() report the position of the '>' itself
1461 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1462 else if (int(c) == -1) { break; }
1466 //there was not another sequence before the end of the file
1467 unsigned long long sanityPos = in.tellg();
1469 if (sanityPos == -1) { break; }
1470 else { filePos.push_back(newSpot); }
1476 filePos.push_back(size);
1478 //sanity check filePos
// drop any boundary that is not strictly greater than its predecessor
1479 for (int i = 0; i < (filePos.size()-1); i++) {
1480 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1483 proc = (filePos.size() - 1);
1485 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1487 filePos.push_back(size);
1491 catch(exception& e) {
1492 errorOut(e, "MothurOut", "divideFile");
1496 /**************************************************************************************************/
// Divides `filename` into at most `proc` byte ranges whose boundaries fall at the start of a line and
// returns the boundary offsets: the first entry is 0 and the last is the file size.
// `proc` is updated to the number of ranges actually produced (filePos.size() - 1); it becomes 1 when the
// file is smaller than `proc` bytes. Non-Unix builds only log an error and return {0, size}.
// The diagnostics now name divideFilePerLine; they previously reported divideFile, the sibling function.
// NOTE(review): when fopen fails only a perror() call is visible here -- confirm `size` cannot be used uninitialized.
1498 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1500 vector<unsigned long long> filePos;
1501 filePos.push_back(0);
1504 unsigned long long size;
1506 filename = getFullPathName(filename);
1508 //get num bytes in file
1509 pFile = fopen (filename.c_str(),"rb");
1510 if (pFile==NULL) perror ("Error opening file");
1512 fseek (pFile, 0, SEEK_END);
1517 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1519 //estimate file breaks
1520 unsigned long long chunkSize = 0;
1521 chunkSize = size / proc;
1523 //file too small to divide by processors
1524 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1526 //for each process seekg to closest file break and search for the next line break. make the start of the following line the filebreak
1527 for (int i = 0; i < proc; i++) {
1528 unsigned long long spot = (i+1) * chunkSize;
1531 openInputFile(filename, in);
1534 //look for next line break
1535 unsigned long long newSpot = spot;
// gobble() skips the whitespace after the terminator so tellg() reports the start of the next line
1539 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1540 else if (int(c) == -1) { break; }
1543 //there was not another line before the end of the file
1544 unsigned long long sanityPos = in.tellg();
1546 if (sanityPos == -1) { break; }
1547 else { filePos.push_back(newSpot); }
1553 filePos.push_back(size);
1555 //sanity check filePos
// drop any boundary that is not strictly greater than its predecessor
1556 for (int i = 0; i < (filePos.size()-1); i++) {
1557 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1560 proc = (filePos.size() - 1);
1562 mothurOut("[ERROR]: Windows version should not be calling the divideFilePerLine function."); mothurOutEndLine();
1564 filePos.push_back(size);
1568 catch(exception& e) {
1569 errorOut(e, "MothurOut", "divideFilePerLine");
1573 /**************************************************************************************************/
// Splits `filename` into chunk files using the offsets from divideFile(filename, proc).
// Chunk i holds the bytes between filePos[i] and filePos[i+1], is written to "<filename>.<i>.tmp"
// followed by a newline, and its name is appended to `files`.
// The chunk is written with write() and an explicit byte count. The buffer from new char[size] is not
// NUL-terminated, so the previous `out << chunk` read past its end.
1574 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1577 vector<unsigned long long> filePos = divideFile(filename, proc);
1579 for (int i = 0; i < (filePos.size()-1); i++) {
1583 openInputFile(filename, in);
1584 in.seekg(filePos[i]);
1585 unsigned long long size = filePos[(i+1)] - filePos[i];
1586 char* chunk = new char[size];
1587 in.read(chunk, size);
1591 string fileChunkName = filename + "." + toString(i) + ".tmp";
1593 openOutputFile(fileChunkName, out);
// gcount() is the number of bytes the read above actually delivered
1595 out.write(chunk, in.gcount()); out << endl;
1600 files.push_back(fileChunkName);
1605 catch(exception& e) {
1606 errorOut(e, "MothurOut", "divideFile");
1610 /***********************************************************************/
// Case-insensitive truth test: upper-cases the local copy of f and returns true only for "TRUE" or "T".
// Every other string, including the empty string, yields false.
1612 bool MothurOut::isTrue(string f){
1615 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1617 if ((f == "TRUE") || (f == "T")) { return true; }
1618 else { return false; }
1620 catch(exception& e) {
1621 errorOut(e, "MothurOut", "isTrue");
1626 /***********************************************************************/
// Rounds dist to the nearest multiple of 1/precision (e.g. precision 100 keeps two decimal places).
// NOTE(review): int() truncates toward zero, so the +0.5 trick rounds negative inputs differently from
// positive ones, and dist * precision must fit in an int.
1628 float MothurOut::roundDist(float dist, int precision){
1630 return int(dist * precision + 0.5)/float(precision);
1632 catch(exception& e) {
1633 errorOut(e, "MothurOut", "roundDist");
1637 /***********************************************************************/
// Rounds dist up to the next multiple of 1/precision using ceil().
// NOTE(review): ceil(dist * precision) must fit in an int.
1639 float MothurOut::ceilDist(float dist, int precision){
1641 return int(ceil(dist * precision))/float(precision);
1643 catch(exception& e) {
1644 errorOut(e, "MothurOut", "ceilDist");
1648 /***********************************************************************/
// Splits the first `size` characters of `buffer` into whitespace-separated tokens.
// `rest` is both input and output: characters are appended to whatever it already holds, and a token is
// moved into `pieces` only when whitespace is seen after it. No visible line flushes `rest` at the end
// of the buffer, so an unfinished token appears to stay in `rest` for the caller's next call.
1650 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1652 vector<string> pieces;
1654 for (int i = 0; i < size; i++) {
1655 if (!isspace(buffer[i])) { rest += buffer[i]; }
1657 if (rest != "") { pieces.push_back(rest); rest = ""; }
1658 while (i < size) { //gobble white space
1659 if (isspace(buffer[i])) { i++; }
// first character after the whitespace run starts the next token
1660 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1667 catch(exception& e) {
1668 errorOut(e, "MothurOut", "splitWhiteSpace");
1672 /***********************************************************************/
// Splits `input` into whitespace-separated tokens.
// Unlike the buffer-based overload, the token still being built when the string ends is pushed as well.
1673 vector<string> MothurOut::splitWhiteSpace(string input){
1675 vector<string> pieces;
1678 for (int i = 0; i < input.length(); i++) {
1679 if (!isspace(input[i])) { rest += input[i]; }
1681 if (rest != "") { pieces.push_back(rest); rest = ""; }
1682 while (i < input.length()) { //gobble white space
1683 if (isspace(input[i])) { i++; }
// first character after the whitespace run starts the next token
1684 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// flush the last token when the input does not end in whitespace
1689 if (rest != "") { pieces.push_back(rest); }
1693 catch(exception& e) {
1694 errorOut(e, "MothurOut", "splitWhiteSpace");
1698 /***********************************************************************/
// Splits `input` into whitespace-separated tokens, except that text following a single or double quote is
// collected character by character until the next quote character.
// When the input contains no quote at all the work is delegated to splitWhiteSpace(input).
// The catch handler now reports this function's own name; it previously said "splitWhiteSpace".
1699 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1701 vector<string> pieces;
1704 int pos = input.find('\'');
1705 int pos2 = input.find('\"');
1707 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1709 for (int i = 0; i < input.length(); i++) {
// `rest` can itself be a lone quote when the whitespace-skipping code below started the token on one
1710 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1712 for (int j = i+1; j < input.length(); j++) {
1713 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1717 }else { rest += input[j]; }
1719 }else if (!isspace(input[i])) { rest += input[i]; }
1721 if (rest != "") { pieces.push_back(rest); rest = ""; }
1722 while (i < input.length()) { //gobble white space
1723 if (isspace(input[i])) { i++; }
1724 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// flush the last token when the input does not end in whitespace
1729 if (rest != "") { pieces.push_back(rest); }
1733 catch(exception& e) {
1734 errorOut(e, "MothurOut", "splitWhiteSpaceWithQuotes");
1738 //**********************************************************************************************************************
// Reads a two-column taxonomy file (sequence name, taxonomy string) into taxMap and returns taxMap.size().
// The file is read in 4096-byte chunks; `rest` carries a partial token from one chunk to the next and is
// split once more after the loop. For each completed pair the name is passed through checkName(),
// confidence scores are stripped when the taxonomy contains '(', and the pair is stored unless the
// taxonomy is non-empty and lacks its final ';'. A duplicate name logs an error and sets control_pressed.
// Fixes in this version: the duplicate-name message ended in the literal characters "/n" instead of a
// newline, and the '(' search result is compared with string::npos rather than -1.
1739 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1743 openInputFile(namefile, in);
1747 bool pairDone = false;
1748 bool columnOne = true;
1749 string firstCol, secondCol;
1752 if (control_pressed) { break; }
1754 in.read(buffer, 4096);
1755 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the name column and the taxonomy column
1757 for (int i = 0; i < pieces.size(); i++) {
1758 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1759 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1762 checkName(firstCol);
1763 //are there confidence scores, if so remove them
1764 if (secondCol.find_first_of('(') != string::npos) { removeConfidences(secondCol); }
1765 map<string, string>::iterator itTax = taxMap.find(firstCol);
1767 if(itTax == taxMap.end()) {
1768 bool ignore = false;
1769 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1771 if (!ignore) { taxMap[firstCol] = secondCol; }
1772 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1774 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
// process whatever was left over after the last chunk, using the same rules as above
1783 vector<string> pieces = splitWhiteSpace(rest);
1785 for (int i = 0; i < pieces.size(); i++) {
1786 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1787 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1790 checkName(firstCol);
1791 //are there confidence scores, if so remove them
1792 if (secondCol.find_first_of('(') != string::npos) { removeConfidences(secondCol); }
1793 map<string, string>::iterator itTax = taxMap.find(firstCol);
1795 if(itTax == taxMap.end()) {
1796 bool ignore = false;
1797 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1799 if (!ignore) { taxMap[firstCol] = secondCol; }
1800 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1802 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
1810 return taxMap.size();
1813 catch(exception& e) {
1814 errorOut(e, "MothurOut", "readTax");
1818 /**********************************************************************************************************************/
// Reads a two-column names file and maps every name in the comma-separated second column to the
// first-column name: nameMap[member] = firstCol. Returns nameMap.size().
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
// NOTE(review): `redund` is not referenced by any visible line -- presumably it only selects this overload.
1819 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1823 openInputFile(namefile, in);
1827 bool pairDone = false;
1828 bool columnOne = true;
1829 string firstCol, secondCol;
1832 if (control_pressed) { break; }
1834 in.read(buffer, 4096);
1835 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
1837 for (int i = 0; i < pieces.size(); i++) {
1838 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1839 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1842 checkName(firstCol);
1843 checkName(secondCol);
1845 //parse names into vector
1846 vector<string> theseNames;
1847 splitAtComma(secondCol, theseNames);
1848 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
// process whatever was left over after the last chunk
1856 vector<string> pieces = splitWhiteSpace(rest);
1858 for (int i = 0; i < pieces.size(); i++) {
1859 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1860 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1863 checkName(firstCol);
1864 checkName(secondCol);
1866 //parse names into vector
1867 vector<string> theseNames;
1868 splitAtComma(secondCol, theseNames);
1869 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1875 return nameMap.size();
1878 catch(exception& e) {
1879 errorOut(e, "MothurOut", "readNames");
1883 /**********************************************************************************************************************/
// Reads a two-column names file and stores the columns reversed: nameMap[secondCol] = firstCol, with the
// second column used whole (it is not split at commas). Returns nameMap.size().
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
// NOTE(review): `flip` is not referenced by any visible line -- presumably it only selects this overload.
1884 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
1888 openInputFile(namefile, in);
1892 bool pairDone = false;
1893 bool columnOne = true;
1894 string firstCol, secondCol;
1897 if (control_pressed) { break; }
1899 in.read(buffer, 4096);
1900 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
1902 for (int i = 0; i < pieces.size(); i++) {
1903 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1904 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1907 checkName(firstCol);
1908 checkName(secondCol);
1909 nameMap[secondCol] = firstCol;
// process whatever was left over after the last chunk
1917 vector<string> pieces = splitWhiteSpace(rest);
1919 for (int i = 0; i < pieces.size(); i++) {
1920 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1921 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1924 checkName(firstCol);
1925 checkName(secondCol);
1926 nameMap[secondCol] = firstCol;
1932 return nameMap.size();
1935 catch(exception& e) {
1936 errorOut(e, "MothurOut", "readNames");
1940 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, both of which are cleared first:
//   nameMap[member]     = firstCol, for every name in the comma-separated second column
//   nameCount[firstCol] = number of names in the second column
// Returns nameMap.size(). The file is read in 4096-byte chunks; `rest` carries a partial token between
// chunks and is split once more after the loop. Both columns are passed through checkName() before use.
1941 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
1943 nameMap.clear(); nameCount.clear();
1946 openInputFile(namefile, in);
1950 bool pairDone = false;
1951 bool columnOne = true;
1952 string firstCol, secondCol;
1955 if (control_pressed) { break; }
1957 in.read(buffer, 4096);
1958 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
1960 for (int i = 0; i < pieces.size(); i++) {
1961 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1962 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1965 checkName(firstCol);
1966 checkName(secondCol);
1967 //parse names into vector
1968 vector<string> theseNames;
1969 splitAtComma(secondCol, theseNames);
1970 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1971 nameCount[firstCol] = theseNames.size();
// process whatever was left over after the last chunk
1979 vector<string> pieces = splitWhiteSpace(rest);
1981 for (int i = 0; i < pieces.size(); i++) {
1982 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1983 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1986 checkName(firstCol);
1987 checkName(secondCol);
1988 //parse names into vector
1989 vector<string> theseNames;
1990 splitAtComma(secondCol, theseNames);
1991 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1992 nameCount[firstCol] = theseNames.size();
1998 return nameMap.size();
2001 catch(exception& e) {
2002 errorOut(e, "MothurOut", "readNames");
2006 /**********************************************************************************************************************/
// Reads a two-column names file verbatim: nameMap[firstCol] = secondCol, with the second column kept as
// the unsplit comma-separated string. Returns nameMap.size().
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
2007 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2011 openInputFile(namefile, in);
2015 bool pairDone = false;
2016 bool columnOne = true;
2017 string firstCol, secondCol;
2020 if (control_pressed) { break; }
2022 in.read(buffer, 4096);
2023 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2025 for (int i = 0; i < pieces.size(); i++) {
2026 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2027 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2030 checkName(firstCol);
2031 checkName(secondCol);
2032 nameMap[firstCol] = secondCol; pairDone = false; }
// process whatever was left over after the last chunk
2038 vector<string> pieces = splitWhiteSpace(rest);
2040 for (int i = 0; i < pieces.size(); i++) {
2041 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2042 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2045 checkName(firstCol);
2046 checkName(secondCol);
2047 nameMap[firstCol] = secondCol; pairDone = false; }
2051 return nameMap.size();
2054 catch(exception& e) {
2055 errorOut(e, "MothurOut", "readNames");
2059 /**********************************************************************************************************************/
// Reads a two-column names file into nameMap[firstCol] = vector of the names obtained by splitting the
// second column at commas. Returns nameMap.size().
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
2060 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2064 openInputFile(namefile, in);
2068 bool pairDone = false;
2069 bool columnOne = true;
2070 string firstCol, secondCol;
2073 if (control_pressed) { break; }
2075 in.read(buffer, 4096);
2076 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2078 for (int i = 0; i < pieces.size(); i++) {
2079 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2080 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2083 checkName(firstCol);
2084 checkName(secondCol);
2085 vector<string> temp;
2086 splitAtComma(secondCol, temp);
2087 nameMap[firstCol] = temp;
// process whatever was left over after the last chunk
2095 vector<string> pieces = splitWhiteSpace(rest);
2097 for (int i = 0; i < pieces.size(); i++) {
2098 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2099 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2102 checkName(firstCol);
2103 checkName(secondCol);
2104 vector<string> temp;
2105 splitAtComma(secondCol, temp);
2106 nameMap[firstCol] = temp;
2112 return nameMap.size();
2114 catch(exception& e) {
2115 errorOut(e, "MothurOut", "readNames");
2119 /**********************************************************************************************************************/
// Reads a two-column names file into a local map: nameMap[firstCol] = getNumNames(secondCol).
// The return statement is not part of this listing -- presumably the map is returned.
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
2120 map<string, int> MothurOut::readNames(string namefile) {
2122 map<string, int> nameMap;
2126 openInputFile(namefile, in);
2130 bool pairDone = false;
2131 bool columnOne = true;
2132 string firstCol, secondCol;
2135 if (control_pressed) { break; }
2137 in.read(buffer, 4096);
2138 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2140 for (int i = 0; i < pieces.size(); i++) {
2141 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2142 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2145 checkName(firstCol);
2146 checkName(secondCol);
2147 int num = getNumNames(secondCol);
2148 nameMap[firstCol] = num;
// process whatever was left over after the last chunk
2156 vector<string> pieces = splitWhiteSpace(rest);
2157 for (int i = 0; i < pieces.size(); i++) {
2158 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2159 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2162 checkName(firstCol);
2163 checkName(secondCol);
2164 int num = getNumNames(secondCol);
2165 nameMap[firstCol] = num;
2174 catch(exception& e) {
2175 errorOut(e, "MothurOut", "readNames");
2179 /**********************************************************************************************************************/
// Reads a two-column names file into a local map: nameMap[firstCol] = getNumNames(secondCol).
// The return statement is not part of this listing -- presumably the map is returned.
// NOTE(review): no visible line touches `numSeqs`; presumably it accumulates the per-row counts on
// lines that are not part of this listing -- confirm against the full source.
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
2180 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2182 map<string, int> nameMap;
2187 openInputFile(namefile, in);
2191 bool pairDone = false;
2192 bool columnOne = true;
2193 string firstCol, secondCol;
2196 if (control_pressed) { break; }
2198 in.read(buffer, 4096);
2199 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2201 for (int i = 0; i < pieces.size(); i++) {
2202 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2203 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2206 checkName(firstCol);
2207 checkName(secondCol);
2208 int num = getNumNames(secondCol);
2209 nameMap[firstCol] = num;
// process whatever was left over after the last chunk
2218 vector<string> pieces = splitWhiteSpace(rest);
2219 for (int i = 0; i < pieces.size(); i++) {
2220 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2221 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2224 checkName(firstCol);
2225 checkName(secondCol);
2226 int num = getNumNames(secondCol);
2227 nameMap[firstCol] = num;
2237 catch(exception& e) {
2238 errorOut(e, "MothurOut", "readNames");
2242 /************************************************************/
// Sanitises a sequence name in place: every ':' is replaced by '_'.
// Sets changedSeqNames to true whenever a replacement is made. The return value is not part of this listing.
2243 int MothurOut::checkName(string& name) {
2245 for (int i = 0; i < name.length(); i++) {
2246 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2250 catch(exception& e) {
2251 errorOut(e, "MothurOut", "checkName");
2255 /**********************************************************************************************************************/
// Reads a two-column names file and, for each row, looks the first-column name up in `fastamap`.
// A seqPriorityNode(num, it->second, firstCol) is appended to nameVector, where num is
// getNumNames(secondCol) and it->second is the fastamap value for that name.
// A name missing from fastamap logs an [ERROR] line (any further handling of that case and the return
// value are not part of this listing).
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop. Both columns are passed through checkName() before use.
2256 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2262 openInputFile(namefile, in);
2266 bool pairDone = false;
2267 bool columnOne = true;
2268 string firstCol, secondCol;
2271 if (control_pressed) { break; }
2273 in.read(buffer, 4096);
2274 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2276 for (int i = 0; i < pieces.size(); i++) {
2277 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2278 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2281 checkName(firstCol);
2282 checkName(secondCol);
2283 int num = getNumNames(secondCol);
2285 map<string, string>::iterator it = fastamap.find(firstCol);
2286 if (it == fastamap.end()) {
2288 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2290 seqPriorityNode temp(num, it->second, firstCol);
2291 nameVector.push_back(temp);
// process whatever was left over after the last chunk
2301 vector<string> pieces = splitWhiteSpace(rest);
2303 for (int i = 0; i < pieces.size(); i++) {
2304 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2305 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2308 checkName(firstCol);
2309 checkName(secondCol);
2310 int num = getNumNames(secondCol);
2312 map<string, string>::iterator it = fastamap.find(firstCol);
2313 if (it == fastamap.end()) {
2315 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2317 seqPriorityNode temp(num, it->second, firstCol);
2318 nameVector.push_back(temp);
2327 catch(exception& e) {
2328 errorOut(e, "MothurOut", "readNames");
2332 //**********************************************************************************************************************
// Reads an accnos file (whitespace-separated names) into the set `names`.
// Each name is passed through checkName() before insertion. The declaration of `names` and the return
// statement are not part of this listing -- presumably the set is returned.
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop.
2333 set<string> MothurOut::readAccnos(string accnosfile){
2337 openInputFile(accnosfile, in);
2344 if (control_pressed) { break; }
2346 in.read(buffer, 4096);
2347 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2349 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
// process whatever was left over after the last chunk
2354 vector<string> pieces = splitWhiteSpace(rest);
2355 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2359 catch(exception& e) {
2360 errorOut(e, "MothurOut", "readAccnos");
2364 //**********************************************************************************************************************
// Reads an accnos file (whitespace-separated names) and appends each name to `names` in file order.
// Each name is passed through checkName() first; duplicates are kept. No visible line clears `names`
// beforehand, and the return value is not part of this listing.
// The file is read in 4096-byte chunks; `rest` carries a partial token between chunks and is split once
// more after the loop.
2365 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2369 openInputFile(accnosfile, in);
2376 if (control_pressed) { break; }
2378 in.read(buffer, 4096);
2379 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2381 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// process whatever was left over after the last chunk
2386 vector<string> pieces = splitWhiteSpace(rest);
2387 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2392 catch(exception& e) {
2393 errorOut(e, "MothurOut", "readAccnos");
2397 /***********************************************************************/
// Scans `names` character by character looking for ',' separators.
// NOTE(review): the counting statements and the return are not part of this listing; presumably the
// result is the number of comma-separated names -- confirm against the full source.
2399 int MothurOut::getNumNames(string names){
2405 for(int i=0;i<names.size();i++){
2406 if(names[i] == ','){
2414 catch(exception& e) {
2415 errorOut(e, "MothurOut", "getNumNames");
2419 /***********************************************************************/
// Iterates over every character of `line`.
// NOTE(review): the loop body and the return are not part of this listing; presumably the function
// counts occurrences of `c` in `line` -- confirm against the full source.
2421 int MothurOut::getNumChar(string line, char c){
2426 for(int i=0;i<line.size();i++){
2435 catch(exception& e) {
2436 errorOut(e, "MothurOut", "getNumChar");
2440 //**********************************************************************************************************************
// Tests whether every element of `subset` also occurs in `bigset`.
// Returns false at once when subset is larger than bigset, and false at the first subset element with
// no match. The final return is not part of this listing -- presumably true.
// The search is a nested linear scan, i.e. subset.size() * bigset.size() comparisons in the worst case.
2441 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2445 if (subset.size() > bigset.size()) { return false; }
2447 //check if each guy in subset is also in bigset
2448 for (int i = 0; i < subset.size(); i++) {
2450 for (int j = 0; j < bigset.size(); j++) {
2451 if (subset[i] == bigset[j]) { match = true; break; }
2454 //you have a guy in subset that had no match in bigset
2455 if (match == false) { return false; }
2461 catch(exception& e) {
2462 errorOut(e, "MothurOut", "isSubset");
2466 /***********************************************************************/
// Deletes a file: the name is expanded with getFullPathName() and passed to remove().
// The result of remove() is stored in `error`; the disabled code below would have printed a perror()
// message for failures other than "file does not exist". The return statement is not part of this listing.
2467 int MothurOut::mothurRemove(string filename){
2469 filename = getFullPathName(filename);
2470 int error = remove(filename.c_str());
2472 // if (errno != ENOENT) { //ENOENT == file does not exist
2473 // string message = "Error deleting file " + filename;
2474 // perror(message.c_str());
2479 catch(exception& e) {
2480 errorOut(e, "MothurOut", "mothurRemove");
2484 /***********************************************************************/
// Converts `item` to an int in `num` when isNumeric1(item) accepts it (the conversion statement itself
// is not part of this listing).
// Otherwise an [ERROR] line is logged and commandInputsConvertError is set to true.
2485 bool MothurOut::mothurConvert(string item, int& num){
2489 if (isNumeric1(item)) {
2494 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2495 commandInputsConvertError = true;
2500 catch(exception& e) {
2501 errorOut(e, "MothurOut", "mothurConvert");
2505 /***********************************************************************/
// Converts `item` to an intDist in `num` when isNumeric1(item) accepts it (the conversion statement
// itself is not part of this listing).
// Otherwise an [ERROR] line is logged and commandInputsConvertError is set to true.
2506 bool MothurOut::mothurConvert(string item, intDist& num){
2510 if (isNumeric1(item)) {
2515 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2516 commandInputsConvertError = true;
2521 catch(exception& e) {
2522 errorOut(e, "MothurOut", "mothurConvert");
2527 /***********************************************************************/
// Character-set check: `numeric` becomes true when stringToCheck contains nothing but digits, '.' and '-'.
// NOTE(review): this is not a syntax check -- the empty string and inputs such as "-", ".." or "1-2"
// also pass, so callers still need a conversion that can fail.
2528 bool MothurOut::isNumeric1(string stringToCheck){
2530 bool numeric = false;
2532 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2536 catch(exception& e) {
2537 errorOut(e, "MothurOut", "isNumeric1");
2542 /***********************************************************************/
// Converts `item` to a float in `num` when isNumeric1(item) accepts it (the conversion statement itself
// is not part of this listing).
// Otherwise an [ERROR] line is logged and commandInputsConvertError is set to true.
2543 bool MothurOut::mothurConvert(string item, float& num){
2547 if (isNumeric1(item)) {
2552 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2553 commandInputsConvertError = true;
2558 catch(exception& e) {
2559 errorOut(e, "MothurOut", "mothurConvert");
2563 /***********************************************************************/
// Converts `item` to a double in `num` when isNumeric1(item) accepts it (the conversion statement itself
// is not part of this listing).
// Otherwise an [ERROR] line is logged and commandInputsConvertError is set to true.
2564 bool MothurOut::mothurConvert(string item, double& num){
2568 if (isNumeric1(item)) {
2573 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2574 commandInputsConvertError = true;
2579 catch(exception& e) {
2580 errorOut(e, "MothurOut", "mothurConvert");
2584 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of doubles named `binomial`.
// From row 2 onward each entry with j <= i is filled with the recurrence
// binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j]; entries with j > i are set to 0.
// NOTE(review): the initialisation of rows 0 and 1 / column 0 and the return
// statement are on lines not visible in this excerpt.
2586 vector<vector<double> > MothurOut::binomial(int maxOrder){
2588 vector<vector<double> > binomial(maxOrder+1);
// resize() value-initialises every entry of the row to 0.0.
2590 for(int i=0;i<=maxOrder;i++){
2591 binomial[i].resize(maxOrder+1);
2600 for(int i=2;i<=maxOrder;i++){
2604 for(int i=2;i<=maxOrder;i++){
2605 for(int j=1;j<=maxOrder;j++){
// The i==j test below is a separate `if`, not part of the following if/else chain:
// when i==j the `else` branch still runs and overwrites the 1 with the recurrence sum.
2606 if(i==j){ binomial[i][j]=1; }
2607 if(j>i) { binomial[i][j]=0; }
2608 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2615 catch(exception& e) {
2616 errorOut(e, "MothurOut", "binomial");
2620 /**************************************************************************************************/
// Decodes the string `base36` into an unsigned int by positional accumulation:
// for each character, num = 36 * num + converts[c].
// NOTE(review): the lines that populate `converts`, define the index `i` and the
// character `c`, and return the result are not visible in this excerpt.
2621 unsigned int MothurOut::fromBase36(string base36){
2623 unsigned int num = 0;
// Lookup table from character to digit value.
2625 map<char, int> converts;
2690 while (i < base36.length()) {
// map::operator[] default-inserts 0 for a character that has no entry, so an
// unmapped character contributes 0 instead of raising an error. The arithmetic is
// unsigned and therefore wraps silently if the value exceeds the range of unsigned int.
2692 num = 36 * num + converts[c];
2699 catch(exception& e) {
2700 errorOut(e, "MothurOut", "fromBase36");
2704 /***********************************************************************/
// Iterates i = 1..num; by its name this computes num!, but the accumulator and the
// return statement are on lines not visible in this excerpt.
// NOTE(review): the result type is int, so the range of exactly representable
// factorials is small - confirm callers only pass small values.
2706 int MothurOut::factorial(int num){
2710 for (int i = 1; i <= num; i++) {
2716 catch(exception& e) {
2717 errorOut(e, "MothurOut", "factorial");
2721 /***********************************************************************/
// Counts every '>' character from the stream's current position to end-of-file.
// Reading through istreambuf_iterator consumes the stream, so `file` is left at EOF.
// Every '>' is counted, not only those at the start of a line.
// NOTE(review): the return statement is not visible here; presumably numSeqs is returned.
2723 int MothurOut::getNumSeqs(ifstream& file){
2725 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2729 catch(exception& e) {
2730 errorOut(e, "MothurOut", "getNumSeqs");
2734 /***********************************************************************/
// Line-based variant: reads lines with the class's getline(ifstream&) helper and
// increments `numSeqs` for each non-empty line whose first character is '>'.
// NOTE(review): the loop construct, the declaration of `input` and any reset of
// `numSeqs` are on lines not visible in this excerpt.
2735 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2740 input = getline(file);
// Guard against indexing an empty line.
2741 if (input.length() != 0) {
2742 if(input[0] == '>'){ numSeqs++; }
2746 catch(exception& e) {
2747 errorOut(e, "MothurOut", "getNumSeqs");
2751 /***********************************************************************/
2753 //This function parses the estimator options and puts them in a vector
// Splits `estim` at every occurrence of `symbol`, appending the pieces to `container`.
// A '-' separator is delegated to splitAtDash(), which additionally understands
// backslash-escaped dashes.
// The piece after the last separator is always appended, so input without any
// separator yields exactly one element.
// NOTE(review): the reset of `individual` after each push is on a line not visible here.
2754 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2757 if (symbol == '-') { splitAtDash(estim, container); return; }
2759 string individual = "";
2760 int estimLength = estim.size();
2761 for(int i=0;i<estimLength;i++){
2762 if(estim[i] == symbol){
2763 container.push_back(individual);
2767 individual += estim[i];
// Trailing piece (everything after the last separator).
2770 container.push_back(individual);
2773 catch(exception& e) {
2774 errorOut(e, "MothurOut", "splitAtChar");
2779 /***********************************************************************/
2781 //This function parses the estimator options and puts them in a vector
// Splits `estim` at '-' characters and appends the pieces to `container`.
// Escape handling visible in the active loop:
//   - a backslash immediately followed by '-' is dropped and sets prevEscape;
//   - a '-' seen while prevEscape is set is appended to the current piece instead of
//     ending it, and the flag is cleared;
//   - any other backslash (including one at the very end of the string) is kept.
// The piece after the last unescaped dash is always appended.
// The first loop below starts inside a /* ... */ comment (disabled legacy code).
// NOTE(review): the end of that comment, the else branch around push_back and the
// reset of `individual` are on lines not visible in this excerpt.
2782 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2784 string individual = "";
2785 int estimLength = estim.size();
2786 bool prevEscape = false;
2787 /*for(int i=0;i<estimLength;i++){
2789 individual += estim[i];
2793 if(estim[i] == '\\'){
2796 else if(estim[i] == '-'){
2797 container.push_back(individual);
2802 individual += estim[i];
2809 for(int i=0;i<estimLength;i++){
2810 if(estim[i] == '-'){
2811 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2813 container.push_back(individual);
2816 }else if(estim[i] == '\\'){
2817 if (i < estimLength-1) {
2818 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2819 else { individual += estim[i]; prevEscape = false; } //if no, add in
2820 }else { individual += estim[i]; }
2822 individual += estim[i];
2828 container.push_back(individual);
2830 catch(exception& e) {
2831 errorOut(e, "MothurOut", "splitAtDash");
2836 /***********************************************************************/
2837 //This function parses the label options and puts them in a set
// Splits `estim` at '-' characters and inserts the pieces into the set `container`
// (so duplicate pieces collapse and the original order is not preserved).
// Escape handling visible in the second loop:
//   - a backslash immediately followed by '-' is dropped and sets prevEscape;
//   - a '-' seen while prevEscape is set is appended to the current piece instead of
//     ending it, and the flag is cleared;
//   - any other backslash (including one at the very end of the string) is kept.
// The piece after the last unescaped dash is always inserted.
// NOTE(review): the first loop mirrors the disabled legacy loop of the vector
// overload; whether it is commented out here cannot be determined from the visible
// lines. The else branch around insert() and the reset of `individual` are also on
// lines not visible in this excerpt.
2838 void MothurOut::splitAtDash(string& estim, set<string>& container) {
2840 string individual = "";
2841 int estimLength = estim.size();
2842 bool prevEscape = false;
2844 for(int i=0;i<estimLength;i++){
2846 individual += estim[i];
2850 if(estim[i] == '\\'){
2853 else if(estim[i] == '-'){
2854 container.insert(individual);
2859 individual += estim[i];
2866 for(int i=0;i<estimLength;i++){
2867 if(estim[i] == '-'){
2868 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2870 container.insert(individual);
2873 }else if(estim[i] == '\\'){
2874 if (i < estimLength-1) {
2875 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2876 else { individual += estim[i]; prevEscape = false; } //if no, add in
2877 }else { individual += estim[i]; }
2879 individual += estim[i];
2882 container.insert(individual);
2885 catch(exception& e) {
2886 errorOut(e, "MothurOut", "splitAtDash");
2890 /***********************************************************************/
2891 //This function parses the line options and puts them in a set
// Splits `estim` at '-' characters, converts each piece to an int with convert()
// and inserts the result into the set `container`.
// Escape handling visible in the second loop matches the string overloads: a
// backslash directly before '-' is dropped and makes that dash part of the piece;
// other backslashes are kept.
// The piece after the last unescaped dash is always converted and inserted.
// NOTE(review): the declaration of `lineNum`, the else branch around convert()/insert()
// and the reset of `individual` are on lines not visible in this excerpt. The first
// loop mirrors the disabled legacy loop of the vector overload; whether it is
// commented out here cannot be determined from the visible lines.
2892 void MothurOut::splitAtDash(string& estim, set<int>& container) {
2894 string individual = "";
2896 int estimLength = estim.size();
2897 bool prevEscape = false;
2899 for(int i=0;i<estimLength;i++){
2901 individual += estim[i];
2905 if(estim[i] == '\\'){
2908 else if(estim[i] == '-'){
2909 convert(individual, lineNum); //convert the string to int
2910 container.insert(lineNum);
2915 individual += estim[i];
2921 for(int i=0;i<estimLength;i++){
2922 if(estim[i] == '-'){
2923 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2925 convert(individual, lineNum); //convert the string to int
2926 container.insert(lineNum);
2929 }else if(estim[i] == '\\'){
2930 if (i < estimLength-1) {
2931 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2932 else { individual += estim[i]; prevEscape = false; } //if no, add in
2933 }else { individual += estim[i]; }
2935 individual += estim[i];
2939 convert(individual, lineNum); //convert the string to int
2940 container.insert(lineNum);
2942 catch(exception& e) {
2943 errorOut(e, "MothurOut", "splitAtDash");
2948 /***********************************************************************/
// Joins the entries of `names` into `list`, separated by commas with no trailing comma.
// NOTE(review): the declaration of `list` and the final return are on lines not
// visible in this excerpt.
2949 string MothurOut::makeList(vector<string>& names) {
// The early return also protects the loop bound below: names.size()-1 is unsigned
// and would wrap around for an empty vector.
2953 if (names.size() == 0) { return list; }
2955 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
// Last element is appended without a separator.
2958 list += names[names.size()-1];
2962 catch(exception& e) {
2963 errorOut(e, "MothurOut", "makeList");
2968 /***********************************************************************/
2969 //This function parses a string and puts the pieces in a vector
// Splits `estim` at every ',' and appends the pieces to `container`.
// The piece after the last comma is always appended, so input without any comma
// yields exactly one element. `estim` itself is only read by the visible active code.
// The commented-out lines further down are an earlier find_first_of-based implementation.
// NOTE(review): the reset of `individual` after each push is on a line not visible here.
2970 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
2972 string individual = "";
2973 int estimLength = estim.size();
2974 for(int i=0;i<estimLength;i++){
2975 if(estim[i] == ','){
2976 container.push_back(individual);
2980 individual += estim[i];
// Trailing piece (everything after the last comma).
2983 container.push_back(individual);
2988 // string individual;
2990 // while (estim.find_first_of(',') != -1) {
2991 // individual = estim.substr(0,estim.find_first_of(','));
2992 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
2993 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
2994 // container.push_back(individual);
2998 // container.push_back(estim);
3000 catch(exception& e) {
3001 errorOut(e, "MothurOut", "splitAtComma");
3005 /***********************************************************************/
3006 //This function splits up the various option parameters
3007 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3009 prefix = suffix.substr(0,suffix.find_first_of(c));
3010 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3011 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3013 while(suffix.at(0) == ' ')
3014 suffix = suffix.substr(1, suffix.length());
3015 }else { suffix = ""; }
3018 catch(exception& e) {
3019 errorOut(e, "MothurOut", "splitAtChar");
3024 /***********************************************************************/
3026 //This function splits up the various option parameters
3027 void MothurOut::splitAtComma(string& prefix, string& suffix){
3029 prefix = suffix.substr(0,suffix.find_first_of(','));
3030 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3031 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3033 while(suffix.at(0) == ' ')
3034 suffix = suffix.substr(1, suffix.length());
3035 }else { suffix = ""; }
3038 catch(exception& e) {
3039 errorOut(e, "MothurOut", "splitAtComma");
3043 /***********************************************************************/
3045 //This function separates the key value from the option value i.e. dist=96_...
// Splits a "key=value" string held in `value` at its first '='.
// When an '=' is present, `key` receives the text before it and `value` is replaced
// by the text after it (possibly empty).
// NOTE(review): what happens when no '=' is present is decided on lines not visible
// in this excerpt.
3046 void MothurOut::splitAtEquals(string& key, string& value){
// find_first_of() returns string::npos on failure; the comparison with -1 works
// because -1 converts to the same unsigned value.
3048 if(value.find_first_of('=') != -1){
3049 key = value.substr(0,value.find_first_of('='));
// Always true once '=' was found (its index is < length); kept as a defensive check.
3050 if ((value.find_first_of('=')+1) <= value.length()) {
3051 value = value.substr(value.find_first_of('=')+1, value.length());
3058 catch(exception& e) {
3059 errorOut(e, "MothurOut", "splitAtEquals");
3064 /**************************************************************************************************/
// Linear search: returns true as soon as `groupname` equals an element of `Groups`.
// `Groups` is taken by value, so the vector is copied on every call.
// NOTE(review): the not-found return is on a line not visible here (presumably false).
3066 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3068 for (int i = 0; i < Groups.size(); i++) {
3069 if (groupname == Groups[i]) { return true; }
3073 catch(exception& e) {
3074 errorOut(e, "MothurOut", "inUsersGroups");
3078 /**************************************************************************************************/
// Linear search: returns true as soon as the vector `set` compares equal
// (same length, same elements in the same order) to one of the vectors in `sets`.
// Both arguments are taken by value, so they are copied on every call.
// NOTE(review): the not-found return is on a line not visible here (presumably false).
3080 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3082 for (int i = 0; i < sets.size(); i++) {
3083 if (set == sets[i]) { return true; }
3087 catch(exception& e) {
3088 errorOut(e, "MothurOut", "inUsersGroups");
3092 /**************************************************************************************************/
// Linear search: returns true as soon as `groupname` equals an element of `Groups`.
// `Groups` is taken by value, so the vector is copied on every call.
// NOTE(review): the not-found return is on a line not visible here (presumably false).
3094 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3096 for (int i = 0; i < Groups.size(); i++) {
3097 if (groupname == Groups[i]) { return true; }
3101 catch(exception& e) {
3102 errorOut(e, "MothurOut", "inUsersGroups");
3107 /**************************************************************************************************/
3108 //returns true if any of the strings in first vector are in second vector
// "Any of" test: returns true as soon as one entry of `groupnames` is found in
// `Groups` by the single-name overload.
// Each inner call receives `Groups` by value, so the vector is copied once per
// entry of `groupnames`.
// NOTE(review): the not-found return is on a line not visible here (presumably false).
3109 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3112 for (int i = 0; i < groupnames.size(); i++) {
3113 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3117 catch(exception& e) {
3118 errorOut(e, "MothurOut", "inUsersGroups");
3122 /***********************************************************************/
3123 //this function determines if the user has given us labels that are smaller than the given label.
3124 //if so then it returns true so that the calling function can run the previous valid distance.
3125 //it's a "smart" distance function. It also checks for invalid labels.
// Compares the current `label` with the labels the user asked for and prunes
// `userLabels` in place.
//   label      : label currently being processed; "unique" returns false immediately.
//   userLabels : requested labels. Entries that are neither "unique" nor convertible
//                to float are erased, as are entries whose numeric value is smaller
//                than `label` ("unique" is ranked as -1.0).
//   errorOff   : messages are written only when this is the empty string.
// NOTE(review): the declarations of labelFloat/temp/s, the handling of a
// non-convertible `label`, the iterator advance for valid entries, the place where
// `smaller` is set and the final return are on lines not visible in this excerpt.
3126 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3129 set<string>::iterator it;
3130 vector<float> orderFloat;
3131 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3132 map<string, float>::iterator it2;
3134 bool smaller = false;
3136 //unique is the smallest line
3137 if (label == "unique") { return false; }
3139 if (convertTestFloat(label, labelFloat)) {
3140 convert(label, labelFloat);
3141 }else { //cant convert
3146 //go through users set and make them floats
// No increment in the for-header: the iterator is advanced inside the body so that
// erasing the current element stays valid.
3147 for(it = userLabels.begin(); it != userLabels.end();) {
3150 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3152 orderFloat.push_back(temp);
3153 userMap[*it] = temp;
3155 }else if (*it == "unique") {
3156 orderFloat.push_back(-1.0);
3157 userMap["unique"] = -1.0;
3160 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
// Post-increment hands the old position to erase() while `it` already points at the next element.
3161 userLabels.erase(it++);
3166 sort(orderFloat.begin(), orderFloat.end());
3168 /*************************************************/
3169 //is this label bigger than any of the users labels
3170 /*************************************************/
3172 //loop through order until you find a label greater than label
3173 for (int i = 0; i < orderFloat.size(); i++) {
3174 if (orderFloat[i] < labelFloat) {
3176 if (orderFloat[i] == -1) {
3177 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3178 userLabels.erase("unique");
3181 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// Reverse lookup: recover the user's original spelling of this numeric label.
3183 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3184 if (it2->second == orderFloat[i]) {
3186 //remove small labels
3187 userLabels.erase(s);
3191 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3193 //since they are sorted once you find a bigger one stop looking
3200 catch(exception& e) {
3201 errorOut(e, "MothurOut", "anyLabelsToProcess");
3206 /**************************************************************************************************/
// Checks the version stamp on the first line of `file` against `version`.
// The first line is expected to be '#' followed by a dot-separated version. The
// flag `good` is cleared when:
//   - the line does not start with '#',
//   - the two versions have a different number of dot-separated fields, or
//   - a field of `version` is numerically greater than the matching field in the file.
// Side effects on `file`: closed when the check fails, otherwise rewound with seekg(0).
// NOTE(review): the declarations of good/num1/num2, the exact nesting of the
// branches and the final return are on lines not visible in this excerpt.
3207 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3212 string line = getline(file);
3214 //before we added this check
3215 if (line[0] != '#') { good = false; }
// Drop the leading '#'.
3218 line = line.substr(1);
3220 vector<string> versionVector;
3221 splitAtChar(version, versionVector, '.');
3223 //check file version
3224 vector<string> linesVector;
3225 splitAtChar(line, linesVector, '.');
3227 if (versionVector.size() != linesVector.size()) { good = false; }
3229 for (int j = 0; j < versionVector.size(); j++) {
3231 convert(versionVector[j], num1);
3232 convert(linesVector[j], num2);
3234 //if mothurs version is newer than this files version, then we want to remake it
3235 if (num1 > num2) { good = false; break; }
3241 if (!good) { file.close(); }
3242 else { file.seekg(0); }
3246 catch(exception& e) {
3247 errorOut(e, "MothurOut", "checkReleaseVersion");
3251 /**************************************************************************************************/
3252 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3254 vector<double> averages; //averages.resize(numComp, 0.0);
3255 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3257 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3258 for (int i = 0; i < dists[thisIter].size(); i++) {
3259 averages[i] += dists[thisIter][i];
3264 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3268 catch(exception& e) {
3269 errorOut(e, "MothurOut", "getAverages");
3273 /**************************************************************************************************/
3274 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3277 vector<double> averages = getAverages(dists);
3279 //find standard deviation
3280 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3281 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3283 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3284 for (int j = 0; j < dists[thisIter].size(); j++) {
3285 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3288 for (int i = 0; i < stdDev.size(); i++) {
3289 stdDev[i] /= (double) dists.size();
3290 stdDev[i] = sqrt(stdDev[i]);
3295 catch(exception& e) {
3296 errorOut(e, "MothurOut", "getAverages");
3300 /**************************************************************************************************/
3301 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3303 //find standard deviation
3304 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3305 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3307 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3308 for (int j = 0; j < dists[thisIter].size(); j++) {
3309 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3312 for (int i = 0; i < stdDev.size(); i++) {
3313 stdDev[i] /= (double) dists.size();
3314 stdDev[i] = sqrt(stdDev[i]);
3319 catch(exception& e) {
3320 errorOut(e, "MothurOut", "getAverages");
3324 /**************************************************************************************************/
// Collapses per-iteration distance tables into a single table.
//   calcDistsTotals : indexed [iteration][i][j]; the inline comments label i as the
//                     calculator and j as the comparison. Element [0] is read
//                     unconditionally, so the container must not be empty.
//   mode            : "average" selects the arithmetic mean over iterations; any other
//                     value selects the element at index size()/2 of the sorted
//                     per-iteration values (the upper middle value for an even count).
//   returns         : a table shaped like calcDistsTotals[0], with seq1/seq2 copied
//                     from iteration 0 and dist set as described above.
// NOTE(review): the declaration of `tempDist` is on a line not visible in this excerpt.
3325 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3328 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3329 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3330 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3331 vector<seqDist> temp;
3332 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3334 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3335 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3336 tempDist.dist = 0.0;
3337 temp.push_back(tempDist);
3339 calcAverages.push_back(temp);
3342 if (mode == "average") {
3343 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3344 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3345 for (int j = 0; j < calcAverages[i].size(); j++) {
3346 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3351 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3352 for (int j = 0; j < calcAverages[i].size(); j++) {
3353 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3356 }else { //find median
3357 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3358 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3359 vector<double> dists;
3360 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3361 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3363 sort(dists.begin(), dists.end());
// Middle element after sorting; no averaging of the two middle values for even counts.
3364 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3369 return calcAverages;
3371 catch(exception& e) {
3372 errorOut(e, "MothurOut", "getAverages");
3376 /**************************************************************************************************/
// Averages per-iteration distance tables cell by cell.
//   calcDistsTotals : indexed [iteration][i][j]. Element [0] is read unconditionally,
//                     so the container must not be empty, and every iteration must
//                     provide at least the cells present in iteration 0.
//   returns         : a table shaped like calcDistsTotals[0], with seq1/seq2 copied
//                     from iteration 0 and dist set to the arithmetic mean over all
//                     iterations.
// NOTE(review): the declaration of `tempDist` is on a line not visible in this excerpt.
3377 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3380 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3381 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3382 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3383 vector<seqDist> temp;
3384 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3386 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3387 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3388 tempDist.dist = 0.0;
3389 temp.push_back(tempDist);
3391 calcAverages.push_back(temp);
3395 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3396 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3397 for (int j = 0; j < calcAverages[i].size(); j++) {
3398 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3403 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3404 for (int j = 0; j < calcAverages[i].size(); j++) {
3405 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3409 return calcAverages;
3411 catch(exception& e) {
3412 errorOut(e, "MothurOut", "getAverages");
3416 /**************************************************************************************************/
3417 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3420 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3422 //find standard deviation
3423 vector< vector<seqDist> > stdDev;
3424 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3425 vector<seqDist> temp;
3426 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3428 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3429 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3430 tempDist.dist = 0.0;
3431 temp.push_back(tempDist);
3433 stdDev.push_back(temp);
3436 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3437 for (int i = 0; i < stdDev.size(); i++) {
3438 for (int j = 0; j < stdDev[i].size(); j++) {
3439 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3444 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3445 for (int j = 0; j < stdDev[i].size(); j++) {
3446 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3447 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3453 catch(exception& e) {
3454 errorOut(e, "MothurOut", "getAverages");
3458 /**************************************************************************************************/
3459 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3461 //find standard deviation
3462 vector< vector<seqDist> > stdDev;
3463 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3464 vector<seqDist> temp;
3465 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3467 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3468 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3469 tempDist.dist = 0.0;
3470 temp.push_back(tempDist);
3472 stdDev.push_back(temp);
3475 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3476 for (int i = 0; i < stdDev.size(); i++) {
3477 for (int j = 0; j < stdDev[i].size(); j++) {
3478 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3483 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3484 for (int j = 0; j < stdDev[i].size(); j++) {
3485 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3486 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3492 catch(exception& e) {
3493 errorOut(e, "MothurOut", "getAverages");
3498 /**************************************************************************************************/
// Returns false as soon as `input` contains a character outside the ASCII digits
// '0'..'9' (codes 48..57).
// NOTE(review): the final return is on a line not visible here; presumably true,
// which would also be the result for an empty string since the loop never runs.
3499 bool MothurOut::isContainingOnlyDigits(string input) {
3502 //are you a digit in ascii code
3503 for (int i = 0;i < input.length(); i++){
// Empty then-branch: digits are simply skipped.
3504 if( input[i]>47 && input[i]<58){}
3505 else { return false; }
3510 catch(exception& e) {
3511 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3515 /**************************************************************************************************/
// Walks a ';'-delimited taxonomy string and strips a trailing "(score)" from each
// taxon when the text between the last '(' and the last ')' passes isNumeric1().
// `tax` is consumed from the front one taxon at a time; the loop stops with a
// return value of 0 if control_pressed is set.
// NOTE(review): the guards on pos/pos2, the reassembly of the cleaned string into
// `tax` and the normal return value are on lines not visible in this excerpt.
3516 int MothurOut::removeConfidences(string& tax) {
// find_first_of() returns string::npos on failure; comparing with -1 works because
// -1 converts to the same unsigned value.
3522 while (tax.find_first_of(';') != -1) {
3524 if (control_pressed) { return 0; }
3527 taxon = tax.substr(0,tax.find_first_of(';'));
3529 int pos = taxon.find_last_of('(');
3532 int pos2 = taxon.find_last_of(')');
// Text strictly between the last '(' and the last ')'.
3534 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
3535 if (isNumeric1(confidenceScore)) {
3536 taxon = taxon.substr(0, pos); //rip off confidence
// Advance past the ';' that ended this taxon.
3542 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
3550 catch(exception& e) {
3551 errorOut(e, "MothurOut", "removeConfidences");
3555 /**************************************************************************************************/
// Copies `tax` into `newTax`, omitting every single-quote and double-quote character.
// If control_pressed becomes set while copying, the partially built string is
// returned immediately.
// NOTE(review): the declaration of `newTax` and the normal return are on lines not
// visible in this excerpt.
3556 string MothurOut::removeQuotes(string tax) {
3562 for (int i = 0; i < tax.length(); i++) {
3564 if (control_pressed) { return newTax; }
3566 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3572 catch(exception& e) {
3573 errorOut(e, "MothurOut", "removeQuotes");
3577 /**************************************************************************************************/
3578 // function for calculating standard deviation
// Population standard deviation of the integers in `featureVector`:
// sqrt(sum((x - mean)^2) / N) with N = featureVector.size().
// Both divisions use featureVector.size() as the divisor, so an empty vector
// divides by zero in floating point.
// NOTE(review): the declarations of `average` and `stdDev` and the return statement
// are on lines not visible in this excerpt.
3579 double MothurOut::getStandardDeviation(vector<int>& featureVector){
// Mean: sum first, then divide by the element count.
3583 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3584 average /= (double) featureVector.size();
3586 //find standard deviation
3588 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3589 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
// Divide by N (population form), then take the root.
3592 stdDev /= (double) featureVector.size();
3593 stdDev = sqrt(stdDev);
3597 catch(exception& e) {
3598 errorOut(e, "MothurOut", "getStandardDeviation");
3602 /**************************************************************************************************/