5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
// Returns the shared MothurOut object. The object is allocated with new the
// first time this is called (while _uniqueInstance is still 0); every later
// call hands back that same pointer.
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Collects the parameter names that mothur can remember as "current" values.
// Each insert below adds one name to `types`; besides the file types this
// also includes "processors", which is a setting rather than a file.
// A caught exception is passed to errorOut() tagged with this function's name.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
48 types.insert("processors");
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
// Prints every current setting that has a value, one "<type>=<value>" pair per
// line, using mothurOut()/mothurOutEndLine(). File entries are printed when
// their name is non-empty; processors is printed only when it is not "1".
// Note the printed key for qualfile is "qfile" and for counttablefile is "count".
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
// Reports whether any current setting holds a value. Returns true at the first
// file name that is non-empty, or when processors is anything other than "1".
// These are the same conditions printCurrentFiles() uses to decide what to print.
94 bool MothurOut::hasCurrentFiles() {
// NOTE(review): every check below returns directly and none assigns
// hasCurrent, so it appears to serve only as the fall-through value - confirm.
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): presumably resets the current file settings reported by
// printCurrentFiles() - confirm against the body.
// Any std::exception is passed to errorOut() tagged "clearCurrentFiles".
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Searches the directories listed in the PATH environment variable for
// programName and builds a path to it in pPath.
//   programName - name of the executable to look for.
// Two searches are visible below:
//   1. look for a PATH entry whose lowercased text contains programName, and
//      append the platform separator plus programName to it;
//   2. otherwise try to open <dir><separator>programName for each PATH entry
//      and keep the first one that opens.
// Debug output is written through mothurOut() when `debug` is set.
166 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv returns a null pointer when PATH is not set, and
// constructing a std::string from a null pointer is undefined - confirm PATH
// is always present or guard this.
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
// Only the directory text is lowercased here; programName is compared as given.
// NOTE(review): a programName containing uppercase letters could never match
// unless the caller lowercases it - verify.
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// The three-argument openInputFile is the quiet overload: its "could not open"
// message is commented out, so probing directories prints no errors.
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Records filename as the log file name and opens the log stream `out` on it
// through openOutputFile().
//   filename - path of the log file.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
// MPI_Comm_rank stores this process's rank in pid.
// NOTE(review): the MPI lines are presumably compiled only in MPI builds - confirm.
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
// Stores pathname in defaultPath after making sure it ends with the platform's
// directory separator ("/" on the unix-like targets of the #if, "\\" otherwise).
//   pathname - directory to use as the default path.
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
// NOTE(review): for an empty pathname, length()-1 wraps around and substr
// throws std::out_of_range - confirm an emptiness check guards this line.
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores pathname in outputDir exactly as given. Unlike setDefaultPath(), no
// trailing separator is added here.
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// NOTE(review): presumably closes the log stream opened by setFileName() -
// confirm against the body. The work is restricted to MPI rank 0.
294 void MothurOut::closeLog() {
// MPI_Comm_rank stores this process's rank in pid.
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. Any std::exception raised by its body is passed to errorOut()
// with the function tag "MothurOut".
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Emits `output` as program output; under MPI only rank 0 does the work.
// NOTE(review): presumably writes to both the screen and the log file, given
// the separate JustToScreen / JustToLog variants - confirm against the body.
328 void MothurOut::mothurOut(string output) {
// MPI_Comm_rank stores this process's rank in pid.
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
// The function tag passed here is "MothurOut", not "mothurOut".
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Variant of mothurOut(); under MPI only rank 0 does the work.
// NOTE(review): presumably writes `output` to the screen but not the log file -
// confirm against the body.
351 void MothurOut::mothurOutJustToScreen(string output) {
// MPI_Comm_rank stores this process's rank in pid.
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
366 catch(exception& e) {
// The function tag passed here is "MothurOut", not this function's name.
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Companion to mothurOut(): callers in this file invoke it right after
// mothurOut() to finish the line. Under MPI only rank 0 does the work.
372 void MothurOut::mothurOutEndLine() {
// MPI_Comm_rank stores this process's rank in pid.
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
388 catch(exception& e) {
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload of mothurOut() that also writes `output` to a caller-supplied file.
//   output     - text to emit.
//   outputFile - open stream that receives a copy of the text.
// Under MPI only rank 0 does the work.
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
// MPI_Comm_rank stores this process's rank in pid.
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
406 outputFile << output;
414 catch(exception& e) {
// The function tag passed here is "MothurOut", not "mothurOut".
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload of mothurOutEndLine() taking a caller-supplied file stream.
// NOTE(review): presumably ends the line in outputFile as well as in the
// normal output - confirm against the body. Under MPI only rank 0 does the work.
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
// MPI_Comm_rank stores this process's rank in pid.
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// Variant of mothurOut(); under MPI only rank 0 does the work.
// NOTE(review): presumably writes `output` to the log file but not the screen -
// confirm against the body.
443 void MothurOut::mothurOutJustToLog(string output) {
// MPI_Comm_rank stores this process's rank in pid.
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
458 catch(exception& e) {
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Central error reporter used by the catch blocks throughout this file.
//   e        - the caught exception; its what() text is printed.
//   object   - name of the class or command where the error happened.
//   function - name of the function where the error happened.
// Prints "[ERROR]: " followed by e.what(), then a follow-up message. When the
// text does not contain "bad_alloc" the message is a generic "contact us" note.
// Otherwise it is out-of-memory guidance, with dedicated wording when object is
// "cluster" or "shhh.flows" and a general version for everything else.
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// find() returns string::npos when "bad_alloc" is absent. The result is held in
// an int, so the comparison below relies on -1 converting back to npos.
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// The messages below are the out-of-memory variants.
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// Reports this process's memory use through the two output parameters and
// prints it with mothurOut().
//   vm_usage     - receives vsize / 1024.0.
//   resident_set - receives rss multiplied by the page size in KB.
// The values are parsed from /proc/self/stat.
// NOTE(review): the platform guard below also matches __APPLE__ / __MACH__;
// confirm /proc/self/stat is available on every platform it admits.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
// The 22 leading fields are read into the throwaway strings above so that the
// next two extractions land on vsize and rss.
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
// Integer division: the page size in bytes is converted to whole KB.
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
// Everything from here on is a commented-out Windows implementation that is
// kept for reference only.
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
// Opens fileName for appending on the caller's stream.
//   fileName   - file to open; first expanded with getFullPathName().
//   fileHandle - stream that is opened in ios::app mode.
// An "[ERROR]: Could not open ..." line is printed for the failure case.
// NOTE(review): appendFiles() ignores this function's int result - confirm
// which values signal success and failure.
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
586 catch(exception& e) {
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
// Skips whitespace in f so the next read starts at the next non-whitespace
// character.
//   f - input stream to advance.
592 void MothurOut::gobble(istream& f){
// Consume characters for as long as they are whitespace.
596 while(isspace(d=f.get())) { ;}
// The loop read one character too many; put it back unless the end of the
// stream was reached.
597 if(!f.eof()) { f.putback(d); }
599 catch(exception& e) {
600 errorOut(e, "MothurOut", "gobble");
604 /***********************************************************************/
// Same as gobble(istream&), for a string stream: skips whitespace in f so the
// next read starts at the next non-whitespace character.
605 void MothurOut::gobble(istringstream& f){
// Consume characters for as long as they are whitespace.
608 while(isspace(d=f.get())) {;}
// Return the first non-whitespace character to the stream unless eof was hit.
609 if(!f.eof()) { f.putback(d); }
611 catch(exception& e) {
612 errorOut(e, "MothurOut", "gobble");
617 /***********************************************************************/
// Reads one line from a string stream, one character at a time.
// Reading stops at the first '\n', '\r' or '\f', or when the stream reports eof.
// Because '\r' alone ends a line, a "\r\n" pair leaves the '\n' in the stream.
619 string MothurOut::getline(istringstream& fileHandle) {
624 while (!fileHandle.eof()) {
626 char c = fileHandle.get();
628 //are you at the end of the line
629 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
636 catch(exception& e) {
637 errorOut(e, "MothurOut", "getline");
641 /***********************************************************************/
// Reads one line from a file stream, one character at a time.
// Reading stops at the first '\n', '\r', '\f' or EOF value returned by get().
// Because '\r' alone ends a line, a "\r\n" pair leaves the '\n' in the stream.
643 string MothurOut::getline(ifstream& fileHandle) {
// get() returns an int; storing it in a char means EOF is detected by comparing
// the narrowed value.
650 char c = fileHandle.get();
652 //are you at the end of the line
653 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
660 catch(exception& e) {
661 errorOut(e, "MothurOut", "getline");
665 /***********************************************************************/
// File-local helper, compiled only on the unix-like platforms listed below and
// only when USE_COMPRESSION is defined.
// Returns true when s is at least as long as suffix and its final characters
// equal suffix exactly (case-sensitive).
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
668 #ifdef USE_COMPRESSION
669 inline bool endsWith(string s, const char * suffix){
670 size_t suffixLength = strlen(suffix);
// The length check comes first so the substr start position cannot underflow.
671 return s.size() >= suffixLength && s.substr(s.size() - suffixLength, suffixLength).compare(suffix) == 0;
// Derives the "root" of a file name by cutting off the extension but keeping
// the final dot, e.g. "dir/seqs.fasta" gives "dir/seqs.".
//   longName - file name, with or without a directory part.
// With USE_COMPRESSION on unix-like builds, a trailing ".gz" or ".bz2" is
// removed first, so "seqs.fasta.gz" is treated like "seqs.fasta".
// A name with no dot is left unchanged in rootName.
676 string MothurOut::getRootName(string longName){
679 string rootName = longName;
681 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
682 #ifdef USE_COMPRESSION
683 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
684 int pos = rootName.find_last_of('.');
685 rootName = rootName.substr(0, pos);
686 cerr << "shortening " << longName << " to " << rootName << "\n";
690 if(rootName.find_last_of(".") != rootName.npos){
// The +1 keeps the dot itself in the result.
691 int pos = rootName.find_last_of('.')+1;
692 rootName = rootName.substr(0, pos);
697 catch(exception& e) {
698 errorOut(e, "MothurOut", "getRootName");
702 /***********************************************************************/
// Strips the directory part from a path: simpleName becomes everything after
// the last '/' or '\\' in longName, or longName itself when it contains neither.
704 string MothurOut::getSimpleName(string longName){
706 string simpleName = longName;
// Both separator styles are searched for, whatever the platform.
709 found=longName.find_last_of("/\\");
711 if(found != longName.npos){
712 simpleName = longName.substr(found+1);
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getSimpleName");
723 /***********************************************************************/
// Produces a pseudo-random integer by scaling rand() into (highest+1) buckets
// and truncating.
//   highest - largest index wanted.
// NOTE(review): the intended range appears to be 0..highest inclusive. The
// operands are converted to float before the multiply, so for very large
// `highest` rounding may affect the upper bound - verify.
725 int MothurOut::getRandomIndex(int highest){
728 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
732 catch(exception& e) {
733 errorOut(e, "MothurOut", "getRandomIndex");
738 /**********************************************************************/
// Extracts the directory part of a path: rootPathName becomes everything up to
// and including the last '/' or '\\' in longName. When longName has no
// separator, rootPathName stays equal to longName.
740 string MothurOut::getPathName(string longName){
742 string rootPathName = longName;
744 if(longName.find_last_of("/\\") != longName.npos){
// The +1 keeps the trailing separator in the result.
745 int pos = longName.find_last_of("/\\")+1;
746 rootPathName = longName.substr(0, pos);
751 catch(exception& e) {
752 errorOut(e, "MothurOut", "getPathName");
757 /***********************************************************************/
// Checks that dirName is a usable directory by opening a scratch file in it.
//   dirName - directory to test. It is modified in place: a trailing separator
//             is appended if missing and the name is expanded with
//             getFullPathName().
// The scratch file is named <dirName><tag>"temp"; a "does not exist or is not
// writable" message is printed for the failure case, and the scratch file is
// removed with mothurRemove().
759 bool MothurOut::dirCheck(string& dirName){
765 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
770 //add / to name if needed
// NOTE(review): for an empty dirName, length()-1 wraps around and substr throws
// std::out_of_range - confirm callers never pass an empty string.
771 string lastChar = dirName.substr(dirName.length()-1);
772 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
773 if (lastChar != "/") { dirName += "/"; }
775 if (lastChar != "\\") { dirName += "\\"; }
778 //test to make sure directory exists
779 dirName = getFullPathName(dirName);
// NOTE(review): `tag` presumably makes the scratch name unique per process -
// its definition is not in this part of the function; confirm.
780 string outTemp = dirName + tag + "temp";
782 out.open(outTemp.c_str(), ios::trunc);
784 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
787 mothurRemove(outTemp);
793 catch(exception& e) {
794 errorOut(e, "MothurOut", "dirCheck");
799 /***********************************************************************/
// Extracts the path prefix of longName: `path` becomes everything up to and
// including the last '~', '/' or '\\'.
// getFullPathName() treats an empty result from this function as "the name has
// no path component".
801 string MothurOut::hasPath(string longName){
// '~' is included so that home-relative names count as having a path.
806 found=longName.find_last_of("~/\\");
808 if(found != longName.npos){
809 path = longName.substr(0, found+1);
814 catch(exception& e) {
815 errorOut(e, "MothurOut", "hasPath");
820 /***********************************************************************/
// Extracts the extension of longName, including the leading dot, e.g.
// "seqs.fasta" gives ".fasta". `extension` stays "" when there is no dot.
// The search covers the whole string, so a dot in a directory name counts when
// the file name itself has none.
822 string MothurOut::getExtension(string longName){
824 string extension = "";
826 if(longName.find_last_of('.') != longName.npos){
827 int pos = longName.find_last_of('.');
// The count argument exceeds what remains; substr clamps it to the end.
828 extension = longName.substr(pos, longName.length());
833 catch(exception& e) {
834 errorOut(e, "MothurOut", "getExtension");
838 /***********************************************************************/
// Tests whether a file has no content.
//   fileName - file to test; expanded with getFullPathName() before opening.
// Returns true (after closing the stream) when the stream is at eof at the
// blank-file check. An "[ERROR]: Could not open ..." line is printed for the
// open-failure case.
839 bool MothurOut::isBlank(string fileName){
842 fileName = getFullPathName(fileName);
845 fileHandle.open(fileName.c_str());
847 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
850 //check for blank file
// NOTE(review): eof() only becomes true after a read attempt; presumably a
// gobble() or similar read comes just before this test - confirm.
852 if (fileHandle.eof()) { fileHandle.close(); return true; }
857 catch(exception& e) {
858 errorOut(e, "MothurOut", "isBlank");
862 /***********************************************************************/
// Expands a relative or home-relative file name into a complete path.
//   fileName - name as typed by the user.
// Behaviour visible below:
//   * no path component (hasPath() returns "")  -> fileName is returned as is;
//   * path contains '~'                         -> the text after '~' is
//     appended to the home directory (HOME on unix-like builds, HOMEPATH in the
//     other branch);
//   * path contains no "./" (".\\" in the other branch) -> already complete,
//     returned as is;
//   * otherwise the current working directory is split into its components and
//     each "../" in the path removes one trailing component, each "./" keeps
//     the current one; the remaining components are then prefixed to the file
//     name.
// If a "./" is found in a position that fits none of these cases, a "cannot
// resolve path" message is printed and fileName is returned unchanged.
// The unix-like and Windows branches mirror each other with '/' and '\\'.
864 string MothurOut::getFullPathName(string fileName){
867 string path = hasPath(fileName);
871 if (path == "") { return fileName; } //its a simple name
872 else { //we need to complete the pathname
873 // ex. ../../../filename
874 // cwd = /user/work/desktop
877 //get current working directory
878 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
880 if (path.find("~") != -1) { //go to home directory
// HOME may be unset; in that case an empty home directory is used.
883 char *homepath = NULL;
884 homepath = getenv ("HOME");
885 if ( homepath != NULL) { homeDir = homepath; }
886 else { homeDir = ""; }
888 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
891 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
892 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
894 //char* cwdpath = new char[1024];
896 //cwdpath=getcwd(cwdpath,size);
// getcwd(NULL, 0) asks the C library to allocate the buffer.
// NOTE(review): no free(cwdpath) is visible in this function - confirm the
// buffer is released.
899 char *cwdpath = NULL;
900 cwdpath = getcwd(NULL, 0); // or _getcwd
901 if ( cwdpath != NULL) { cwd = cwdpath; }
// Drop the leading '/' so the split below does not yield an empty first entry.
907 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
909 //break apart the current working directory
911 while (simpleCWD.find_first_of('/') != string::npos) {
912 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
913 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
916 //get last one // ex. ../../../filename = /user/work/desktop/filename
917 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index marks the deepest cwd component that is still part of the result.
920 int index = dirs.size()-1;
// Walk the relative prefix from right to left, one "./" at a time.
922 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
923 if (pos == 0) { break; //you are at the end
924 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
925 path = path.substr(0, pos-1);
927 if (index == 0) { break; }
928 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
929 path = path.substr(0, pos);
930 }else if (pos == 1) { break; //you are at the end
931 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// Prefix the surviving cwd components, deepest first.
934 for (int i = index; i >= 0; i--) {
935 newFileName = dirs[i] + "/" + newFileName;
938 newFileName = "/" + newFileName;
942 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): getenv returns a null pointer when HOMEPATH is not set, and
// constructing a std::string from a null pointer is undefined; the unix-like
// branch above guards against this, this branch does not - confirm.
943 string homeDir = getenv ("HOMEPATH");
944 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
947 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
948 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
950 char *cwdpath = NULL;
951 cwdpath = getcwd(NULL, 0); // or _getcwd
952 if ( cwdpath != NULL) { cwd = cwdpath; }
955 //break apart the current working directory
957 while (cwd.find_first_of('\\') != -1) {
958 string dir = cwd.substr(0,cwd.find_first_of('\\'));
959 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
964 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
966 int index = dirs.size()-1;
968 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
969 if (pos == 0) { break; //you are at the end
970 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
971 path = path.substr(0, pos-1);
973 if (index == 0) { break; }
974 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
975 path = path.substr(0, pos);
976 }else if (pos == 1) { break; //you are at the end
977 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
980 for (int i = index; i >= 0; i--) {
981 newFileName = dirs[i] + "\\" + newFileName;
990 catch(exception& e) {
991 errorOut(e, "MothurOut", "getFullPathName");
995 /***********************************************************************/
// Quiet overload of openInputFile: opens fileName for reading without printing
// an error when the open fails (the message is commented out below).
//   fileName   - file to open; expanded with getFullPathName().
//   fileHandle - stream that is opened on the file.
//   m          - not used in the code visible here; callers in this file pass
//                "" or "no error" to select this overload.
// Callers in this file treat a result of 0 as "the file could be opened".
//
// With USE_COMPRESSION on unix-like builds, a ".gz"/".bz2" file is read through
// a named pipe: a child process runs zcat/bzcat into the pipe and fileHandle is
// opened on the pipe instead of the file.
997 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1000 string completeFileName = getFullPathName(fileName);
1001 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1002 #ifdef USE_COMPRESSION
1003 // check for gzipped or bzipped file
1004 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only returns a name; another process could create the
// same path before mkfifo runs - confirm this is acceptable.
1005 string tempName = string(tmpnam(0));
1006 mkfifo(tempName.c_str(), 0666);
1007 int fork_result = fork();
1008 if (fork_result < 0) {
1009 cerr << "Error forking.\n";
1011 } else if (fork_result == 0) {
// Child process: decompress into the pipe. The file name is concatenated into
// the shell command without quoting.
// NOTE(review): names containing spaces or shell metacharacters would break or
// alter the command - confirm inputs are trusted.
1012 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1013 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1014 system(command.c_str());
1015 cerr << "Done decompressing " << completeFileName << "\n";
1016 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1019 cerr << "waiting on child process " << fork_result << "\n";
1020 completeFileName = tempName;
1025 fileHandle.open(completeFileName.c_str());
1027 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1030 //check for blank file
1035 catch(exception& e) {
1036 errorOut(e, "MothurOut", "openInputFile - no Error");
1040 /***********************************************************************/
// Opens fileName for reading and reports problems to the user.
//   fileName   - file to open; expanded with getFullPathName().
//   fileHandle - stream that is opened on the file.
// Prints "[ERROR]: Could not open ..." for the open-failure case and
// "[ERROR]: ... is blank. Please correct." when the stream is at eof at the
// blank-file check.
//
// With USE_COMPRESSION on unix-like builds, a ".gz"/".bz2" file is read through
// a named pipe: a child process runs zcat/bzcat into the pipe and fileHandle is
// opened on the pipe instead of the file.
1042 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1045 //get full path name
1046 string completeFileName = getFullPathName(fileName);
1047 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1048 #ifdef USE_COMPRESSION
1049 // check for gzipped or bzipped file
1050 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only returns a name; another process could create the
// same path before mkfifo runs - confirm this is acceptable.
1051 string tempName = string(tmpnam(0));
1052 mkfifo(tempName.c_str(), 0666);
1053 int fork_result = fork();
1054 if (fork_result < 0) {
1055 cerr << "Error forking.\n";
1057 } else if (fork_result == 0) {
// Child process: decompress into the pipe. The file name is concatenated into
// the shell command without quoting.
// NOTE(review): names containing spaces or shell metacharacters would break or
// alter the command - confirm inputs are trusted.
1058 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1059 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1060 system(command.c_str());
1061 cerr << "Done decompressing " << completeFileName << "\n";
1062 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1065 cerr << "waiting on child process " << fork_result << "\n";
1066 completeFileName = tempName;
1072 fileHandle.open(completeFileName.c_str());
1074 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1078 //check for blank file
1080 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1085 catch(exception& e) {
1086 errorOut(e, "MothurOut", "openInputFile");
1090 /***********************************************************************/
// Renames oldName to newName, replacing newName if it already exists.
//   oldName - existing file.
//   newName - target name.
// Returns 0 immediately when both names are equal. On the unix-like platforms
// of the #if this is done with the shell commands rm and mv; the remaining
// code uses mothurRemove() and rename().
1092 int MothurOut::renameFile(string oldName, string newName){
1095 if (oldName == newName) { return 0; }
// Probe for an existing target with the quiet overload so that a missing file
// prints no error.
1098 int exist = openInputFile(newName, inTest, "");
1101 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1102 if (exist == 0) { //you could open it so you want to delete it
// The file names are concatenated into the shell commands without quoting.
// NOTE(review): names containing spaces or shell metacharacters would break or
// alter the command - confirm inputs are trusted.
1103 string command = "rm " + newName;
1104 system(command.c_str());
1107 string command = "mv " + oldName + " " + newName;
1108 system(command.c_str());
1110 mothurRemove(newName);
1111 int renameOk = rename(oldName.c_str(), newName.c_str());
1116 catch(exception& e) {
1117 errorOut(e, "MothurOut", "renameFile");
1122 /***********************************************************************/
// Opens fileName for writing, truncating any existing content.
//   fileName   - file to open; expanded with getFullPathName().
//   fileHandle - stream that is opened with ios::trunc.
// Prints "[ERROR]: Could not open ..." for the open-failure case.
//
// With USE_COMPRESSION on unix-like builds, a ".gz"/".bz2" target is written
// through a named pipe: a child process runs gzip/bzip2 reading from the pipe
// and writing the real file, while fileHandle is opened on the pipe.
1124 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1127 string completeFileName = getFullPathName(fileName);
1128 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1129 #ifdef USE_COMPRESSION
1130 // check for gzipped file
1131 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only returns a name; another process could create the
// same path before mkfifo runs - confirm this is acceptable.
1132 string tempName = string(tmpnam(0));
1133 mkfifo(tempName.c_str(), 0666);
1134 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1135 int fork_result = fork();
1136 if (fork_result < 0) {
1137 cerr << "Error forking.\n";
1139 } else if (fork_result == 0) {
// Child process: compress what arrives on the pipe into the real file. The
// file name is concatenated into the shell command without quoting.
// NOTE(review): names containing spaces or shell metacharacters would break or
// alter the command - confirm inputs are trusted.
1140 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1141 system(command.c_str());
// Parent process: write to the pipe instead of the compressed file.
1144 completeFileName = tempName;
1149 fileHandle.open(completeFileName.c_str(), ios::trunc);
1151 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1158 catch(exception& e) {
1159 errorOut(e, "MothurOut", "openOutputFile");
1165 /**************************************************************************************************/
// Appends the whole content of one file to the end of another.
//   temp     - file to read from.
//   filename - file to append to.
// The data is copied in chunks of up to 4096 bytes and the newline characters
// copied are counted in numLines. Nothing is copied when temp cannot be opened;
// the quiet openInputFile overload is used, so that case prints no error.
1166 int MothurOut::appendFiles(string temp, string filename) {
1171 //open output file in append mode
// The result of openOutputFileAppend is not checked here.
1172 openOutputFileAppend(filename, output);
1173 int ableToOpen = openInputFile(temp, input, "no error");
1174 //int ableToOpen = openInputFile(temp, input);
1177 if (ableToOpen == 0) { //you opened it
1180 while (!input.eof()) {
// read() may deliver fewer than 4096 bytes on the last chunk; gcount() gives
// the number actually read, so only that many bytes are written and scanned.
1181 input.read(buffer, 4096);
1182 output.write(buffer, input.gcount());
1183 //count number of lines
1184 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1193 catch(exception& e) {
1194 errorOut(e, "MothurOut", "appendFiles");
1198 /**************************************************************************************************/
// Appends the contents of file `temp` to file `filename`, skipping the first line of `temp`
// (treated as a header). Data is copied in reads of up to 4096 bytes and the '\n' characters in
// the copied data are counted (numLines).
// NOTE(review): declarations and the return statement are not visible in this listing.
1199 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1204 //open output file in append mode
1205 openOutputFileAppend(filename, output);
// "no error" is handed to openInputFile as its third argument; presumably it suppresses the error report when temp cannot be opened - TODO confirm.
1206 int ableToOpen = openInputFile(temp, input, "no error");
1207 //int ableToOpen = openInputFile(temp, input);
1210 if (ableToOpen == 0) { //you opened it
// Consume the header line, then let gobble() run on the stream before the bulk copy starts.
1212 string headers = getline(input); gobble(input);
1213 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1216 while (!input.eof()) {
1217 input.read(buffer, 4096);
// gcount() is the number of bytes the read above delivered, so a short final chunk is written with the right length.
1218 output.write(buffer, input.gcount());
1219 //count number of lines
1220 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1229 catch(exception& e) {
// Report this function's own name so the error is attributed to the right place.
1230 errorOut(e, "MothurOut", "appendFilesWithoutHeaders");
1234 /**************************************************************************************************/
// Sorts the rows of a distance file by the distance column and writes the result to
// getRootName(distFile) + "sorted.dist".
// NOTE(review): `outputDir` is not used by any visible line (its only use is commented out), and the
// return statement is not visible in this listing.
1235 string MothurOut::sortFile(string distFile, string outputDir){
1238 //if (outputDir == "") { outputDir += hasPath(distFile); }
1239 string outfile = getRootName(distFile) + "sorted.dist";
1242 //if you can, use the unix sort since its been optimized for years
1243 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// NOTE(review): file names are concatenated into the command line without quoting.
1244 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1245 system(command.c_str());
1246 #else //you are stuck with my best attempt...
1247 //windows sort does not have a way to specify a column, only a character in the line
1248 //since we cannot assume that the distance will always be at the the same character location on each line
1249 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1251 //read in file line by file and put distance first
1252 string tempDistFile = distFile + ".temp";
1255 openInputFile(distFile, input);
1256 openOutputFile(tempDistFile, output);
1258 string firstName, secondName;
// Pass 1: rewrite every row as "dist <tab> first <tab> second".
1260 while (!input.eof()) {
1261 input >> firstName >> secondName >> dist;
1262 output << dist << '\t' << firstName << '\t' << secondName << endl;
1269 //sort using windows sort
1270 string tempOutfile = outfile + ".temp";
1271 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1272 system(command.c_str());
1274 //read in sorted file and put distance at end again
1277 openInputFile(tempOutfile, input2);
1278 openOutputFile(outfile, output2);
// Pass 2: restore the original column order "first <tab> second <tab> dist".
1280 while (!input2.eof()) {
1281 input2 >> dist >> firstName >> secondName;
1282 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
1289 mothurRemove(tempDistFile);
1290 mothurRemove(tempOutfile);
1295 catch(exception& e) {
1296 errorOut(e, "MothurOut", "sortFile");
1300 /**************************************************************************************************/
// Collects byte offsets from a FASTA file into `positions`, stores positions.size() in `num`,
// and finally appends the file size in bytes as one more entry.
// NOTE(review): several lines of this body are not visible in this listing, including the end of
// the commented-out section that starts further down.
1301 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1303 vector<unsigned long long> positions;
1305 //openInputFile(filename, inFASTA);
// Opened in binary mode; the loop below reads one character at a time and `count` counts the characters read.
1306 inFASTA.open(filename.c_str(), ios::binary);
1309 unsigned long long count = 0;
1310 while(!inFASTA.eof()){
1311 //input = getline(inFASTA);
1312 //cout << input << '\t' << inFASTA.tellg() << endl;
1313 //if (input.length() != 0) {
1314 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1316 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1317 char c = inFASTA.get(); count++;
// count has already moved past the character just read, hence count-1. Presumably guarded by a test for '>' on a line not shown here - TODO confirm.
1319 positions.push_back(count-1);
1320 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
1325 num = positions.size();
1326 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1328 unsigned long long size;
1330 //get num bytes in file
1331 pFile = fopen (filename.c_str(),"rb");
1332 if (pFile==NULL) perror ("Error opening file");
1334 fseek (pFile, 0, SEEK_END);
1339 /*unsigned long long size = positions[(positions.size()-1)];
1341 openInputFile(filename, in);
1346 if(in.eof()) { break; }
1351 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1353 positions.push_back(size);
1358 catch(exception& e) {
1359 errorOut(e, "MothurOut", "setFilePosFasta");
1363 /**************************************************************************************************/
// Builds a table of byte offsets for the lines of `filename`.
// positions[0] is 0, `num` is set to positions.size()-1, and the last entry is overwritten with the
// file size in bytes.
// NOTE(review): some lines of this body are not visible in this listing.
1364 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1366 filename = getFullPathName(filename);
1368 vector<unsigned long long> positions;
1370 //openInputFile(filename, in);
1371 in.open(filename.c_str(), ios::binary);
1374 unsigned long long count = 0;
1375 positions.push_back(0);
1378 //getline counting reads
1379 char d = in.get(); count++;
// Scan to the end of the current line, stopping on the stream's eof flag. The previous test compared
// the character itself with the bool returned by in.eof(), which does not detect end of file.
1380 while ((d != '\n') && (d != '\r') && (d != '\f') && (!in.eof())) {
1381 //get next character
1387 d=in.get(); count++;
// Skip any further whitespace so the next recorded offset does not point at it.
1388 while(isspace(d) && (!in.eof())) { d=in.get(); count++;}
// count has already moved past the last character read, hence count-1.
1390 positions.push_back(count-1);
1391 //cout << count-1 << endl;
1395 num = positions.size()-1;
1398 unsigned long long size;
1400 //get num bytes in file
1401 pFile = fopen (filename.c_str(),"rb");
1402 if (pFile==NULL) perror ("Error opening file");
1404 fseek (pFile, 0, SEEK_END);
1409 positions[(positions.size()-1)] = size;
1413 catch(exception& e) {
1414 errorOut(e, "MothurOut", "setFilePosEachLine");
1418 /**************************************************************************************************/
// Splits `filename` into at most `proc` byte ranges whose boundaries fall on a '>' character and
// collects the boundary offsets in filePos (first entry 0, last entry the file size).
// `proc` is updated to the number of ranges produced (filePos.size() - 1).
// NOTE(review): some lines of this body are not visible in this listing.
1420 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1422 vector<unsigned long long> filePos;
1423 filePos.push_back(0);
1426 unsigned long long size;
1428 filename = getFullPathName(filename);
1430 //get num bytes in file
1431 pFile = fopen (filename.c_str(),"rb");
1432 if (pFile==NULL) perror ("Error opening file");
1434 fseek (pFile, 0, SEEK_END);
1439 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1441 //estimate file breaks
1442 unsigned long long chunkSize = 0;
1443 chunkSize = size / proc;
1445 //file to small to divide by processors
1446 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1448 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1449 for (int i = 0; i < proc; i++) {
1450 unsigned long long spot = (i+1) * chunkSize;
1453 openInputFile(filename, in);
1457 unsigned long long newSpot = spot;
// On '>' the character is put back so that tellg() reports the offset of the '>' itself.
1461 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
// -1 is treated as the end-of-input marker.
1462 else if (int(c) == -1) { break; }
1466 //there was not another sequence before the end of the file
// NOTE(review): tellg()'s result is stored in an unsigned variable, so the comparison with -1 below relies on integer conversion.
1467 unsigned long long sanityPos = in.tellg();
1469 if (sanityPos == -1) { break; }
1470 else { filePos.push_back(newSpot); }
1476 filePos.push_back(size);
1478 //sanity check filePos
1479 for (int i = 0; i < (filePos.size()-1); i++) {
// Drop any boundary that is not strictly greater than its predecessor; i-- makes the loop re-check the same index.
1480 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1483 proc = (filePos.size() - 1);
1485 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1487 filePos.push_back(size);
1491 catch(exception& e) {
1492 errorOut(e, "MothurOut", "divideFile");
1496 /**************************************************************************************************/
// Splits `filename` into at most `proc` byte ranges whose boundaries fall just after a line break and
// collects the boundary offsets in filePos (first entry 0, last entry the file size).
// `proc` is updated to the number of ranges produced (filePos.size() - 1).
// NOTE(review): some lines of this body are not visible in this listing.
1498 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1500 vector<unsigned long long> filePos;
1501 filePos.push_back(0);
1504 unsigned long long size;
1506 filename = getFullPathName(filename);
1508 //get num bytes in file
1509 pFile = fopen (filename.c_str(),"rb");
1510 if (pFile==NULL) perror ("Error opening file");
1512 fseek (pFile, 0, SEEK_END);
1517 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1519 //estimate file breaks
1520 unsigned long long chunkSize = 0;
1521 chunkSize = size / proc;
1523 //file too small to divide by processors
1524 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1526 //for each process seekg to closest file break and search for the next line break. make that the filebreak
1527 for (int i = 0; i < proc; i++) {
1528 unsigned long long spot = (i+1) * chunkSize;
1531 openInputFile(filename, in);
1534 //look for next line break
1535 unsigned long long newSpot = spot;
// After a line-break character, gobble() runs on the stream and the resulting position becomes the boundary.
1539 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
// -1 is treated as the end-of-input marker.
1540 else if (int(c) == -1) { break; }
1543 //there was not another line before the end of the file
// NOTE(review): tellg()'s result is stored in an unsigned variable, so the comparison with -1 below relies on integer conversion.
1544 unsigned long long sanityPos = in.tellg();
1546 if (sanityPos == -1) { break; }
1547 else { filePos.push_back(newSpot); }
1553 filePos.push_back(size);
1555 //sanity check filePos
1556 for (int i = 0; i < (filePos.size()-1); i++) {
// Drop any boundary that is not strictly greater than its predecessor; i-- makes the loop re-check the same index.
1557 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1560 proc = (filePos.size() - 1);
1562 mothurOut("[ERROR]: Windows version should not be calling the divideFilePerLine function."); mothurOutEndLine();
1564 filePos.push_back(size);
1568 catch(exception& e) {
// Report this function's own name so the error is not attributed to divideFile.
1569 errorOut(e, "MothurOut", "divideFilePerLine");
1573 /**************************************************************************************************/
// Physically splits `filename` into chunk files named "<filename>.<i>.tmp", one per byte range
// returned by divideFile(filename, proc), and appends each chunk file name to `files`.
// NOTE(review): some lines of this body (stream declarations, cleanup, return) are not visible in this listing.
1574 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1577 vector<unsigned long long> filePos = divideFile(filename, proc);
1579 for (int i = 0; i < (filePos.size()-1); i++) {
1583 openInputFile(filename, in);
1584 in.seekg(filePos[i]);
1585 unsigned long long size = filePos[(i+1)] - filePos[i];
// One extra byte for the terminator: the buffer is streamed below as a C string, so it has to be NUL-terminated.
1586 char* chunk = new char[size + 1];
// Terminate after the bytes actually read (gcount() never exceeds size, so the index stays in bounds).
1587 in.read(chunk, size); chunk[in.gcount()] = '\0';
1591 string fileChunkName = filename + "." + toString(i) + ".tmp";
1593 openOutputFile(fileChunkName, out);
1595 out << chunk << endl;
1600 files.push_back(fileChunkName);
1605 catch(exception& e) {
1606 errorOut(e, "MothurOut", "divideFile");
1610 /***********************************************************************/
// Returns true when `f`, compared case-insensitively, is "TRUE" or "T"; false otherwise.
1612 bool MothurOut::isTrue(string f){
// f is a by-value copy, so upper-casing it in place does not affect the caller's string.
1615 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1617 if ((f == "TRUE") || (f == "T")) { return true; }
1618 else { return false; }
1620 catch(exception& e) {
1621 errorOut(e, "MothurOut", "isTrue");
1626 /***********************************************************************/
// Rounds `dist` to the nearest multiple of 1/precision: scale by precision, add 0.5, truncate to int, scale back.
// NOTE(review): int() truncates toward zero, so negative inputs are not rounded symmetrically with positive ones.
1628 float MothurOut::roundDist(float dist, int precision){
1630 return int(dist * precision + 0.5)/float(precision);
1632 catch(exception& e) {
1633 errorOut(e, "MothurOut", "roundDist");
1637 /***********************************************************************/
// Rounds `dist` up to the next multiple of 1/precision: scale by precision, apply ceil(), scale back.
1639 float MothurOut::ceilDist(float dist, int precision){
1641 return int(ceil(dist * precision))/float(precision);
1643 catch(exception& e) {
1644 errorOut(e, "MothurOut", "ceilDist");
1648 /***********************************************************************/
// Splits the first `size` characters of `buffer` into whitespace-separated tokens.
// `rest` is an in/out accumulator: non-space characters are appended to whatever it already holds,
// and a finished token is moved from `rest` into `pieces`.
// NOTE(review): the else-branch header and the return statement are not visible in this listing.
1650 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1652 vector<string> pieces;
1654 for (int i = 0; i < size; i++) {
1655 if (!isspace(buffer[i])) { rest += buffer[i]; }
1657 if (rest != "") { pieces.push_back(rest); rest = ""; }
1658 while (i < size) { //gobble white space
1659 if (isspace(buffer[i])) { i++; }
// First character after the whitespace run starts the next token.
1660 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1667 catch(exception& e) {
1668 errorOut(e, "MothurOut", "splitWhiteSpace");
1672 /***********************************************************************/
// Splits `input` into whitespace-separated tokens collected in `pieces`.
// NOTE(review): the declaration of `rest`, the else-branch header and the return statement are not visible in this listing.
1673 vector<string> MothurOut::splitWhiteSpace(string input){
1675 vector<string> pieces;
1678 for (int i = 0; i < input.length(); i++) {
1679 if (!isspace(input[i])) { rest += input[i]; }
1681 if (rest != "") { pieces.push_back(rest); rest = ""; }
1682 while (i < input.length()) { //gobble white space
1683 if (isspace(input[i])) { i++; }
// First character after the whitespace run starts the next token.
1684 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// A token still pending when the input ends is emitted as well.
1689 if (rest != "") { pieces.push_back(rest); }
1693 catch(exception& e) {
1694 errorOut(e, "MothurOut", "splitWhiteSpace");
1698 /***********************************************************************/
// Splits `input` into whitespace-separated tokens, except that text following a ' or " is gathered
// up to the next quote character (or the end of the input) without splitting on whitespace.
// Input without any quote character is delegated to splitWhiteSpace(input).
// NOTE(review): the declaration of `rest`, some branch bodies and the return statement are not visible in this listing.
1699 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1701 vector<string> pieces;
1704 int pos = input.find('\'');
1705 int pos2 = input.find('\"');
1707 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1709 for (int i = 0; i < input.length(); i++) {
1710 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1712 for (int j = i+1; j < input.length(); j++) {
1713 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1717 }else { rest += input[j]; }
1719 }else if (!isspace(input[i])) { rest += input[i]; }
1721 if (rest != "") { pieces.push_back(rest); rest = ""; }
1722 while (i < input.length()) { //gobble white space
1723 if (isspace(input[i])) { i++; }
// First character after the whitespace run starts the next token.
1724 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// A token still pending when the input ends is emitted as well.
1729 if (rest != "") { pieces.push_back(rest); }
1733 catch(exception& e) {
// Report this function's own name so the error is not attributed to splitWhiteSpace.
1734 errorOut(e, "MothurOut", "splitWhiteSpaceWithQuotes");
1738 //**********************************************************************************************************************
// Reads a two-column taxonomy file (name, taxonomy) into taxMap and returns taxMap.size().
// The file is read in chunks of up to 4096 bytes; tokens alternate between the first and second column.
// The same per-pair handling appears twice: once for the tokens of each chunk, and once for the
// tokens obtained from whatever is left in `rest`.
// NOTE(review): despite its name, the `namefile` parameter is the taxonomy file being read.
// NOTE(review): loop headers, some branch headers and declarations are not visible in this listing.
1739 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1743 openInputFile(namefile, in);
1747 bool pairDone = false;
1748 bool columnOne = true;
1749 string firstCol, secondCol;
1752 if (control_pressed) { break; }
1754 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
1755 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1757 for (int i = 0; i < pieces.size(); i++) {
1758 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1759 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1762 checkName(firstCol);
1763 //are there confidence scores, if so remove them
1764 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1765 map<string, string>::iterator itTax = taxMap.find(firstCol);
1767 if(itTax == taxMap.end()) {
1768 bool ignore = false;
// A non-empty taxonomy string must end with ';'; otherwise the entry is reported and not stored.
1769 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1771 if (!ignore) { taxMap[firstCol] = secondCol; }
1772 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
// Duplicate name: report it and set control_pressed. The message now ends with a real newline ("\n", previously the two characters "/n").
1774 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
// Tokens from the text still held in `rest` go through the same handling as above.
1783 vector<string> pieces = splitWhiteSpace(rest);
1785 for (int i = 0; i < pieces.size(); i++) {
1786 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1787 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1790 checkName(firstCol);
1791 //are there confidence scores, if so remove them
1792 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1793 map<string, string>::iterator itTax = taxMap.find(firstCol);
1795 if(itTax == taxMap.end()) {
1796 bool ignore = false;
1797 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1799 if (!ignore) { taxMap[firstCol] = secondCol; }
1800 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
// Same duplicate-name report as above, with the newline corrected.
1802 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); control_pressed = true;
1810 return taxMap.size();
1813 catch(exception& e) {
1814 errorOut(e, "MothurOut", "readTax");
1818 /**********************************************************************************************************************/
// Reads a two-column names file and fills nameMap with one entry per name in the comma-separated
// second column, each mapped to the first-column name. Returns nameMap.size().
// The file is read in chunks of up to 4096 bytes; tokens alternate between the two columns, and the
// same handling is repeated for the tokens obtained from whatever is left in `rest`.
// NOTE(review): `redund` is not used by any visible line; loop and branch headers are not visible in this listing.
1819 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1823 openInputFile(namefile, in);
1827 bool pairDone = false;
1828 bool columnOne = true;
1829 string firstCol, secondCol;
1832 if (control_pressed) { break; }
1834 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
1835 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1837 for (int i = 0; i < pieces.size(); i++) {
1838 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1839 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1842 checkName(firstCol);
1843 checkName(secondCol);
1845 //parse names into vector
1846 vector<string> theseNames;
1847 splitAtComma(secondCol, theseNames);
1848 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
// Tokens from the text still held in `rest` go through the same handling as above.
1856 vector<string> pieces = splitWhiteSpace(rest);
1858 for (int i = 0; i < pieces.size(); i++) {
1859 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1860 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1863 checkName(firstCol);
1864 checkName(secondCol);
1866 //parse names into vector
1867 vector<string> theseNames;
1868 splitAtComma(secondCol, theseNames);
1869 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1875 return nameMap.size();
1878 catch(exception& e) {
1879 errorOut(e, "MothurOut", "readNames");
1883 /**********************************************************************************************************************/
// Reads a two-column names file and stores each pair reversed: nameMap[secondCol] = firstCol.
// The second column is used whole as the key (it is not split at commas). Returns nameMap.size().
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): `flip` is not used by any visible line; loop and branch headers are not visible in this listing.
1884 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
1888 openInputFile(namefile, in);
1892 bool pairDone = false;
1893 bool columnOne = true;
1894 string firstCol, secondCol;
1897 if (control_pressed) { break; }
1899 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
1900 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1902 for (int i = 0; i < pieces.size(); i++) {
1903 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1904 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1907 checkName(firstCol);
1908 checkName(secondCol);
1909 nameMap[secondCol] = firstCol;
// Tokens from the text still held in `rest` go through the same handling as above.
1917 vector<string> pieces = splitWhiteSpace(rest);
1919 for (int i = 0; i < pieces.size(); i++) {
1920 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1921 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1924 checkName(firstCol);
1925 checkName(secondCol);
1926 nameMap[secondCol] = firstCol;
1932 return nameMap.size();
1935 catch(exception& e) {
1936 errorOut(e, "MothurOut", "readNames");
1940 /**********************************************************************************************************************/
// Reads a two-column names file. Both output maps are cleared first; then every name in the
// comma-separated second column is mapped to the first-column name in nameMap, and
// nameCount[firstCol] receives the number of names in that list. Returns nameMap.size().
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): loop and branch headers are not visible in this listing.
1941 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
1943 nameMap.clear(); nameCount.clear();
1946 openInputFile(namefile, in);
1950 bool pairDone = false;
1951 bool columnOne = true;
1952 string firstCol, secondCol;
1955 if (control_pressed) { break; }
1957 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
1958 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1960 for (int i = 0; i < pieces.size(); i++) {
1961 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1962 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1965 checkName(firstCol);
1966 checkName(secondCol);
1967 //parse names into vector
1968 vector<string> theseNames;
1969 splitAtComma(secondCol, theseNames);
1970 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1971 nameCount[firstCol] = theseNames.size();
// Tokens from the text still held in `rest` go through the same handling as above.
1979 vector<string> pieces = splitWhiteSpace(rest);
1981 for (int i = 0; i < pieces.size(); i++) {
1982 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1983 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1986 checkName(firstCol);
1987 checkName(secondCol);
1988 //parse names into vector
1989 vector<string> theseNames;
1990 splitAtComma(secondCol, theseNames);
1991 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1992 nameCount[firstCol] = theseNames.size();
1998 return nameMap.size();
2001 catch(exception& e) {
2002 errorOut(e, "MothurOut", "readNames");
2006 /**********************************************************************************************************************/
// Reads a two-column names file and stores each pair as nameMap[firstCol] = secondCol, keeping the
// second column as one unsplit string. Returns nameMap.size().
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): loop and branch headers are not visible in this listing.
2007 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2011 openInputFile(namefile, in);
2015 bool pairDone = false;
2016 bool columnOne = true;
2017 string firstCol, secondCol;
2020 if (control_pressed) { break; }
2022 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2023 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2025 for (int i = 0; i < pieces.size(); i++) {
2026 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2027 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2030 checkName(firstCol);
2031 checkName(secondCol);
// pairDone is reset once the completed pair has been stored.
2032 nameMap[firstCol] = secondCol; pairDone = false; }
// Tokens from the text still held in `rest` go through the same handling as above.
2038 vector<string> pieces = splitWhiteSpace(rest);
2040 for (int i = 0; i < pieces.size(); i++) {
2041 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2042 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2045 checkName(firstCol);
2046 checkName(secondCol);
2047 nameMap[firstCol] = secondCol; pairDone = false; }
2051 return nameMap.size();
2054 catch(exception& e) {
2055 errorOut(e, "MothurOut", "readNames");
2059 /**********************************************************************************************************************/
// Reads a two-column names file and stores, for each first-column name, the vector obtained by
// splitting the second column at commas. Returns nameMap.size().
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): loop and branch headers are not visible in this listing.
2060 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2064 openInputFile(namefile, in);
2068 bool pairDone = false;
2069 bool columnOne = true;
2070 string firstCol, secondCol;
2073 if (control_pressed) { break; }
2075 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2076 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2078 for (int i = 0; i < pieces.size(); i++) {
2079 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2080 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2083 checkName(firstCol);
2084 checkName(secondCol);
2085 vector<string> temp;
2086 splitAtComma(secondCol, temp);
2087 nameMap[firstCol] = temp;
// Tokens from the text still held in `rest` go through the same handling as above.
2095 vector<string> pieces = splitWhiteSpace(rest);
2097 for (int i = 0; i < pieces.size(); i++) {
2098 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2099 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2102 checkName(firstCol);
2103 checkName(secondCol);
2104 vector<string> temp;
2105 splitAtComma(secondCol, temp);
2106 nameMap[firstCol] = temp;
2112 return nameMap.size();
2114 catch(exception& e) {
2115 errorOut(e, "MothurOut", "readNames");
2119 /**********************************************************************************************************************/
// Reads a two-column names file into a local map from each first-column name to
// getNumNames(secondCol) for its second column.
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): loop and branch headers and the return statement are not visible in this listing.
2120 map<string, int> MothurOut::readNames(string namefile) {
2122 map<string, int> nameMap;
2126 openInputFile(namefile, in);
2130 bool pairDone = false;
2131 bool columnOne = true;
2132 string firstCol, secondCol;
2135 if (control_pressed) { break; }
2137 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2138 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2140 for (int i = 0; i < pieces.size(); i++) {
2141 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2142 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2145 checkName(firstCol);
2146 checkName(secondCol);
2147 int num = getNumNames(secondCol);
2148 nameMap[firstCol] = num;
// Tokens from the text still held in `rest` go through the same handling as above.
2156 vector<string> pieces = splitWhiteSpace(rest);
2157 for (int i = 0; i < pieces.size(); i++) {
2158 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2159 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2162 checkName(firstCol);
2163 checkName(secondCol);
2164 int num = getNumNames(secondCol);
2165 nameMap[firstCol] = num;
2174 catch(exception& e) {
2175 errorOut(e, "MothurOut", "readNames");
2179 /**********************************************************************************************************************/
// Reads a two-column names file into a local map from each first-column name to
// getNumNames(secondCol) for its second column.
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): no visible line touches `numSeqs`; presumably it is updated on lines missing from
// this listing - TODO confirm. Loop and branch headers and the return statement are not visible either.
2180 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2182 map<string, int> nameMap;
2187 openInputFile(namefile, in);
2191 bool pairDone = false;
2192 bool columnOne = true;
2193 string firstCol, secondCol;
2196 if (control_pressed) { break; }
2198 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2199 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2201 for (int i = 0; i < pieces.size(); i++) {
2202 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2203 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2206 checkName(firstCol);
2207 checkName(secondCol);
2208 int num = getNumNames(secondCol);
2209 nameMap[firstCol] = num;
// Tokens from the text still held in `rest` go through the same handling as above.
2218 vector<string> pieces = splitWhiteSpace(rest);
2219 for (int i = 0; i < pieces.size(); i++) {
2220 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2221 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2224 checkName(firstCol);
2225 checkName(secondCol);
2226 int num = getNumNames(secondCol);
2227 nameMap[firstCol] = num;
2237 catch(exception& e) {
2238 errorOut(e, "MothurOut", "readNames");
2242 /************************************************************/
// Replaces every ':' in `name` with '_' (in place) and sets changedSeqNames to true when at least
// one replacement was made.
// NOTE(review): the return statement is not visible in this listing.
2243 int MothurOut::checkName(string& name) {
2246 for (int i = 0; i < name.length(); i++) {
2247 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2252 catch(exception& e) {
2253 errorOut(e, "MothurOut", "checkName");
2257 /**********************************************************************************************************************/
// Reads a two-column names file and, for each pair, appends a seqPriorityNode built from
// (getNumNames(secondCol), the fastamap value for firstCol, firstCol) to nameVector.
// A first-column name that is missing from fastamap is reported as an error.
// The file is read in chunks of up to 4096 bytes; the same handling is repeated for the tokens
// obtained from whatever is left in `rest`.
// NOTE(review): loop and branch headers (including the else that presumably guards the node
// construction) and the return statement are not visible in this listing.
2258 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2264 openInputFile(namefile, in);
2268 bool pairDone = false;
2269 bool columnOne = true;
2270 string firstCol, secondCol;
2273 if (control_pressed) { break; }
2275 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2276 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2278 for (int i = 0; i < pieces.size(); i++) {
2279 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2280 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2283 checkName(firstCol);
2284 checkName(secondCol);
2285 int num = getNumNames(secondCol);
2287 map<string, string>::iterator it = fastamap.find(firstCol);
2288 if (it == fastamap.end()) {
2290 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2292 seqPriorityNode temp(num, it->second, firstCol);
2293 nameVector.push_back(temp);
// Tokens from the text still held in `rest` go through the same handling as above.
2303 vector<string> pieces = splitWhiteSpace(rest);
2305 for (int i = 0; i < pieces.size(); i++) {
2306 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2307 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2310 checkName(firstCol);
2311 checkName(secondCol);
2312 int num = getNumNames(secondCol);
2314 map<string, string>::iterator it = fastamap.find(firstCol);
2315 if (it == fastamap.end()) {
2317 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2319 seqPriorityNode temp(num, it->second, firstCol);
2320 nameVector.push_back(temp);
2329 catch(exception& e) {
2330 errorOut(e, "MothurOut", "readNames");
2334 //**********************************************************************************************************************
// Reads an accnos file and inserts every whitespace-separated token into `names`, after passing
// each token through checkName().
// The file is read in chunks of up to 4096 bytes; the tokens obtained from whatever is left in
// `rest` are handled the same way.
// NOTE(review): declarations, the loop header and the return statement are not visible in this listing.
2335 set<string> MothurOut::readAccnos(string accnosfile){
2339 openInputFile(accnosfile, in);
2346 if (control_pressed) { break; }
2348 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2349 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2351 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2356 vector<string> pieces = splitWhiteSpace(rest);
2357 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2361 catch(exception& e) {
2362 errorOut(e, "MothurOut", "readAccnos");
2366 //**********************************************************************************************************************
// Reads an accnos file and appends every whitespace-separated token to `names` in file order,
// after passing each token through checkName(). Existing content of `names` is not cleared by any visible line.
// The file is read in chunks of up to 4096 bytes; the tokens obtained from whatever is left in
// `rest` are handled the same way.
// NOTE(review): declarations, the loop header and the return statement are not visible in this listing.
2367 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2371 openInputFile(accnosfile, in);
2378 if (control_pressed) { break; }
2380 in.read(buffer, 4096);
// `rest` carries a token that may continue in the next chunk.
2381 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2383 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2388 vector<string> pieces = splitWhiteSpace(rest);
2389 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2394 catch(exception& e) {
2395 errorOut(e, "MothurOut", "readAccnos");
2399 /***********************************************************************/
// Scans `names` character by character looking for commas.
// NOTE(review): the counter and the return statement are not visible in this listing; presumably the
// result is the number of comma-separated names - TODO confirm.
2401 int MothurOut::getNumNames(string names){
2407 for(int i=0;i<names.size();i++){
2408 if(names[i] == ','){
2416 catch(exception& e) {
2417 errorOut(e, "MothurOut", "getNumNames");
2421 /***********************************************************************/
// Iterates over every character of `line`.
// NOTE(review): the loop body and the return statement are not visible in this listing; presumably
// the function counts occurrences of `c` - TODO confirm.
2423 int MothurOut::getNumChar(string line, char c){
2428 for(int i=0;i<line.size();i++){
2437 catch(exception& e) {
2438 errorOut(e, "MothurOut", "getNumChar");
2442 //**********************************************************************************************************************
// Checks whether every element of `subset` also occurs in `bigset`. Returns false as soon as one
// element has no match, or immediately when `subset` has more elements than `bigset`.
// Each element is looked up with a linear scan of `bigset`.
// NOTE(review): the declaration of `match` and the final return statement are not visible in this listing.
2443 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2447 if (subset.size() > bigset.size()) { return false; }
2449 //check if each guy in subset is also in bigset
2450 for (int i = 0; i < subset.size(); i++) {
2452 for (int j = 0; j < bigset.size(); j++) {
2453 if (subset[i] == bigset[j]) { match = true; break; }
2456 //you have a guy in subset that had no match in bigset
2457 if (match == false) { return false; }
2463 catch(exception& e) {
2464 errorOut(e, "MothurOut", "isSubset");
2468 /***********************************************************************/
// Deletes the file `filename` (after expanding it with getFullPathName) using remove().
// The failure report is commented out, so a failed delete prints nothing from the visible code.
// NOTE(review): the return statement is not visible in this listing.
2469 int MothurOut::mothurRemove(string filename){
2471 filename = getFullPathName(filename);
2472 int error = remove(filename.c_str());
2474 // if (errno != ENOENT) { //ENOENT == file does not exist
2475 // string message = "Error deleting file " + filename;
2476 // perror(message.c_str());
2481 catch(exception& e) {
2482 errorOut(e, "MothurOut", "mothurRemove");
2486 /***********************************************************************/
// Converts `item` to an int in `num` when isNumeric1(item) accepts it. The visible error path logs
// a message and sets commandInputsConvertError to true.
// NOTE(review): the conversion statement, the else header and the return statements are not visible in this listing.
2487 bool MothurOut::mothurConvert(string item, int& num){
2491 if (isNumeric1(item)) {
2496 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2497 commandInputsConvertError = true;
2502 catch(exception& e) {
2503 errorOut(e, "MothurOut", "mothurConvert");
2507 /***********************************************************************/
// Converts `item` to an intDist in `num` when isNumeric1(item) accepts it. The visible error path
// logs a message and sets commandInputsConvertError to true.
// NOTE(review): the conversion statement, the else header and the return statements are not visible in this listing.
2508 bool MothurOut::mothurConvert(string item, intDist& num){
2512 if (isNumeric1(item)) {
2517 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2518 commandInputsConvertError = true;
2523 catch(exception& e) {
2524 errorOut(e, "MothurOut", "mothurConvert");
2529 /***********************************************************************/
// Sets `numeric` to true when `stringToCheck` contains only the characters 0-9, '.' and '-'.
// NOTE(review): this is a character-set test only, so the empty string and inputs such as "--" or
// "1.2.3" also pass. The return statement is not visible in this listing.
2530 bool MothurOut::isNumeric1(string stringToCheck){
2532 bool numeric = false;
2534 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2538 catch(exception& e) {
2539 errorOut(e, "MothurOut", "isNumeric1");
2544 /***********************************************************************/
// Converts `item` to a float in `num` when isNumeric1(item) accepts it. The visible error path logs
// a message and sets commandInputsConvertError to true.
// NOTE(review): the conversion statement, the else header and the return statements are not visible in this listing.
2545 bool MothurOut::mothurConvert(string item, float& num){
2549 if (isNumeric1(item)) {
2554 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2555 commandInputsConvertError = true;
2560 catch(exception& e) {
2561 errorOut(e, "MothurOut", "mothurConvert");
2565 /***********************************************************************/
// Converts `item` to a double in `num` when isNumeric1(item) accepts it. The visible error path logs
// a message and sets commandInputsConvertError to true.
// NOTE(review): the conversion statement, the else header and the return statements are not visible in this listing.
2566 bool MothurOut::mothurConvert(string item, double& num){
2570 if (isNumeric1(item)) {
2575 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2576 commandInputsConvertError = true;
2581 catch(exception& e) {
2582 errorOut(e, "MothurOut", "mothurConvert");
2586 /**************************************************************************************************/
2588 vector<vector<double> > MothurOut::binomial(int maxOrder){
2590 vector<vector<double> > binomial(maxOrder+1);
2592 for(int i=0;i<=maxOrder;i++){
2593 binomial[i].resize(maxOrder+1);
2602 for(int i=2;i<=maxOrder;i++){
2606 for(int i=2;i<=maxOrder;i++){
2607 for(int j=1;j<=maxOrder;j++){
2608 if(i==j){ binomial[i][j]=1; }
2609 if(j>i) { binomial[i][j]=0; }
2610 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2617 catch(exception& e) {
2618 errorOut(e, "MothurOut", "binomial");
2622 /**************************************************************************************************/
2623 unsigned int MothurOut::fromBase36(string base36){
2625 unsigned int num = 0;
2627 map<char, int> converts;
2692 while (i < base36.length()) {
2694 num = 36 * num + converts[c];
2701 catch(exception& e) {
2702 errorOut(e, "MothurOut", "fromBase36");
2706 /***********************************************************************/
2708 int MothurOut::factorial(int num){
2712 for (int i = 1; i <= num; i++) {
2718 catch(exception& e) {
2719 errorOut(e, "MothurOut", "factorial");
2723 /***********************************************************************/
2725 int MothurOut::getNumSeqs(ifstream& file){
2727 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2731 catch(exception& e) {
2732 errorOut(e, "MothurOut", "getNumSeqs");
2736 /***********************************************************************/
2737 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2742 input = getline(file);
2743 if (input.length() != 0) {
2744 if(input[0] == '>'){ numSeqs++; }
2748 catch(exception& e) {
2749 errorOut(e, "MothurOut", "getNumSeqs");
2753 /***********************************************************************/
2755 //This function parses the estimator options and puts them in a vector
2756 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2759 if (symbol == '-') { splitAtDash(estim, container); return; }
2761 string individual = "";
2762 int estimLength = estim.size();
2763 for(int i=0;i<estimLength;i++){
2764 if(estim[i] == symbol){
2765 container.push_back(individual);
2769 individual += estim[i];
2772 container.push_back(individual);
2775 catch(exception& e) {
2776 errorOut(e, "MothurOut", "splitAtChar");
2781 /***********************************************************************/
2783 //This function parses the estimator options and puts them in a vector
2784 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2786 string individual = "";
2787 int estimLength = estim.size();
2788 bool prevEscape = false;
2789 /*for(int i=0;i<estimLength;i++){
2791 individual += estim[i];
2795 if(estim[i] == '\\'){
2798 else if(estim[i] == '-'){
2799 container.push_back(individual);
2804 individual += estim[i];
2811 for(int i=0;i<estimLength;i++){
2812 if(estim[i] == '-'){
2813 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2815 container.push_back(individual);
2818 }else if(estim[i] == '\\'){
2819 if (i < estimLength-1) {
2820 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2821 else { individual += estim[i]; prevEscape = false; } //if no, add in
2822 }else { individual += estim[i]; }
2824 individual += estim[i];
2830 container.push_back(individual);
2832 catch(exception& e) {
2833 errorOut(e, "MothurOut", "splitAtDash");
2838 /***********************************************************************/
2839 //This function parses the label options and puts them in a set
2840 void MothurOut::splitAtDash(string& estim, set<string>& container) {
2842 string individual = "";
2843 int estimLength = estim.size();
2844 bool prevEscape = false;
2846 for(int i=0;i<estimLength;i++){
2848 individual += estim[i];
2852 if(estim[i] == '\\'){
2855 else if(estim[i] == '-'){
2856 container.insert(individual);
2861 individual += estim[i];
2868 for(int i=0;i<estimLength;i++){
2869 if(estim[i] == '-'){
2870 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2872 container.insert(individual);
2875 }else if(estim[i] == '\\'){
2876 if (i < estimLength-1) {
2877 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2878 else { individual += estim[i]; prevEscape = false; } //if no, add in
2879 }else { individual += estim[i]; }
2881 individual += estim[i];
2884 container.insert(individual);
2887 catch(exception& e) {
2888 errorOut(e, "MothurOut", "splitAtDash");
2892 /***********************************************************************/
2893 //This function parses the line options and puts them in a set
2894 void MothurOut::splitAtDash(string& estim, set<int>& container) {
2896 string individual = "";
2898 int estimLength = estim.size();
2899 bool prevEscape = false;
2901 for(int i=0;i<estimLength;i++){
2903 individual += estim[i];
2907 if(estim[i] == '\\'){
2910 else if(estim[i] == '-'){
2911 convert(individual, lineNum); //convert the string to int
2912 container.insert(lineNum);
2917 individual += estim[i];
2923 for(int i=0;i<estimLength;i++){
2924 if(estim[i] == '-'){
2925 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2927 convert(individual, lineNum); //convert the string to int
2928 container.insert(lineNum);
2931 }else if(estim[i] == '\\'){
2932 if (i < estimLength-1) {
2933 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2934 else { individual += estim[i]; prevEscape = false; } //if no, add in
2935 }else { individual += estim[i]; }
2937 individual += estim[i];
2941 convert(individual, lineNum); //convert the string to int
2942 container.insert(lineNum);
2944 catch(exception& e) {
2945 errorOut(e, "MothurOut", "splitAtDash");
2950 /***********************************************************************/
2951 string MothurOut::makeList(vector<string>& names) {
2955 if (names.size() == 0) { return list; }
2957 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
2960 list += names[names.size()-1];
2964 catch(exception& e) {
2965 errorOut(e, "MothurOut", "makeList");
2970 /***********************************************************************/
2971 //This function parses the a string and puts peices in a vector
2972 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
2974 string individual = "";
2975 int estimLength = estim.size();
2976 for(int i=0;i<estimLength;i++){
2977 if(estim[i] == ','){
2978 container.push_back(individual);
2982 individual += estim[i];
2985 container.push_back(individual);
2990 // string individual;
2992 // while (estim.find_first_of(',') != -1) {
2993 // individual = estim.substr(0,estim.find_first_of(','));
2994 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
2995 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
2996 // container.push_back(individual);
3000 // container.push_back(estim);
3002 catch(exception& e) {
3003 errorOut(e, "MothurOut", "splitAtComma");
3007 /***********************************************************************/
3008 //This function splits up the various option parameters
3009 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3011 prefix = suffix.substr(0,suffix.find_first_of(c));
3012 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3013 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3015 while(suffix.at(0) == ' ')
3016 suffix = suffix.substr(1, suffix.length());
3017 }else { suffix = ""; }
3020 catch(exception& e) {
3021 errorOut(e, "MothurOut", "splitAtChar");
3026 /***********************************************************************/
3028 //This function splits up the various option parameters
3029 void MothurOut::splitAtComma(string& prefix, string& suffix){
3031 prefix = suffix.substr(0,suffix.find_first_of(','));
3032 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3033 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3035 while(suffix.at(0) == ' ')
3036 suffix = suffix.substr(1, suffix.length());
3037 }else { suffix = ""; }
3040 catch(exception& e) {
3041 errorOut(e, "MothurOut", "splitAtComma");
3045 /***********************************************************************/
3047 //This function separates the key value from the option value i.e. dist=96_...
3048 void MothurOut::splitAtEquals(string& key, string& value){
3050 if(value.find_first_of('=') != -1){
3051 key = value.substr(0,value.find_first_of('='));
3052 if ((value.find_first_of('=')+1) <= value.length()) {
3053 value = value.substr(value.find_first_of('=')+1, value.length());
3060 catch(exception& e) {
3061 errorOut(e, "MothurOut", "splitAtEquals");
3066 /**************************************************************************************************/
3068 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3070 for (int i = 0; i < Groups.size(); i++) {
3071 if (groupname == Groups[i]) { return true; }
3075 catch(exception& e) {
3076 errorOut(e, "MothurOut", "inUsersGroups");
3080 /**************************************************************************************************/
3082 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3084 for (int i = 0; i < sets.size(); i++) {
3085 if (set == sets[i]) { return true; }
3089 catch(exception& e) {
3090 errorOut(e, "MothurOut", "inUsersGroups");
3094 /**************************************************************************************************/
3096 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3098 for (int i = 0; i < Groups.size(); i++) {
3099 if (groupname == Groups[i]) { return true; }
3103 catch(exception& e) {
3104 errorOut(e, "MothurOut", "inUsersGroups");
3109 /**************************************************************************************************/
3110 //returns true if any of the strings in first vector are in second vector
3111 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3114 for (int i = 0; i < groupnames.size(); i++) {
3115 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3119 catch(exception& e) {
3120 errorOut(e, "MothurOut", "inUsersGroups");
3124 /***********************************************************************/
3125 //this function determines if the user has given us labels that are smaller than the given label.
3126 //if so then it returns true so that the calling function can run the previous valid distance.
3127 //it's a "smart" distance function. It also checks for invalid labels.
3128 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3131 set<string>::iterator it;
3132 vector<float> orderFloat;
3133 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3134 map<string, float>::iterator it2;
3136 bool smaller = false;
3138 //unique is the smallest line
3139 if (label == "unique") { return false; }
3141 if (convertTestFloat(label, labelFloat)) {
3142 convert(label, labelFloat);
3143 }else { //cant convert
3148 //go through users set and make them floats
3149 for(it = userLabels.begin(); it != userLabels.end();) {
3152 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3154 orderFloat.push_back(temp);
3155 userMap[*it] = temp;
3157 }else if (*it == "unique") {
3158 orderFloat.push_back(-1.0);
3159 userMap["unique"] = -1.0;
3162 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
3163 userLabels.erase(it++);
3168 sort(orderFloat.begin(), orderFloat.end());
3170 /*************************************************/
3171 //is this label bigger than any of the users labels
3172 /*************************************************/
3174 //loop through order until you find a label greater than label
3175 for (int i = 0; i < orderFloat.size(); i++) {
3176 if (orderFloat[i] < labelFloat) {
3178 if (orderFloat[i] == -1) {
3179 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3180 userLabels.erase("unique");
3183 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
3185 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3186 if (it2->second == orderFloat[i]) {
3188 //remove small labels
3189 userLabels.erase(s);
3193 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3195 //since they are sorted once you find a bigger one stop looking
3202 catch(exception& e) {
3203 errorOut(e, "MothurOut", "anyLabelsToProcess");
3208 /**************************************************************************************************/
3209 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3214 string line = getline(file);
3216 //before we added this check
3217 if (line[0] != '#') { good = false; }
3220 line = line.substr(1);
3222 vector<string> versionVector;
3223 splitAtChar(version, versionVector, '.');
3225 //check file version
3226 vector<string> linesVector;
3227 splitAtChar(line, linesVector, '.');
3229 if (versionVector.size() != linesVector.size()) { good = false; }
3231 for (int j = 0; j < versionVector.size(); j++) {
3233 convert(versionVector[j], num1);
3234 convert(linesVector[j], num2);
3236 //if mothurs version is newer than this files version, then we want to remake it
3237 if (num1 > num2) { good = false; break; }
3243 if (!good) { file.close(); }
3244 else { file.seekg(0); }
3248 catch(exception& e) {
3249 errorOut(e, "MothurOut", "checkReleaseVersion");
3253 /**************************************************************************************************/
3254 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3256 vector<double> averages; //averages.resize(numComp, 0.0);
3257 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3259 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3260 for (int i = 0; i < dists[thisIter].size(); i++) {
3261 averages[i] += dists[thisIter][i];
3266 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3270 catch(exception& e) {
3271 errorOut(e, "MothurOut", "getAverages");
3275 /**************************************************************************************************/
3276 double MothurOut::getAverage(vector<double> dists) {
3280 for (int i = 0; i < dists.size(); i++) {
3281 average += dists[i];
3285 average /= (double) dists.size();
3289 catch(exception& e) {
3290 errorOut(e, "MothurOut", "getAverage");
3295 /**************************************************************************************************/
3296 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3299 vector<double> averages = getAverages(dists);
3301 //find standard deviation
3302 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3303 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3305 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3306 for (int j = 0; j < dists[thisIter].size(); j++) {
3307 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3310 for (int i = 0; i < stdDev.size(); i++) {
3311 stdDev[i] /= (double) dists.size();
3312 stdDev[i] = sqrt(stdDev[i]);
3317 catch(exception& e) {
3318 errorOut(e, "MothurOut", "getAverages");
3322 /**************************************************************************************************/
3323 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3325 //find standard deviation
3326 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3327 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3329 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3330 for (int j = 0; j < dists[thisIter].size(); j++) {
3331 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3334 for (int i = 0; i < stdDev.size(); i++) {
3335 stdDev[i] /= (double) dists.size();
3336 stdDev[i] = sqrt(stdDev[i]);
3341 catch(exception& e) {
3342 errorOut(e, "MothurOut", "getAverages");
3346 /**************************************************************************************************/
3347 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3350 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3351 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3352 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3353 vector<seqDist> temp;
3354 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3356 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3357 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3358 tempDist.dist = 0.0;
3359 temp.push_back(tempDist);
3361 calcAverages.push_back(temp);
3364 if (mode == "average") {
3365 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3366 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3367 for (int j = 0; j < calcAverages[i].size(); j++) {
3368 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3373 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3374 for (int j = 0; j < calcAverages[i].size(); j++) {
3375 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3378 }else { //find median
3379 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3380 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3381 vector<double> dists;
3382 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3383 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3385 sort(dists.begin(), dists.end());
3386 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3391 return calcAverages;
3393 catch(exception& e) {
3394 errorOut(e, "MothurOut", "getAverages");
3398 /**************************************************************************************************/
3399 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3402 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3403 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3404 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3405 vector<seqDist> temp;
3406 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3408 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3409 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3410 tempDist.dist = 0.0;
3411 temp.push_back(tempDist);
3413 calcAverages.push_back(temp);
3417 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3418 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3419 for (int j = 0; j < calcAverages[i].size(); j++) {
3420 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3425 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3426 for (int j = 0; j < calcAverages[i].size(); j++) {
3427 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3431 return calcAverages;
3433 catch(exception& e) {
3434 errorOut(e, "MothurOut", "getAverages");
3438 /**************************************************************************************************/
3439 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3442 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3444 //find standard deviation
3445 vector< vector<seqDist> > stdDev;
3446 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3447 vector<seqDist> temp;
3448 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3450 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3451 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3452 tempDist.dist = 0.0;
3453 temp.push_back(tempDist);
3455 stdDev.push_back(temp);
3458 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3459 for (int i = 0; i < stdDev.size(); i++) {
3460 for (int j = 0; j < stdDev[i].size(); j++) {
3461 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3466 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3467 for (int j = 0; j < stdDev[i].size(); j++) {
3468 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3469 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3475 catch(exception& e) {
3476 errorOut(e, "MothurOut", "getAverages");
3480 /**************************************************************************************************/
3481 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3483 //find standard deviation
3484 vector< vector<seqDist> > stdDev;
3485 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3486 vector<seqDist> temp;
3487 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3489 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3490 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3491 tempDist.dist = 0.0;
3492 temp.push_back(tempDist);
3494 stdDev.push_back(temp);
3497 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3498 for (int i = 0; i < stdDev.size(); i++) {
3499 for (int j = 0; j < stdDev[i].size(); j++) {
3500 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3505 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3506 for (int j = 0; j < stdDev[i].size(); j++) {
3507 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3508 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3514 catch(exception& e) {
3515 errorOut(e, "MothurOut", "getAverages");
3520 /**************************************************************************************************/
3521 bool MothurOut::isContainingOnlyDigits(string input) {
3524 //are you a digit in ascii code
3525 for (int i = 0;i < input.length(); i++){
3526 if( input[i]>47 && input[i]<58){}
3527 else { return false; }
3532 catch(exception& e) {
3533 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3537 /**************************************************************************************************/
3538 int MothurOut::removeConfidences(string& tax) {
3544 while (tax.find_first_of(';') != -1) {
3546 if (control_pressed) { return 0; }
3549 taxon = tax.substr(0,tax.find_first_of(';'));
3551 int pos = taxon.find_last_of('(');
3554 int pos2 = taxon.find_last_of(')');
3556 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
3557 if (isNumeric1(confidenceScore)) {
3558 taxon = taxon.substr(0, pos); //rip off confidence
3564 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
3572 catch(exception& e) {
3573 errorOut(e, "MothurOut", "removeConfidences");
3577 /**************************************************************************************************/
3578 string MothurOut::removeQuotes(string tax) {
3584 for (int i = 0; i < tax.length(); i++) {
3586 if (control_pressed) { return newTax; }
3588 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3594 catch(exception& e) {
3595 errorOut(e, "MothurOut", "removeQuotes");
3599 /**************************************************************************************************/
3600 // function for calculating standard deviation
3601 double MothurOut::getStandardDeviation(vector<int>& featureVector){
3605 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3606 average /= (double) featureVector.size();
3608 //find standard deviation
3610 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3611 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
3614 stdDev /= (double) featureVector.size();
3615 stdDev = sqrt(stdDev);
3619 catch(exception& e) {
3620 errorOut(e, "MothurOut", "getStandardDeviation");
3624 /**************************************************************************************************/