5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Inserts the names of the "current" settings (file types plus "processors") into types.
// NOTE(review): the embedded line numbers skip (30 -> 33, 40 -> 42, 43 -> 47), so this
// listing omits statements - the declaration of types, the return, and possibly more
// insert calls. TODO confirm against the complete file before relying on this list.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
48 types.insert("processors");
// presumably inside a catch handler (the catch line itself is not shown): reports e via errorOut
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
94 bool MothurOut::hasCurrentFiles() {
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): the embedded line numbers jump from 133 to 160, so the whole body of this
// function is missing from the listing. Judging by the name it presumably resets the
// current-file settings - TODO confirm against the complete file.
133 void MothurOut::clearCurrentFiles() {
// exceptions raised by the (unseen) body are reported through errorOut
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
166 string MothurOut::findProgramPath(string programName){
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Stores filename in logFileName and opens it for writing on the out stream via openOutputFile.
// NOTE(review): the embedded line numbers skip (243 -> 247 -> 249 -> 252 -> 258), so the
// declaration of pid and whatever encloses the MPI call are not shown - presumably a
// USE_MPI preprocessor guard; TODO confirm against the complete file.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
// asks MPI for this process' rank
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// NOTE(review): only the MPI rank check and the catch handler are visible; the statements
// that act on the log (embedded lines 302-309) are missing from this listing.
// Presumably closes the log stream - TODO confirm against the complete file.
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. NOTE(review): the body (embedded lines 318-321) is missing from this listing,
// so what is released here cannot be stated - TODO confirm against the complete file.
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
// the function label passed here is "MothurOut", i.e. the class name
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Emits output; when the MPI rank check applies, only the process with rank 0 does so.
// NOTE(review): the statements that actually write output (embedded lines 336-344) and the
// declaration of pid are missing from this listing - TODO confirm which streams are written.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Screen-only variant of mothurOut, judging by the name; restricted to MPI rank 0 where the
// rank check applies. NOTE(review): the writing statements (embedded lines 359-365) are
// missing from this listing - TODO confirm.
351 void MothurOut::mothurOutJustToScreen(string output) {
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
366 catch(exception& e) {
// NOTE(review): the function label reported here is "MothurOut", not "mothurOutJustToScreen"
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Ends the current output line, judging by the name; restricted to MPI rank 0 where the rank
// check applies. NOTE(review): the writing statements (embedded lines 379-387) are missing
// from this listing - TODO confirm.
372 void MothurOut::mothurOutEndLine() {
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
388 catch(exception& e) {
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload that also writes output to the caller-supplied outputFile stream.
// NOTE(review): embedded lines 402-405 and 407-413 are missing from this listing, so any
// additional destinations (screen, log) are not shown - TODO confirm.
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
406 outputFile << output;
414 catch(exception& e) {
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload of mothurOutEndLine taking a caller-supplied outputFile stream.
// NOTE(review): the writing statements (embedded lines 427-436) are missing from this
// listing - presumably a line end is written to outputFile as well; TODO confirm.
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// Log-only variant of mothurOut, judging by the name; restricted to MPI rank 0 where the
// rank check applies. NOTE(review): the writing statements (embedded lines 450-457) are
// missing from this listing - TODO confirm.
443 void MothurOut::mothurOutJustToLog(string output) {
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
458 catch(exception& e) {
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Reports exception e through mothurOut as "[ERROR]: <what>" followed by an explanation.
//   object   - class/command name used in the message; "cluster" and "shhh.flows" get
//              dedicated out-of-memory guidance
//   function - function name used in the message
// Exceptions whose text contains "bad_alloc" are explained as out-of-memory conditions;
// everything else gets the generic "please contact" message.
// NOTE(review): embedded lines 476-477, 482 and 484-486 are missing from this listing, so
// the else branches and any trailing statements are not shown.
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// NOTE(review): find returns an unsigned size type; storing it in an int and comparing with
// string::npos below relies on the implicit conversion back to the unsigned type.
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// out-of-memory guidance specific to the cluster command
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// out-of-memory guidance specific to the shhh.flows command
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
// generic out-of-memory guidance (presumably the final else branch - its opening line is not shown)
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // process_mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// Reads this process' memory figures from /proc/self/stat and prints them via mothurOut.
//   vm_usage     - set to vsize / 1024.0
//   resident_set - set to rss multiplied by the page size in KB
// NOTE(review): the declarations of vsize and rss, the return statements and the end of the
// function are missing from this listing (embedded line numbers skip) - TODO confirm.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
// NOTE(review): this branch is also selected on __APPLE__, where /proc/self/stat is
// presumably unavailable - confirm what the stream reads yield there.
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
// skips the 22 leading whitespace-separated fields, then reads vsize and rss
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
586 catch(exception& e) {
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
592 int MothurOut::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){
594 fileName = getFullPathName(fileName);
596 fileHandle.open(fileName.c_str(), ios::app | ios::binary);
598 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
605 catch(exception& e) {
606 errorOut(e, "MothurOut", "openOutputFileAppend");
611 /***********************************************************************/
612 void MothurOut::gobble(istream& f){
616 while(isspace(d=f.get())) { ;}
617 if(!f.eof()) { f.putback(d); }
619 catch(exception& e) {
620 errorOut(e, "MothurOut", "gobble");
624 /***********************************************************************/
625 void MothurOut::gobble(istringstream& f){
628 while(isspace(d=f.get())) {;}
629 if(!f.eof()) { f.putback(d); }
631 catch(exception& e) {
632 errorOut(e, "MothurOut", "gobble");
637 /***********************************************************************/
639 string MothurOut::getline(istringstream& fileHandle) {
644 while (!fileHandle.eof()) {
646 char c = fileHandle.get();
648 //are you at the end of the line
649 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
656 catch(exception& e) {
657 errorOut(e, "MothurOut", "getline");
661 /***********************************************************************/
663 string MothurOut::getline(ifstream& fileHandle) {
670 char c = fileHandle.get();
672 //are you at the end of the line
673 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
680 catch(exception& e) {
681 errorOut(e, "MothurOut", "getline");
685 /***********************************************************************/
687 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
688 #ifdef USE_COMPRESSION
// Returns true when s ends with suffix (an empty suffix always matches).
// Takes s by const reference and compares in place, so neither the string nor its tail is copied.
inline bool endsWith(const std::string& s, const char * suffix){
	const size_t suffixLength = strlen(suffix);
	return s.size() >= suffixLength && s.compare(s.size() - suffixLength, suffixLength, suffix) == 0;
}
696 string MothurOut::getRootName(string longName){
699 string rootName = longName;
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
702 #ifdef USE_COMPRESSION
703 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
704 int pos = rootName.find_last_of('.');
705 rootName = rootName.substr(0, pos);
706 cerr << "shortening " << longName << " to " << rootName << "\n";
710 if(rootName.find_last_of(".") != rootName.npos){
711 int pos = rootName.find_last_of('.')+1;
712 rootName = rootName.substr(0, pos);
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getRootName");
722 /***********************************************************************/
724 string MothurOut::getSimpleName(string longName){
726 string simpleName = longName;
729 found=longName.find_last_of("/\\");
731 if(found != longName.npos){
732 simpleName = longName.substr(found+1);
737 catch(exception& e) {
738 errorOut(e, "MothurOut", "getSimpleName");
743 /***********************************************************************/
745 int MothurOut::getRandomIndex(int highest){
748 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
752 catch(exception& e) {
753 errorOut(e, "MothurOut", "getRandomIndex");
758 /**********************************************************************/
760 string MothurOut::getPathName(string longName){
762 string rootPathName = longName;
764 if(longName.find_last_of("/\\") != longName.npos){
765 int pos = longName.find_last_of("/\\")+1;
766 rootPathName = longName.substr(0, pos);
771 catch(exception& e) {
772 errorOut(e, "MothurOut", "getPathName");
777 /***********************************************************************/
// Checks that dirName can be written to by opening, then removing, a temporary file in it.
// dirName is updated in place: a trailing separator is appended if missing and the name is
// passed through getFullPathName.
// NOTE(review): the embedded line numbers skip repeatedly, so the declarations of tag, pid
// and out, the success/failure branching and the return statements are not shown.
779 bool MothurOut::dirCheck(string& dirName){
785 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
790 //add / to name if needed
// NOTE(review): with an empty dirName, length()-1 wraps around and substr throws
// std::out_of_range unless an unseen guard prevents it - confirm.
791 string lastChar = dirName.substr(dirName.length()-1);
792 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
793 if (lastChar != "/") { dirName += "/"; }
795 if (lastChar != "\\") { dirName += "\\"; }
798 //test to make sure directory exists
799 dirName = getFullPathName(dirName);
// probe file name: <dirName><tag>temp
800 string outTemp = dirName + tag + "temp";
802 out.open(outTemp.c_str(), ios::trunc);
804 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
// the probe file is deleted again
807 mothurRemove(outTemp);
813 catch(exception& e) {
814 errorOut(e, "MothurOut", "dirCheck");
819 //**********************************************************************************************************************
821 map<string, vector<string> > MothurOut::parseClasses(string classes){
823 map<string, vector<string> > parts;
825 //treatment<Early|Late>-age<young|old>
826 vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
828 for (int i = 0; i < pieces.size(); i++) {
829 string category = ""; string value = "";
830 bool foundOpen = false;
831 for (int j = 0; j < pieces[i].length(); j++) {
832 if (control_pressed) { return parts; }
834 if (pieces[i][j] == '<') { foundOpen = true; }
835 else if (pieces[i][j] == '>') { j += pieces[i].length(); }
837 if (!foundOpen) { category += pieces[i][j]; }
838 else { value += pieces[i][j]; }
841 vector<string> values; splitAtChar(value, values, '|');
842 parts[category] = values;
847 catch(exception& e) {
848 errorOut(e, "MothurOut", "parseClasses");
852 /***********************************************************************/
854 string MothurOut::hasPath(string longName){
859 found=longName.find_last_of("~/\\");
861 if(found != longName.npos){
862 path = longName.substr(0, found+1);
867 catch(exception& e) {
868 errorOut(e, "MothurOut", "hasPath");
873 /***********************************************************************/
875 string MothurOut::getExtension(string longName){
877 string extension = "";
879 if(longName.find_last_of('.') != longName.npos){
880 int pos = longName.find_last_of('.');
881 extension = longName.substr(pos, longName.length());
886 catch(exception& e) {
887 errorOut(e, "MothurOut", "getExtension");
891 /***********************************************************************/
// Opens fileName (resolved through getFullPathName) and returns true when the stream is at
// end of file at the point of the check below.
// NOTE(review): the embedded line numbers skip (900 -> 903 -> 905 -> 910), so the
// declaration of fileHandle, the open-failure test, whatever is read before the eof()
// check and the remaining return statements are not shown - TODO confirm.
892 bool MothurOut::isBlank(string fileName){
895 fileName = getFullPathName(fileName);
898 fileHandle.open(fileName.c_str());
900 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
903 //check for blank file
905 if (fileHandle.eof()) { fileHandle.close(); return true; }
910 catch(exception& e) {
911 errorOut(e, "MothurOut", "isBlank");
915 /***********************************************************************/
// Expands fileName into a complete path.
//   - a name without any path part (hasPath returns "") is returned unchanged
//   - a path containing '~' has the text up to the '~' replaced by the home directory
//     (HOME on the Unix branch, HOMEPATH on the other branch)
//   - a path containing "./" (".\" on the other branch) is resolved against the current
//     working directory, stepping up one directory for each "../"
//   - any other path is returned unchanged as "already complete"
// NOTE(review): the embedded line numbers skip throughout, so several declarations
// (newFileName, cwd, homeDir, simpleCWD, dirs, pos), the #else/#endif lines, some
// statements inside the loops and the final return are not shown in this listing.
917 string MothurOut::getFullPathName(string fileName){
920 string path = hasPath(fileName);
924 if (path == "") { return fileName; } //its a simple name
925 else { //we need to complete the pathname
926 // ex. ../../../filename
927 // cwd = /user/work/desktop
930 //get current working directory
931 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
933 if (path.find("~") != -1) { //go to home directory
936 char *homepath = NULL;
937 homepath = getenv ("HOME");
// an unset HOME falls back to an empty home directory
938 if ( homepath != NULL) { homeDir = homepath; }
939 else { homeDir = ""; }
941 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
944 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
945 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
947 //char* cwdpath = new char[1024];
949 //cwdpath=getcwd(cwdpath,size);
952 char *cwdpath = NULL;
// NOTE(review): getcwd(NULL, 0) hands back a buffer allocated by the C library on common
// platforms; no matching free is visible in this listing - confirm it is released.
953 cwdpath = getcwd(NULL, 0); // or _getcwd
954 if ( cwdpath != NULL) { cwd = cwdpath; }
// drops the first character of cwd (the leading '/' of an absolute path)
960 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
962 //break apart the current working directory
964 while (simpleCWD.find_first_of('/') != string::npos) {
965 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
966 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
969 //get last one // ex. ../../../filename = /user/work/desktop/filename
970 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index marks the deepest cwd component that is kept in the result
973 int index = dirs.size()-1;
975 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
976 if (pos == 0) { break; //you are at the end
977 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
978 path = path.substr(0, pos-1);
980 if (index == 0) { break; }
981 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
982 path = path.substr(0, pos);
983 }else if (pos == 1) { break; //you are at the end
984 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// rebuilds the path from the kept cwd components, innermost first
987 for (int i = index; i >= 0; i--) {
988 newFileName = dirs[i] + "/" + newFileName;
991 newFileName = "/" + newFileName;
995 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): unlike the HOME lookup above, this getenv result is not checked for NULL
// before it is turned into a string.
996 string homeDir = getenv ("HOMEPATH");
997 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
1000 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
1001 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
1003 char *cwdpath = NULL;
1004 cwdpath = getcwd(NULL, 0); // or _getcwd
1005 if ( cwdpath != NULL) { cwd = cwdpath; }
1008 //break apart the current working directory
1009 vector<string> dirs;
1010 while (cwd.find_first_of('\\') != -1) {
1011 string dir = cwd.substr(0,cwd.find_first_of('\\'));
1012 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
1013 dirs.push_back(dir);
1017 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
1019 int index = dirs.size()-1;
1021 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
1022 if (pos == 0) { break; //you are at the end
1023 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
1024 path = path.substr(0, pos-1);
1026 if (index == 0) { break; }
1027 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
1028 path = path.substr(0, pos);
1029 }else if (pos == 1) { break; //you are at the end
1030 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
1033 for (int i = index; i >= 0; i--) {
1034 newFileName = dirs[i] + "\\" + newFileName;
1043 catch(exception& e) {
1044 errorOut(e, "MothurOut", "getFullPathName");
1048 /***********************************************************************/
// Opens fileName (resolved through getFullPathName) for reading on fileHandle without
// printing a "Could not open" message (that call is commented out below). The third
// parameter m is not used in the visible lines.
// In USE_COMPRESSION builds a name ending in .gz or .bz2 is decompressed by a forked child
// running zcat/bzcat into a named pipe, and the pipe is opened instead.
// NOTE(review): the embedded line numbers skip, so the open-failure test, the return
// statements and the lines following the fork branches are not shown in this listing.
1050 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1052 //get full path name
1053 string completeFileName = getFullPathName(fileName);
1054 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1055 #ifdef USE_COMPRESSION
1056 // check for gzipped or bzipped file
1057 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process can create that path before
// mkfifo does.
1058 string tempName = string(tmpnam(0));
1059 mkfifo(tempName.c_str(), 0666);
1060 int fork_result = fork();
1061 if (fork_result < 0) {
1062 cerr << "Error forking.\n";
// child process: runs the decompressor, writing into the pipe
1064 } else if (fork_result == 0) {
// NOTE(review): the file name is placed in the shell command unquoted.
1065 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1066 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1067 system(command.c_str());
1068 cerr << "Done decompressing " << completeFileName << "\n";
1069 mothurRemove(tempName);
// parent process: reads from the pipe instead of the compressed file
1072 cerr << "waiting on child process " << fork_result << "\n";
1073 completeFileName = tempName;
1078 fileHandle.open(completeFileName.c_str());
1080 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1083 //check for blank file
1088 catch(exception& e) {
1089 errorOut(e, "MothurOut", "openInputFile - no Error");
1093 /***********************************************************************/
// Opens fileName (resolved through getFullPathName) for reading on fileHandle, reporting
// through mothurOut when the file cannot be opened or is blank.
// In USE_COMPRESSION builds a name ending in .gz or .bz2 is decompressed by a forked child
// running zcat/bzcat into a named pipe, and the pipe is opened instead.
// NOTE(review): the embedded line numbers skip, so the open-failure test, what is read
// before the eof() check, the return statements and the lines following the fork branches
// are not shown in this listing.
1095 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1098 //get full path name
1099 string completeFileName = getFullPathName(fileName);
1100 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1101 #ifdef USE_COMPRESSION
1102 // check for gzipped or bzipped file
1103 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process can create that path before
// mkfifo does.
1104 string tempName = string(tmpnam(0));
1105 mkfifo(tempName.c_str(), 0666);
1106 int fork_result = fork();
1107 if (fork_result < 0) {
1108 cerr << "Error forking.\n";
// child process: runs the decompressor, writing into the pipe
1110 } else if (fork_result == 0) {
// NOTE(review): the file name is placed in the shell command unquoted.
1111 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1112 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1113 system(command.c_str());
1114 cerr << "Done decompressing " << completeFileName << "\n";
1115 mothurRemove(tempName);
// parent process: reads from the pipe instead of the compressed file
1118 cerr << "waiting on child process " << fork_result << "\n";
1119 completeFileName = tempName;
1125 fileHandle.open(completeFileName.c_str());
1127 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1131 //check for blank file
1133 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1138 catch(exception& e) {
1139 errorOut(e, "MothurOut", "openInputFile");
1143 /***********************************************************************/
// Renames oldName to newName. Returns 0 immediately when both names are equal.
// The branch guarded by the Unix-like #if shells out to "rm" (when newName could be opened)
// and then "mv"; the other visible lines use mothurRemove() and rename().
// NOTE(review): the #else/#endif lines and the final return are not visible in this listing.
1145 int MothurOut::renameFile(string oldName, string newName){
1148 if (oldName == newName) { return 0; }
// probe whether newName already exists by trying to open it
1151 int exist = openInputFile(newName, inTest, "");
1154 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1155 if (exist == 0) { //you could open it so you want to delete it
// NOTE(review): the command is built by plain concatenation, so a name containing spaces or
// shell metacharacters would be misread by the shell - confirm callers never pass such names.
1156 string command = "rm " + newName;
1157 system(command.c_str());
1160 string command = "mv " + oldName + " " + newName;
1161 system(command.c_str());
1163 mothurRemove(newName);
1164 int renameOk = rename(oldName.c_str(), newName.c_str());
1169 catch(exception& e) {
1170 errorOut(e, "MothurOut", "renameFile");
1175 /***********************************************************************/
// Opens fileName for writing on fileHandle with ios::trunc, using the path returned by
// getFullPathName(). On Unix-like builds compiled with USE_COMPRESSION, a name ending in
// ".gz" or ".bz2" is written through a temporary named pipe that a forked child compresses
// with gzip/bzip2 into the requested file.
// NOTE(review): the open-failure test and the return statements are not visible in this listing.
1177 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1180 string completeFileName = getFullPathName(fileName);
1181 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1182 #ifdef USE_COMPRESSION
1183 // check for gzipped or bzipped file
1184 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only generates a name; nothing reserves that path before mkfifo() creates it.
1185 string tempName = string(tmpnam(0));
1186 mkfifo(tempName.c_str(), 0666);
1187 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1188 int fork_result = fork();
1189 if (fork_result < 0) {
1190 cerr << "Error forking.\n";
// fork() returned 0: this is the child, which compresses whatever is written into the pipe
1192 } else if (fork_result == 0) {
1193 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1194 system(command.c_str());
// the caller's stream is attached to the pipe rather than to the compressed file
1197 completeFileName = tempName;
1202 fileHandle.open(completeFileName.c_str(), ios::trunc);
1204 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1211 catch(exception& e) {
1212 errorOut(e, "MothurOut", "openOutputFile");
1217 /***********************************************************************/
// Same flow as the visible lines of openOutputFile, except that the stream is opened with
// ios::trunc | ios::binary. On Unix-like builds compiled with USE_COMPRESSION, a name ending
// in ".gz" or ".bz2" is written through a temporary named pipe that a forked child compresses.
// NOTE(review): the open-failure test and the return statements are not visible in this listing.
1219 int MothurOut::openOutputFileBinary(string fileName, ofstream& fileHandle){
1222 string completeFileName = getFullPathName(fileName);
1223 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1224 #ifdef USE_COMPRESSION
1225 // check for gzipped or bzipped file
1226 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only generates a name; nothing reserves that path before mkfifo() creates it.
1227 string tempName = string(tmpnam(0));
1228 mkfifo(tempName.c_str(), 0666);
1229 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1230 int fork_result = fork();
1231 if (fork_result < 0) {
1232 cerr << "Error forking.\n";
// fork() returned 0: this is the child, which compresses whatever is written into the pipe
1234 } else if (fork_result == 0) {
1235 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1236 system(command.c_str());
// the caller's stream is attached to the pipe rather than to the compressed file
1239 completeFileName = tempName;
1244 fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary);
1246 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1253 catch(exception& e) {
1254 errorOut(e, "MothurOut", "openOutputFileBinary");
1259 /**************************************************************************************************/
// Appends the contents of file `temp` to file `filename`, copying in 4096-byte reads and
// counting the '\n' characters copied in numLines.
// NOTE(review): the declarations and the return statement are not visible in this listing.
1260 int MothurOut::appendFiles(string temp, string filename) {
1265 //open output file in append mode
1266 openOutputFileAppend(filename, output);
// the extra "no error" argument selects a different openInputFile overload than the two-argument one
1267 int ableToOpen = openInputFile(temp, input, "no error");
1268 //int ableToOpen = openInputFile(temp, input);
1271 if (ableToOpen == 0) { //you opened it
1274 while (!input.eof()) {
1275 input.read(buffer, 4096);
// gcount() is the number of bytes the last read() delivered, which may be less than 4096
1276 output.write(buffer, input.gcount());
1277 //count number of lines
1278 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1287 catch(exception& e) {
1288 errorOut(e, "MothurOut", "appendFiles");
1292 /**************************************************************************************************/
// Appends the contents of file `temp` to file `filename` after discarding the first line of
// `temp` (treated as a header line). The remainder is copied in 4096-byte reads while the
// '\n' characters copied are counted in numLines.
// NOTE(review): the declarations and the return statement are not visible in this listing.
1293 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1298 //open output file in append mode
1299 openOutputFileAppend(filename, output);
1300 int ableToOpen = openInputFile(temp, input, "no error");
1301 //int ableToOpen = openInputFile(temp, input);
1304 if (ableToOpen == 0) { //you opened it
// read and drop the header line, then skip the whitespace that follows it
1306 string headers = getline(input); gobble(input);
1307 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1310 while (!input.eof()) {
1311 input.read(buffer, 4096);
// gcount() is the number of bytes the last read() delivered, which may be less than 4096
1312 output.write(buffer, input.gcount());
1313 //count number of lines
1314 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1323 catch(exception& e) {
// report this function's own name (it previously reported "appendFiles")
1324 errorOut(e, "MothurOut", "appendFilesWithoutHeaders");
1328 /**************************************************************************************************/
// Sorts the column-formatted distance file distFile by its third column and writes the result
// to getRootName(distFile) + "sorted.dist".
// Unix-like builds call the system "sort" utility directly. Other builds rewrite each line with
// the distance first, run the platform sort, and then move the distance back to the end.
// NOTE(review): outputDir is not used by any visible line; the return statement is not visible.
1329 string MothurOut::sortFile(string distFile, string outputDir){
1332 //if (outputDir == "") { outputDir += hasPath(distFile); }
1333 string outfile = getRootName(distFile) + "sorted.dist";
1336 //if you can, use the unix sort since it has been optimized for years
1337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1338 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1339 system(command.c_str());
1340 #else //you are stuck with my best attempt...
1341 //windows sort does not have a way to specify a column, only a character in the line
1342 //since we cannot assume that the distance will always be at the same character location on each line
1343 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1345 //read in file line by line and put distance first
1346 string tempDistFile = distFile + ".temp";
1349 openInputFile(distFile, input);
1350 openOutputFile(tempDistFile, output);
1352 string firstName, secondName;
1354 while (!input.eof()) {
1355 input >> firstName >> secondName >> dist;
1356 output << dist << '\t' << firstName << '\t' << secondName << endl;
1363 //sort using windows sort
1364 string tempOutfile = outfile + ".temp";
1365 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1366 system(command.c_str());
1368 //read in sorted file and put distance at end again
1371 openInputFile(tempOutfile, input2);
1372 openOutputFile(outfile, output2);
1374 while (!input2.eof()) {
1375 input2 >> dist >> firstName >> secondName;
1376 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
// both intermediate files are deleted once the final output has been written
1383 mothurRemove(tempDistFile);
1384 mothurRemove(tempOutfile);
1389 catch(exception& e) {
1390 errorOut(e, "MothurOut", "sortFile");
1394 /**************************************************************************************************/
// Scans `filename` one character at a time in binary mode and collects byte offsets in
// `positions`; `num` receives the number of offsets found by the scan, and the value `size`
// is appended afterwards as a final entry.
// NOTE(review): the test that decides when an offset is recorded (presumably c == '>') and the
// statement that assigns `size` (presumably ftell after the fseek) are not visible here.
1395 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1397 vector<unsigned long long> positions;
1399 //openInputFile(filename, inFASTA);
1400 inFASTA.open(filename.c_str(), ios::binary);
1403 unsigned long long count = 0;
1404 while(!inFASTA.eof()){
1405 //input = getline(inFASTA);
1406 //cout << input << '\t' << inFASTA.tellg() << endl;
1407 //if (input.length() != 0) {
1408 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1410 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1411 char c = inFASTA.get(); count++;
// count was already advanced past c, so count-1 is the offset of c itself
1413 positions.push_back(count-1);
1414 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
// num is taken before the trailing size entry is appended below
1419 num = positions.size();
1420 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1422 unsigned long long size;
1424 //get num bytes in file
1425 pFile = fopen (filename.c_str(),"rb");
1426 if (pFile==NULL) perror ("Error opening file");
1428 fseek (pFile, 0, SEEK_END);
1433 /*unsigned long long size = positions[(positions.size()-1)];
1435 openInputFile(filename, in);
1440 if(in.eof()) { break; }
1445 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1447 positions.push_back(size);
1452 catch(exception& e) {
1453 errorOut(e, "MothurOut", "setFilePosFasta");
1457 //**********************************************************************************************************************
// Reads inputfile as whitespace-separated (otu, size, taxonomy) records and collects one
// consTax per record in `taxes`. Reading stops early when control_pressed is set.
// NOTE(review): the loop header, any header-line handling and the return are not visible here.
1458 vector<consTax> MothurOut::readConsTax(string inputfile){
1461 vector<consTax> taxes;
1464 openInputFile(inputfile, in);
1471 if (control_pressed) { break; }
// defaults used for fields before the extraction below fills them
1473 string otu = ""; string tax = "unknown";
1476 in >> otu >> size >> tax; gobble(in);
1477 consTax temp(otu, tax, size);
1478 taxes.push_back(temp);
1484 catch(exception& e) {
1485 errorOut(e, "MothurOut", "readConsTax");
1489 //**********************************************************************************************************************
// Reads inputfile as whitespace-separated (otu, size, taxonomy) records and builds a
// consTax2(tax, size) for each one. Reading stops early when control_pressed is set.
// NOTE(review): the statement that stores `temp` in `taxes` (presumably keyed by otu), the loop
// header and the return value are not visible in this listing.
1490 int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
1493 openInputFile(inputfile, in);
1500 if (control_pressed) { break; }
// defaults used for fields before the extraction below fills them
1502 string otu = ""; string tax = "unknown";
1505 in >> otu >> size >> tax; gobble(in);
1506 consTax2 temp(tax, size);
1513 catch(exception& e) {
1514 errorOut(e, "MothurOut", "readConsTax");
1518 /**************************************************************************************************/
// Scans `filename` character by character in binary mode and records byte offsets in
// `positions`, starting with 0 and adding one entry after each line (plus the whitespace that
// follows it) has been consumed. `num` is set to positions.size()-1 and the last entry is
// overwritten with `size`.
// NOTE(review): the outer loop header and the statement that assigns `size` (presumably ftell
// after the fseek) are not visible in this listing.
1519 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1521 filename = getFullPathName(filename);
1523 vector<unsigned long long> positions;
1525 //openInputFile(filename, in);
1526 in.open(filename.c_str(), ios::binary);
1529 unsigned long long count = 0;
1530 positions.push_back(0);
1533 //getline counting reads
1534 char d = in.get(); count++;
// Stop at a line terminator or at end of file. The stream state is tested directly: the old
// test compared the character d with the bool in.eof(), which never became false at EOF,
// so a final line without a terminator kept this loop running.
1535 while ((d != '\n') && (d != '\r') && (d != '\f') && (!in.eof())) {
1536 //get next character
1542 d=in.get(); count++;
// skip the rest of the line break and any following blank space, again stopping at EOF
1543 while(isspace(d) && (!in.eof())) { d=in.get(); count++;}
// count was already advanced past d, so count-1 is the offset of d itself
1545 positions.push_back(count-1);
1546 //cout << count-1 << endl;
1550 num = positions.size()-1;
1553 unsigned long long size;
1555 //get num bytes in file
1556 pFile = fopen (filename.c_str(),"rb");
1557 if (pFile==NULL) perror ("Error opening file");
1559 fseek (pFile, 0, SEEK_END);
1564 positions[(positions.size()-1)] = size;
1568 catch(exception& e) {
1569 errorOut(e, "MothurOut", "setFilePosEachLine");
1573 /**************************************************************************************************/
// Splits `filename` into up to `proc` byte ranges whose boundaries fall on a '>' character.
// Returns the boundary offsets (first entry 0, last entry `size`) and updates `proc` to the
// number of ranges actually produced. Builds that do not match the Unix-like #if log an error
// and push `size` as the only boundary after 0.
// NOTE(review): the statement that assigns `size` (presumably ftell after the fseek), the seek
// to `spot` and the character-reading loop header are not visible in this listing.
1575 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1577 vector<unsigned long long> filePos;
1578 filePos.push_back(0);
1581 unsigned long long size;
1583 filename = getFullPathName(filename);
1585 //get num bytes in file
1586 pFile = fopen (filename.c_str(),"rb");
1587 if (pFile==NULL) perror ("Error opening file");
1589 fseek (pFile, 0, SEEK_END);
1594 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1596 //estimate file breaks
1597 unsigned long long chunkSize = 0;
1598 chunkSize = size / proc;
1600 //file too small to divide by processors
1601 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1603 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1604 for (int i = 0; i < proc; i++) {
1605 unsigned long long spot = (i+1) * chunkSize;
1608 openInputFile(filename, in);
1612 unsigned long long newSpot = spot;
// put the '>' back so that tellg() reports the offset of the '>' itself
1616 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1617 else if (int(c) == -1) { break; }
1621 //there was not another sequence before the end of the file
1622 unsigned long long sanityPos = in.tellg();
// tellg() yields -1 on a failed stream; the comparison relies on -1 converting to the unsigned type
1624 if (sanityPos == -1) { break; }
1625 else { filePos.push_back(newSpot); }
1631 filePos.push_back(size);
1633 //sanity check filePos
1634 for (int i = 0; i < (filePos.size()-1); i++) {
// drop any boundary that does not advance past the previous one, then re-check the same index
1635 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1638 proc = (filePos.size() - 1);
1640 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1642 filePos.push_back(size);
1646 catch(exception& e) {
1647 errorOut(e, "MothurOut", "divideFile");
1651 /**************************************************************************************************/
// Splits `filename` into up to `proc` byte ranges whose boundaries fall at the start of a line.
// Returns the boundary offsets (first entry 0, last entry `size`) and updates `proc` to the
// number of ranges actually produced. Builds that do not match the Unix-like #if log an error
// and push `size` as the only boundary after 0.
// NOTE(review): the statement that assigns `size` (presumably ftell after the fseek), the seek
// to `spot` and the character-reading loop header are not visible in this listing.
1653 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1655 vector<unsigned long long> filePos;
1656 filePos.push_back(0);
1659 unsigned long long size;
1661 filename = getFullPathName(filename);
1663 //get num bytes in file
1664 pFile = fopen (filename.c_str(),"rb");
1665 if (pFile==NULL) perror ("Error opening file");
1667 fseek (pFile, 0, SEEK_END);
1672 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1674 //estimate file breaks
1675 unsigned long long chunkSize = 0;
1676 chunkSize = size / proc;
1678 //file too small to divide by processors
1679 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1681 //for each process seekg to closest file break and search for the next line break. make the start of the following line the filebreak
1682 for (int i = 0; i < proc; i++) {
1683 unsigned long long spot = (i+1) * chunkSize;
1686 openInputFile(filename, in);
1689 //look for next line break
1690 unsigned long long newSpot = spot;
// gobble() skips the remaining whitespace so that tellg() lands on the next line's first character
1694 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1695 else if (int(c) == -1) { break; }
1698 //there was not another line before the end of the file
1699 unsigned long long sanityPos = in.tellg();
// tellg() yields -1 on a failed stream; the comparison relies on -1 converting to the unsigned type
1701 if (sanityPos == -1) { break; }
1702 else { filePos.push_back(newSpot); }
1708 filePos.push_back(size);
1710 //sanity check filePos
1711 for (int i = 0; i < (filePos.size()-1); i++) {
// drop any boundary that does not advance past the previous one, then re-check the same index
1712 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1715 proc = (filePos.size() - 1);
// name this function in the diagnostics (both previously said "divideFile")
1717 mothurOut("[ERROR]: Windows version should not be calling the divideFilePerLine function."); mothurOutEndLine();
1719 filePos.push_back(size);
1723 catch(exception& e) {
1724 errorOut(e, "MothurOut", "divideFilePerLine");
1728 /**************************************************************************************************/
// Splits `filename` into chunk files named "<filename>.<i>.tmp", one per byte range returned
// by divideFile(filename, proc), and appends each chunk file name to `files`.
// NOTE(review): stream declarations, close calls, release of `chunk` and the return statement
// are not visible in this listing.
1729 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1732 vector<unsigned long long> filePos = divideFile(filename, proc);
1734 for (int i = 0; i < (filePos.size()-1); i++) {
1738 openInputFile(filename, in);
1739 in.seekg(filePos[i]);
// length of this range: distance to the next boundary
1740 unsigned long long size = filePos[(i+1)] - filePos[i];
1741 char* chunk = new char[size];
1742 in.read(chunk, size);
1746 string fileChunkName = filename + "." + toString(i) + ".tmp";
1748 openOutputFile(fileChunkName, out);
// chunk is a raw byte buffer with no terminating NUL, so write exactly `size` bytes rather
// than streaming it as a C string (which stops at the first NUL or runs past the buffer)
1750 out.write(chunk, size); out << endl;
1755 files.push_back(fileChunkName);
1760 catch(exception& e) {
1761 errorOut(e, "MothurOut", "divideFile");
1765 /***********************************************************************/
// Returns true when f, compared case-insensitively, is "TRUE" or "T"; false otherwise.
1767 bool MothurOut::isTrue(string f){
// f is a by-value copy, so upper-casing it here does not affect the caller's string
1770 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1772 if ((f == "TRUE") || (f == "T")) { return true; }
1773 else { return false; }
1775 catch(exception& e) {
1776 errorOut(e, "MothurOut", "isTrue");
1781 /***********************************************************************/
// Scales dist by `precision`, adds 0.5, truncates to int and divides by `precision` again.
// `precision` acts as the scale factor itself, not as a digit count.
// NOTE(review): the int() truncation is toward zero, so negative inputs would not round to
// nearest - presumably dist is never negative; confirm against callers.
1783 float MothurOut::roundDist(float dist, int precision){
1785 return int(dist * precision + 0.5)/float(precision);
1787 catch(exception& e) {
1788 errorOut(e, "MothurOut", "roundDist");
1792 /***********************************************************************/
// Scales dist by `precision`, applies ceil(), and divides by `precision` again.
// `precision` acts as the scale factor itself, not as a digit count.
1794 float MothurOut::ceilDist(float dist, int precision){
1796 return int(ceil(dist * precision))/float(precision);
1798 catch(exception& e) {
1799 errorOut(e, "MothurOut", "ceilDist");
1803 /***********************************************************************/
// Splits the first `size` characters of `buffer` into whitespace-separated pieces.
// `rest` is in/out: on entry it holds characters carried over from an earlier call, and the
// visible lines keep accumulating the current unfinished piece in it, so a token that spans
// two buffers can be completed by the next call.
// NOTE(review): the else keyword, closing braces and return statement are not visible here.
1805 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1807 vector<string> pieces;
1809 for (int i = 0; i < size; i++) {
1810 if (!isspace(buffer[i])) { rest += buffer[i]; }
// whitespace reached: the accumulated piece, if any, is complete
1812 if (rest != "") { pieces.push_back(rest); rest = ""; }
1813 while (i < size) { //gobble white space
1814 if (isspace(buffer[i])) { i++; }
// first character of the next piece; the for loop's i++ then moves past it
1815 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1822 catch(exception& e) {
1823 errorOut(e, "MothurOut", "splitWhiteSpace");
1827 /***********************************************************************/
// Splits `input` into whitespace-separated pieces and collects them in `pieces`; a trailing
// piece that is not followed by whitespace is added after the loop.
// NOTE(review): the declaration of `rest`, the else keyword and the return are not visible here.
1828 vector<string> MothurOut::splitWhiteSpace(string input){
1830 vector<string> pieces;
1833 for (int i = 0; i < input.length(); i++) {
1834 if (!isspace(input[i])) { rest += input[i]; }
// whitespace reached: the accumulated piece, if any, is complete
1836 if (rest != "") { pieces.push_back(rest); rest = ""; }
1837 while (i < input.length()) { //gobble white space
1838 if (isspace(input[i])) { i++; }
// first character of the next piece; the for loop's i++ then moves past it
1839 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1844 if (rest != "") { pieces.push_back(rest); }
1848 catch(exception& e) {
1849 errorOut(e, "MothurOut", "splitWhiteSpace");
1853 /***********************************************************************/
// Splits `input` into whitespace-separated pieces, except that text following a single or
// double quote is accumulated, whitespace included, until the next quote character or the end
// of the input. Input with no quote characters is passed straight to splitWhiteSpace(input).
// NOTE(review): the declaration of `rest`, the statements executed when the closing quote is
// found, and the return are not visible in this listing.
1854 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1856 vector<string> pieces;
1859 int pos = input.find('\'');
1860 int pos2 = input.find('\"');
1862 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1864 for (int i = 0; i < input.length(); i++) {
1865 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1867 for (int j = i+1; j < input.length(); j++) {
1868 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1872 }else { rest += input[j]; }
1874 }else if (!isspace(input[i])) { rest += input[i]; }
// whitespace reached: the accumulated piece, if any, is complete
1876 if (rest != "") { pieces.push_back(rest); rest = ""; }
1877 while (i < input.length()) { //gobble white space
1878 if (isspace(input[i])) { i++; }
1879 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1884 if (rest != "") { pieces.push_back(rest); }
1888 catch(exception& e) {
// report this function's own name (it previously reported "splitWhiteSpace")
1889 errorOut(e, "MothurOut", "splitWhiteSpaceWithQuotes");
1893 //**********************************************************************************************************************
// Reads a two-column taxonomy file (name, taxonomy) into taxMap and returns taxMap.size().
// The file is read in 4096-byte chunks that are tokenised with splitWhiteSpace(); a second
// pass handles whatever is left in `rest` after the last chunk. For every completed pair:
// the name goes through checkName(), confidence scores are removed from the taxonomy when it
// contains '(', a non-empty taxonomy that does not end in ';' is reported and not stored, and
// a name that is already in taxMap is reported as an error. When any duplicate was reported,
// control_pressed is set.
// NOTE(review): the loop headers, the pairDone test and several declarations are not visible
// in this listing.
1894 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1898 openInputFile(namefile, in);
1902 bool pairDone = false;
1903 bool columnOne = true;
1904 string firstCol, secondCol;
1908 if (control_pressed) { break; }
1910 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
1911 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1913 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the name column and the taxonomy column
1914 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1915 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1918 checkName(firstCol);
1919 //are there confidence scores, if so remove them
1920 if (secondCol.find_first_of('(') != string::npos) { removeConfidences(secondCol); }
1921 map<string, string>::iterator itTax = taxMap.find(firstCol);
1923 if(itTax == taxMap.end()) {
1924 bool ignore = false;
1925 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1927 if (!ignore) { taxMap[firstCol] = secondCol; }
1928 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1930 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true;
// second pass: tokens remaining in `rest` after the last chunk was read
1939 vector<string> pieces = splitWhiteSpace(rest);
1941 for (int i = 0; i < pieces.size(); i++) {
1942 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1943 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1946 checkName(firstCol);
1947 //are there confidence scores, if so remove them
1948 if (secondCol.find_first_of('(') != string::npos) { removeConfidences(secondCol); }
1949 map<string, string>::iterator itTax = taxMap.find(firstCol);
1951 if(itTax == taxMap.end()) {
1952 bool ignore = false;
1953 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1955 if (!ignore) { taxMap[firstCol] = secondCol; }
1956 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
// the message now ends with a real newline; it previously printed the two characters "/n"
1958 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true;
1966 if (error) { control_pressed = true; }
1967 if (debug) { mothurOut("[DEBUG]: numSeqs saved = '" + toString(taxMap.size()) + "'\n"); }
1968 return taxMap.size();
1971 catch(exception& e) {
1972 errorOut(e, "MothurOut", "readTax");
1976 /**********************************************************************************************************************/
// Reads a two-column names file and, for each completed pair, maps every comma-separated name
// of the second column to the first-column name in nameMap. Returns nameMap.size().
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): `redund` is not used by any visible line; loop headers, the pairDone test and
// several declarations are not visible in this listing.
1977 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1981 openInputFile(namefile, in);
1985 bool pairDone = false;
1986 bool columnOne = true;
1987 string firstCol, secondCol;
1990 if (control_pressed) { break; }
1992 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
1993 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1995 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
1996 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1997 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2000 checkName(firstCol);
2001 checkName(secondCol);
2003 //parse names into vector
2004 vector<string> theseNames;
2005 splitAtComma(secondCol, theseNames);
2006 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
// second pass: tokens remaining in `rest` after the last chunk was read
2014 vector<string> pieces = splitWhiteSpace(rest);
2016 for (int i = 0; i < pieces.size(); i++) {
2017 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2018 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2021 checkName(firstCol);
2022 checkName(secondCol);
2024 //parse names into vector
2025 vector<string> theseNames;
2026 splitAtComma(secondCol, theseNames);
2027 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2033 return nameMap.size();
2036 catch(exception& e) {
2037 errorOut(e, "MothurOut", "readNames");
2041 /**********************************************************************************************************************/
// Reads a two-column names file and stores each completed pair reversed: the whole
// second-column string becomes the key and the first-column name the value.
// Returns nameMap.size(). The file is read in 4096-byte chunks tokenised with
// splitWhiteSpace(); a second pass handles what is left in `rest` after the last chunk.
// NOTE(review): `flip` is not used by any visible line; loop headers, the pairDone test and
// several declarations are not visible in this listing.
2042 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
2046 openInputFile(namefile, in);
2050 bool pairDone = false;
2051 bool columnOne = true;
2052 string firstCol, secondCol;
2055 if (control_pressed) { break; }
2057 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2058 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2060 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2061 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2062 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2065 checkName(firstCol);
2066 checkName(secondCol);
2067 nameMap[secondCol] = firstCol;
// second pass: tokens remaining in `rest` after the last chunk was read
2075 vector<string> pieces = splitWhiteSpace(rest);
2077 for (int i = 0; i < pieces.size(); i++) {
2078 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2079 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2082 checkName(firstCol);
2083 checkName(secondCol);
2084 nameMap[secondCol] = firstCol;
2090 return nameMap.size();
2093 catch(exception& e) {
2094 errorOut(e, "MothurOut", "readNames");
2098 /**********************************************************************************************************************/
// Clears nameMap and nameCount, then reads a two-column names file. For each completed pair,
// every comma-separated name of the second column is mapped to the first-column name in
// nameMap, and nameCount[first column] is set to the number of names in the second column.
// Returns nameMap.size(). The file is read in 4096-byte chunks tokenised with
// splitWhiteSpace(); a second pass handles what is left in `rest` after the last chunk.
// NOTE(review): loop headers, the pairDone test and several declarations are not visible here.
2099 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
2101 nameMap.clear(); nameCount.clear();
2104 openInputFile(namefile, in);
2108 bool pairDone = false;
2109 bool columnOne = true;
2110 string firstCol, secondCol;
2113 if (control_pressed) { break; }
2115 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2116 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2118 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2119 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2120 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2123 checkName(firstCol);
2124 checkName(secondCol);
2125 //parse names into vector
2126 vector<string> theseNames;
2127 splitAtComma(secondCol, theseNames);
2128 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2129 nameCount[firstCol] = theseNames.size();
// second pass: tokens remaining in `rest` after the last chunk was read
2137 vector<string> pieces = splitWhiteSpace(rest);
2139 for (int i = 0; i < pieces.size(); i++) {
2140 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2141 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2144 checkName(firstCol);
2145 checkName(secondCol);
2146 //parse names into vector
2147 vector<string> theseNames;
2148 splitAtComma(secondCol, theseNames);
2149 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2150 nameCount[firstCol] = theseNames.size();
2156 return nameMap.size();
2159 catch(exception& e) {
2160 errorOut(e, "MothurOut", "readNames");
2164 /**********************************************************************************************************************/
// Reads a two-column names file and stores each completed pair as
// nameMap[first column] = second column (the unsplit string). Returns nameMap.size().
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): loop headers, the pairDone test and several declarations are not visible here.
2165 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2169 openInputFile(namefile, in);
2173 bool pairDone = false;
2174 bool columnOne = true;
2175 string firstCol, secondCol;
2178 if (control_pressed) { break; }
2180 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2181 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2183 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2184 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2185 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2188 checkName(firstCol);
2189 checkName(secondCol);
// pairDone is reset so the next token starts a new pair
2190 nameMap[firstCol] = secondCol; pairDone = false; }
// second pass: tokens remaining in `rest` after the last chunk was read
2196 vector<string> pieces = splitWhiteSpace(rest);
2198 for (int i = 0; i < pieces.size(); i++) {
2199 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2200 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2203 checkName(firstCol);
2204 checkName(secondCol);
2205 nameMap[firstCol] = secondCol; pairDone = false; }
2209 return nameMap.size();
2212 catch(exception& e) {
2213 errorOut(e, "MothurOut", "readNames");
2217 /**********************************************************************************************************************/
// Reads a two-column names file and stores, for each completed pair, the second column split
// at commas as a vector under the first-column name. Returns nameMap.size().
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): loop headers, the pairDone test and several declarations are not visible here.
2218 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2222 openInputFile(namefile, in);
2226 bool pairDone = false;
2227 bool columnOne = true;
2228 string firstCol, secondCol;
2231 if (control_pressed) { break; }
2233 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2234 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2236 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2237 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2238 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2241 checkName(firstCol);
2242 checkName(secondCol);
2243 vector<string> temp;
2244 splitAtComma(secondCol, temp);
2245 nameMap[firstCol] = temp;
// second pass: tokens remaining in `rest` after the last chunk was read
2253 vector<string> pieces = splitWhiteSpace(rest);
2255 for (int i = 0; i < pieces.size(); i++) {
2256 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2257 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2260 checkName(firstCol);
2261 checkName(secondCol);
2262 vector<string> temp;
2263 splitAtComma(secondCol, temp);
2264 nameMap[firstCol] = temp;
2270 return nameMap.size();
2272 catch(exception& e) {
2273 errorOut(e, "MothurOut", "readNames");
2277 /**********************************************************************************************************************/
// Reads a two-column names file into a local map where each first-column name is associated
// with getNumNames(second column).
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): loop headers, the pairDone test, several declarations and the return statement
// (presumably returning nameMap) are not visible in this listing.
2278 map<string, int> MothurOut::readNames(string namefile) {
2280 map<string, int> nameMap;
2284 openInputFile(namefile, in);
2288 bool pairDone = false;
2289 bool columnOne = true;
2290 string firstCol, secondCol;
2293 if (control_pressed) { break; }
2295 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2296 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2298 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2299 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2300 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2303 checkName(firstCol);
2304 checkName(secondCol);
2305 int num = getNumNames(secondCol);
2306 nameMap[firstCol] = num;
// second pass: tokens remaining in `rest` after the last chunk was read
2314 vector<string> pieces = splitWhiteSpace(rest);
2315 for (int i = 0; i < pieces.size(); i++) {
2316 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2317 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2320 checkName(firstCol);
2321 checkName(secondCol);
2322 int num = getNumNames(secondCol);
2323 nameMap[firstCol] = num;
2332 catch(exception& e) {
2333 errorOut(e, "MothurOut", "readNames");
2337 /**********************************************************************************************************************/
// Reads a two-column names file into a local map where each first-column name is associated
// with getNumNames(second column).
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): no visible line touches `numSeqs` (presumably it accumulates the counts -
// confirm); loop headers, the pairDone test, several declarations and the return statement
// are not visible in this listing.
2338 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2340 map<string, int> nameMap;
2345 openInputFile(namefile, in);
2349 bool pairDone = false;
2350 bool columnOne = true;
2351 string firstCol, secondCol;
2354 if (control_pressed) { break; }
2356 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2357 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2359 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2360 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2361 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2364 checkName(firstCol);
2365 checkName(secondCol);
2366 int num = getNumNames(secondCol);
2367 nameMap[firstCol] = num;
// second pass: tokens remaining in `rest` after the last chunk was read
2376 vector<string> pieces = splitWhiteSpace(rest);
2377 for (int i = 0; i < pieces.size(); i++) {
2378 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2379 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2382 checkName(firstCol);
2383 checkName(secondCol);
2384 int num = getNumNames(secondCol);
2385 nameMap[firstCol] = num;
2395 catch(exception& e) {
2396 errorOut(e, "MothurOut", "readNames");
2400 /************************************************************/
// Replaces every ':' in `name` with '_' in place and sets changedSeqNames whenever a
// replacement is made.
// NOTE(review): the return statement is not visible in this listing.
2401 int MothurOut::checkName(string& name) {
2404 for (int i = 0; i < name.length(); i++) {
2405 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2410 catch(exception& e) {
2411 errorOut(e, "MothurOut", "checkName");
2415 /**********************************************************************************************************************/
// Reads a two-column names file and, for each completed pair, looks the first-column name up
// in fastamap. A name that is missing from fastamap is reported as an error; the visible
// lines otherwise build seqPriorityNode(num, it->second, firstCol), where num is
// getNumNames(second column), and append it to nameVector.
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): the else that separates the error path from the push_back, the loop headers,
// the pairDone test and the return statement are not visible in this listing.
2416 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2422 openInputFile(namefile, in);
2426 bool pairDone = false;
2427 bool columnOne = true;
2428 string firstCol, secondCol;
2431 if (control_pressed) { break; }
2433 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2434 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2436 for (int i = 0; i < pieces.size(); i++) {
// tokens alternate between the first and the second column
2437 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2438 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2441 checkName(firstCol);
2442 checkName(secondCol);
2443 int num = getNumNames(secondCol);
2445 map<string, string>::iterator it = fastamap.find(firstCol);
2446 if (it == fastamap.end()) {
2448 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2450 seqPriorityNode temp(num, it->second, firstCol);
2451 nameVector.push_back(temp);
// second pass: tokens remaining in `rest` after the last chunk was read
2461 vector<string> pieces = splitWhiteSpace(rest);
2463 for (int i = 0; i < pieces.size(); i++) {
2464 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2465 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2468 checkName(firstCol);
2469 checkName(secondCol);
2470 int num = getNumNames(secondCol);
2472 map<string, string>::iterator it = fastamap.find(firstCol);
2473 if (it == fastamap.end()) {
2475 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2477 seqPriorityNode temp(num, it->second, firstCol);
2478 nameVector.push_back(temp);
2487 catch(exception& e) {
2488 errorOut(e, "MothurOut", "readNames");
2492 //**********************************************************************************************************************
// Reads the whitespace-separated tokens of accnosfile, passes each through checkName(), and
// inserts it into the set `names`.
// The file is read in 4096-byte chunks tokenised with splitWhiteSpace(); a second pass handles
// what is left in `rest` after the last chunk.
// NOTE(review): the declaration of `names`, the loop header and the return statement are not
// visible in this listing.
2493 set<string> MothurOut::readAccnos(string accnosfile){
2497 openInputFile(accnosfile, in);
2504 if (control_pressed) { break; }
2506 in.read(buffer, 4096);
// `rest` carries a token that was cut off at the end of the previous chunk
2507 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2509 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
2510 names.insert(pieces[i]);
// second pass: tokens remaining in `rest` after the last chunk was read
2516 vector<string> pieces = splitWhiteSpace(rest);
2517 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2521 catch(exception& e) {
2522 errorOut(e, "MothurOut", "readAccnos");
2526 //**********************************************************************************************************************
// Vector flavour of readAccnos: appends the names found in an accnos file to the caller's
// 'names' vector, in the order splitWhiteSpace returns them (duplicates are not removed here).
// The file is read in 4096-byte chunks and every name is passed through checkName first.
// NOTE(review): the declarations of 'in', 'buffer' and 'rest', the read-loop header and the
// int return value are not visible in this view - confirm against the full file.
2527 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2531 openInputFile(accnosfile, in);
// leave the read loop as soon as the user has asked to stop
2538 if (control_pressed) { break; }
2540 in.read(buffer, 4096);
// only the bytes actually read (gcount) are tokenized
2541 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2543 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// presumably runs after the loop to flush whatever is still held in 'rest' - TODO confirm
2548 vector<string> pieces = splitWhiteSpace(rest);
2549 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2554 catch(exception& e) {
2555 errorOut(e, "MothurOut", "readAccnos");
2559 /***********************************************************************/
// Scans 'names' one character at a time looking for ',' separators.
// NOTE(review): the counter initialisation, its update and the return statement are not
// visible in this view; presumably the result is the number of comma-separated names - confirm.
2561 int MothurOut::getNumNames(string names){
2567 for(int i=0;i<names.size();i++){
2568 if(names[i] == ','){
2576 catch(exception& e) {
2577 errorOut(e, "MothurOut", "getNumNames");
2581 /***********************************************************************/
// Walks every character of 'line'.
// NOTE(review): the loop body and return are not visible in this view; going by the
// signature it presumably counts the occurrences of 'c' in 'line' - confirm.
2583 int MothurOut::getNumChar(string line, char c){
2588 for(int i=0;i<line.size();i++){
2597 catch(exception& e) {
2598 errorOut(e, "MothurOut", "getNumChar");
2602 //**********************************************************************************************************************
2603 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2607 if (subset.size() > bigset.size()) { return false; }
2609 //check if each guy in suset is also in bigset
2610 for (int i = 0; i < subset.size(); i++) {
2612 for (int j = 0; j < bigset.size(); j++) {
2613 if (subset[i] == bigset[j]) { match = true; break; }
2616 //you have a guy in subset that had no match in bigset
2617 if (match == false) { return false; }
2623 catch(exception& e) {
2624 errorOut(e, "MothurOut", "isSubset");
2628 /***********************************************************************/
2629 int MothurOut::mothurRemove(string filename){
2631 filename = getFullPathName(filename);
2632 int error = remove(filename.c_str());
2634 // if (errno != ENOENT) { //ENOENT == file does not exist
2635 // string message = "Error deleting file " + filename;
2636 // perror(message.c_str());
2641 catch(exception& e) {
2642 errorOut(e, "MothurOut", "mothurRemove");
2646 /***********************************************************************/
// Intended to convert 'item' into the int referenced by 'num'.
// The text is screened with isNumeric1 first; on the failure path an error line is printed
// and the member flag commandInputsConvertError is raised.
// NOTE(review): the conversion call, the value left in 'num' on failure and the meaning of
// the returned bool are not visible in this view - confirm before relying on them.
2647 bool MothurOut::mothurConvert(string item, int& num){
2651 if (isNumeric1(item)) {
2656 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2657 commandInputsConvertError = true;
2662 catch(exception& e) {
2663 errorOut(e, "MothurOut", "mothurConvert");
2667 /***********************************************************************/
// intDist overload of mothurConvert: intended to convert 'item' into the intDist referenced by 'num'.
// The text is screened with isNumeric1 first; on the failure path an error line is printed
// and the member flag commandInputsConvertError is raised.
// NOTE(review): the conversion call, the value left in 'num' on failure and the meaning of
// the returned bool are not visible in this view - confirm before relying on them.
2668 bool MothurOut::mothurConvert(string item, intDist& num){
2672 if (isNumeric1(item)) {
2677 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2678 commandInputsConvertError = true;
2683 catch(exception& e) {
2684 errorOut(e, "MothurOut", "mothurConvert");
2689 /***********************************************************************/
2690 bool MothurOut::isNumeric1(string stringToCheck){
2692 bool numeric = false;
2694 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2698 catch(exception& e) {
2699 errorOut(e, "MothurOut", "isNumeric1");
2704 /***********************************************************************/
// float overload of mothurConvert: intended to convert 'item' into the float referenced by 'num'.
// The text is screened with isNumeric1 first; on the failure path an error line is printed
// and the member flag commandInputsConvertError is raised.
// NOTE(review): the conversion call, the value left in 'num' on failure and the meaning of
// the returned bool are not visible in this view - confirm before relying on them.
2705 bool MothurOut::mothurConvert(string item, float& num){
2709 if (isNumeric1(item)) {
2714 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2715 commandInputsConvertError = true;
2720 catch(exception& e) {
2721 errorOut(e, "MothurOut", "mothurConvert");
2725 /***********************************************************************/
// double overload of mothurConvert: intended to convert 'item' into the double referenced by 'num'.
// The text is screened with isNumeric1 first; on the failure path an error line is printed
// and the member flag commandInputsConvertError is raised.
// NOTE(review): the conversion call, the value left in 'num' on failure and the meaning of
// the returned bool are not visible in this view - confirm before relying on them.
2726 bool MothurOut::mothurConvert(string item, double& num){
2730 if (isNumeric1(item)) {
2735 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2736 commandInputsConvertError = true;
2741 catch(exception& e) {
2742 errorOut(e, "MothurOut", "mothurConvert");
2746 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of doubles called 'binomial'.
// Rows 2..maxOrder are filled from the row above with the recurrence
//   binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j]
// and entries with j > i are set to 0.
// NOTE(review): the initialisation of column 0 and of rows 0 and 1, and the return
// statement, are not visible in this view.
2748 vector<vector<double> > MothurOut::binomial(int maxOrder){
2750 vector<vector<double> > binomial(maxOrder+1);
// give every row maxOrder+1 columns
2752 for(int i=0;i<=maxOrder;i++){
2753 binomial[i].resize(maxOrder+1);
2762 for(int i=2;i<=maxOrder;i++){
2766 for(int i=2;i<=maxOrder;i++){
2767 for(int j=1;j<=maxOrder;j++){
// NOTE(review): this assignment is overwritten right away - when i==j the test j>i below is
// false, so the else branch recomputes the entry from the previous row. An "else if" was
// probably intended; confirm before changing.
2768 if(i==j){ binomial[i][j]=1; }
2769 if(j>i) { binomial[i][j]=0; }
2770 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2777 catch(exception& e) {
2778 errorOut(e, "MothurOut", "binomial");
2782 /**************************************************************************************************/
// Decodes a base-36 string into an unsigned int.
// Each character's value is looked up in the 'converts' map and folded in from the left:
//   num = 36 * num + value
// NOTE(review): the code that fills 'converts' and that assigns 'c' and advances 'i' is not
// visible in this view. Because the lookup uses operator[], a character absent from the map
// would contribute 0 (and be inserted) instead of raising an error. No overflow check is visible.
2783 unsigned int MothurOut::fromBase36(string base36){
2785 unsigned int num = 0;
2787 map<char, int> converts;
2852 while (i < base36.length()) {
2854 num = 36 * num + converts[c];
2861 catch(exception& e) {
2862 errorOut(e, "MothurOut", "fromBase36");
2866 /***********************************************************************/
// Loops i from 1 through num inclusive.
// NOTE(review): the accumulator, the loop body and the return are not visible in this view;
// presumably the running product is returned. The return type is int, so large 'num' values
// would overflow - confirm the expected input range.
2868 int MothurOut::factorial(int num){
2872 for (int i = 1; i <= num; i++) {
2878 catch(exception& e) {
2879 errorOut(e, "MothurOut", "factorial");
2883 /***********************************************************************/
// Counts the '>' characters between the stream's current position and end of file.
// Every '>' is counted, wherever it sits on a line, not only those that begin a line.
// The count is taken by iterating the stream buffer to its end, so the stream is consumed.
// NOTE(review): whatever follows the count (e.g. repositioning the stream, the return) is not
// visible in this view.
2885 int MothurOut::getNumSeqs(ifstream& file){
2887 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2891 catch(exception& e) {
2892 errorOut(e, "MothurOut", "getNumSeqs");
2896 /***********************************************************************/
// Line-based sequence counter: increments numSeqs for each non-empty line read from 'file'
// whose first character is '>'.
// NOTE(review): the loop condition, the declaration of 'input' and any initial reset of
// numSeqs are not visible in this view.
2897 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2902 input = getline(file);
// the length test protects the input[0] access on blank lines
2903 if (input.length() != 0) {
2904 if(input[0] == '>'){ numSeqs++; }
2908 catch(exception& e) {
2909 errorOut(e, "MothurOut", "getNumSeqs");
2913 /***********************************************************************/
2915 //This function parses the estimator options and puts them in a vector
2916 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2919 if (symbol == '-') { splitAtDash(estim, container); return; }
2921 string individual = "";
2922 int estimLength = estim.size();
2923 for(int i=0;i<estimLength;i++){
2924 if(estim[i] == symbol){
2925 container.push_back(individual);
2929 individual += estim[i];
2932 container.push_back(individual);
2935 catch(exception& e) {
2936 errorOut(e, "MothurOut", "splitAtChar");
2941 /***********************************************************************/
// Splits 'estim' at '-' characters and appends the pieces to 'container' (vector overload).
// A dash preceded by a backslash is treated as literal text: the backslash is dropped and
// the dash is kept in the current piece. Whatever has been collected when the scan ends is
// appended as the final piece.
// NOTE(review): several lines (else branches, the reset of 'individual', closing braces) are
// not visible in this view. The first loop below is opened inside a block comment, i.e. it
// is an older, disabled implementation; only the second loop is live code.
2943 //This function parses the estimator options and puts them in a vector
2944 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2946 string individual = "";
2947 int estimLength = estim.size();
2948 bool prevEscape = false;
2949 /*for(int i=0;i<estimLength;i++){
2951 individual += estim[i];
2955 if(estim[i] == '\\'){
2958 else if(estim[i] == '-'){
2959 container.push_back(individual);
2964 individual += estim[i];
2971 for(int i=0;i<estimLength;i++){
2972 if(estim[i] == '-'){
2973 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
2975 container.push_back(individual);
2978 }else if(estim[i] == '\\'){
2979 if (i < estimLength-1) {
2980 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
2981 else { individual += estim[i]; prevEscape = false; } //if no, add in
2982 }else { individual += estim[i]; }
2984 individual += estim[i];
2990 container.push_back(individual);
2992 catch(exception& e) {
2993 errorOut(e, "MothurOut", "splitAtDash");
2998 /***********************************************************************/
// set<string> overload: the pieces are inserted into a set, so duplicates collapse and the
// original order of the labels is not kept.
// In the second loop below a dash preceded by a backslash is treated as literal text: the
// backslash is dropped and the dash stays in the current piece. A backslash that is the last
// character, or that is not followed by a dash, is kept as is. Whatever has been collected
// when the scan ends is inserted as the final piece.
// NOTE(review): two scanning loops are visible here; in the vector overload the first one is
// commented out - check whether the same holds for this overload. Else branches and the reset
// of 'individual' are not visible in this view.
2999 //This function parses the label options and puts them in a set
3000 void MothurOut::splitAtDash(string& estim, set<string>& container) {
3002 string individual = "";
3003 int estimLength = estim.size();
3004 bool prevEscape = false;
3006 for(int i=0;i<estimLength;i++){
3008 individual += estim[i];
3012 if(estim[i] == '\\'){
3015 else if(estim[i] == '-'){
3016 container.insert(individual);
3021 individual += estim[i];
3028 for(int i=0;i<estimLength;i++){
3029 if(estim[i] == '-'){
3030 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3032 container.insert(individual);
3035 }else if(estim[i] == '\\'){
3036 if (i < estimLength-1) {
3037 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3038 else { individual += estim[i]; prevEscape = false; } //if no, add in
3039 }else { individual += estim[i]; }
3041 individual += estim[i];
3044 container.insert(individual);
3047 catch(exception& e) {
3048 errorOut(e, "MothurOut", "splitAtDash");
3052 /***********************************************************************/
// set<int> overload: every piece is turned into an int with convert() and the number is
// inserted into the set, so duplicates collapse and the result is ordered numerically.
// In the second loop below a dash preceded by a backslash is kept as literal text in the
// current piece. The text collected when the scan ends is converted and inserted last.
// NOTE(review): 'lineNum' is declared on a line that is not visible here. Two scanning loops
// are visible; in the vector overload the first one is commented out - check whether the same
// holds for this overload. What convert() does with a piece that is not a number is not
// visible from this block.
3053 //This function parses the line options and puts them in a set
3054 void MothurOut::splitAtDash(string& estim, set<int>& container) {
3056 string individual = "";
3058 int estimLength = estim.size();
3059 bool prevEscape = false;
3061 for(int i=0;i<estimLength;i++){
3063 individual += estim[i];
3067 if(estim[i] == '\\'){
3070 else if(estim[i] == '-'){
3071 convert(individual, lineNum); //convert the string to int
3072 container.insert(lineNum);
3077 individual += estim[i];
3083 for(int i=0;i<estimLength;i++){
3084 if(estim[i] == '-'){
3085 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3087 convert(individual, lineNum); //convert the string to int
3088 container.insert(lineNum);
3091 }else if(estim[i] == '\\'){
3092 if (i < estimLength-1) {
3093 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3094 else { individual += estim[i]; prevEscape = false; } //if no, add in
3095 }else { individual += estim[i]; }
3097 individual += estim[i];
3101 convert(individual, lineNum); //convert the string to int
3102 container.insert(lineNum);
3104 catch(exception& e) {
3105 errorOut(e, "MothurOut", "splitAtDash");
3110 /***********************************************************************/
3111 string MothurOut::makeList(vector<string>& names) {
3115 if (names.size() == 0) { return list; }
3117 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
3120 list += names[names.size()-1];
3124 catch(exception& e) {
3125 errorOut(e, "MothurOut", "makeList");
3130 /***********************************************************************/
3131 //This function parses the a string and puts peices in a vector
3132 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
3134 string individual = "";
3135 int estimLength = estim.size();
3136 for(int i=0;i<estimLength;i++){
3137 if(estim[i] == ','){
3138 container.push_back(individual);
3142 individual += estim[i];
3145 container.push_back(individual);
3150 // string individual;
3152 // while (estim.find_first_of(',') != -1) {
3153 // individual = estim.substr(0,estim.find_first_of(','));
3154 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
3155 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
3156 // container.push_back(individual);
3160 // container.push_back(estim);
3162 catch(exception& e) {
3163 errorOut(e, "MothurOut", "splitAtComma");
3167 /***********************************************************************/
3168 //This function splits up the various option parameters
3169 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3171 prefix = suffix.substr(0,suffix.find_first_of(c));
3172 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3173 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3175 while(suffix.at(0) == ' ')
3176 suffix = suffix.substr(1, suffix.length());
3177 }else { suffix = ""; }
3180 catch(exception& e) {
3181 errorOut(e, "MothurOut", "splitAtChar");
3186 /***********************************************************************/
3188 //This function splits up the various option parameters
3189 void MothurOut::splitAtComma(string& prefix, string& suffix){
3191 prefix = suffix.substr(0,suffix.find_first_of(','));
3192 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3193 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3195 while(suffix.at(0) == ' ')
3196 suffix = suffix.substr(1, suffix.length());
3197 }else { suffix = ""; }
3200 catch(exception& e) {
3201 errorOut(e, "MothurOut", "splitAtComma");
3205 /***********************************************************************/
// When 'value' contains '=', 'key' receives the text before the first '=' and 'value' is
// cut down to the text after it (which may be empty).
// NOTE(review): what happens when there is no '=' is decided on lines that are not visible
// in this view.
3207 //This function separates the key value from the option value i.e. dist=96_...
3208 void MothurOut::splitAtEquals(string& key, string& value){
// find_first_of returns npos when nothing is found; the comparison with -1 relies on the
// usual conversion of -1 to the unsigned npos value
3210 if(value.find_first_of('=') != -1){
3211 key = value.substr(0,value.find_first_of('='));
// once an '=' has been found its index is at most length-1, so this test always holds
3212 if ((value.find_first_of('=')+1) <= value.length()) {
3213 value = value.substr(value.find_first_of('=')+1, value.length());
3220 catch(exception& e) {
3221 errorOut(e, "MothurOut", "splitAtEquals");
3226 /**************************************************************************************************/
3228 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3230 for (int i = 0; i < Groups.size(); i++) {
3231 if (groupname == Groups[i]) { return true; }
3235 catch(exception& e) {
3236 errorOut(e, "MothurOut", "inUsersGroups");
3240 /**************************************************************************************************/
3242 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3244 for (int i = 0; i < sets.size(); i++) {
3245 if (set == sets[i]) { return true; }
3249 catch(exception& e) {
3250 errorOut(e, "MothurOut", "inUsersGroups");
3254 /**************************************************************************************************/
3256 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3258 for (int i = 0; i < Groups.size(); i++) {
3259 if (groupname == Groups[i]) { return true; }
3263 catch(exception& e) {
3264 errorOut(e, "MothurOut", "inUsersGroups");
3269 /**************************************************************************************************/
3270 //returns true if any of the strings in first vector are in second vector
3271 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3274 for (int i = 0; i < groupnames.size(); i++) {
3275 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3279 catch(exception& e) {
3280 errorOut(e, "MothurOut", "inUsersGroups");
3284 /***********************************************************************/
// Parameters: 'label' is the label currently being read, 'userLabels' is the set of labels
// the user asked for (entries are erased from it here), and 'errorOff' silences the
// messages when it is anything other than "".
// NOTE(review): the declarations of 'labelFloat', 'temp' and 's', the handling of a label
// that cannot be converted, the places where 'smaller' is set, and the return statement are
// not visible in this view.
3285 //this function determines if the user has given us labels that are smaller than the given label.
3286 //if so then it returns true so that the calling function can run the previous valid distance.
3287 //it's a "smart" distance function. It also checks for invalid labels.
3288 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3291 set<string>::iterator it;
3292 vector<float> orderFloat;
3293 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3294 map<string, float>::iterator it2;
3296 bool smaller = false;
3298 //unique is the smallest line
3299 if (label == "unique") { return false; }
3301 if (convertTestFloat(label, labelFloat)) {
3302 convert(label, labelFloat);
3303 }else { //cant convert
3308 //go through users set and make them floats
// the loop header has no increment: the iterator is advanced inside the body so that
// entries can be erased while walking the set
3309 for(it = userLabels.begin(); it != userLabels.end();) {
3312 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3314 orderFloat.push_back(temp);
3315 userMap[*it] = temp;
// "unique" is recorded as -1 so that it sorts ahead of the numeric labels added above
// (assuming those are not negative - TODO confirm)
3317 }else if (*it == "unique") {
3318 orderFloat.push_back(-1.0);
3319 userMap["unique"] = -1.0;
// a label that is neither "unique" nor convertible is reported and dropped from the set;
// erase(it++) advances the iterator before the erased element is invalidated
3322 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
3323 userLabels.erase(it++);
3328 sort(orderFloat.begin(), orderFloat.end());
3330 /*************************************************/
3331 //is this label bigger than any of the users labels
3332 /*************************************************/
3334 //loop through order until you find a label greater than label
3335 for (int i = 0; i < orderFloat.size(); i++) {
3336 if (orderFloat[i] < labelFloat) {
// -1 is the stand-in stored for "unique" above
3338 if (orderFloat[i] == -1) {
3339 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3340 userLabels.erase("unique");
// NOTE(review): the mothurOutEndLine on the next line ends the output line before the label
// text is printed further down, so the message is split over two lines - confirm this is intended
3343 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// look up the user's original spelling of this value; userMap is keyed by the label text
3345 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3346 if (it2->second == orderFloat[i]) {
3348 //remove small labels
3349 userLabels.erase(s);
3353 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3355 //since they are sorted once you find a bigger one stop looking
3362 catch(exception& e) {
3363 errorOut(e, "MothurOut", "anyLabelsToProcess");
3368 /**************************************************************************************************/
// Compares the version stamp on the first line of 'file' with 'version'.
// The first line is expected to start with '#' followed by a dot-separated version. Both
// versions are split at '.' and compared piece by piece as numbers; the file is flagged as
// not good when it has no '#' line, when the number of pieces differs, or when a piece of
// 'version' is greater than the matching piece from the file.
// On a bad result the stream is closed; otherwise it is rewound to the beginning.
// NOTE(review): the declarations of 'good', 'num1' and 'num2', the else branches and the
// return statement are not visible in this view. The comparison stops only when a piece of
// 'version' is greater; a file piece that is greater does not end the loop, so later pieces
// are still compared - confirm that is intended.
3369 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3374 string line = getline(file);
3376 //before we added this check
// NOTE(review): line[0] is read without checking that the line is non-empty
3377 if (line[0] != '#') { good = false; }
// drop the leading '#'
3380 line = line.substr(1);
3382 vector<string> versionVector;
3383 splitAtChar(version, versionVector, '.');
3385 //check file version
3386 vector<string> linesVector;
3387 splitAtChar(line, linesVector, '.');
3389 if (versionVector.size() != linesVector.size()) { good = false; }
3391 for (int j = 0; j < versionVector.size(); j++) {
3393 convert(versionVector[j], num1);
3394 convert(linesVector[j], num2);
3396 //if mothurs version is newer than this files version, then we want to remake it
3397 if (num1 > num2) { good = false; break; }
3403 if (!good) { file.close(); }
3404 else { file.seekg(0); }
3408 catch(exception& e) {
3409 errorOut(e, "MothurOut", "checkReleaseVersion");
3413 /**************************************************************************************************/
3414 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3416 vector<double> averages; //averages.resize(numComp, 0.0);
3417 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3419 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3420 for (int i = 0; i < dists[thisIter].size(); i++) {
3421 averages[i] += dists[thisIter][i];
3426 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3430 catch(exception& e) {
3431 errorOut(e, "MothurOut", "getAverages");
3435 /**************************************************************************************************/
3436 double MothurOut::getAverage(vector<double> dists) {
3440 for (int i = 0; i < dists.size(); i++) {
3441 average += dists[i];
3445 average /= (double) dists.size();
3449 catch(exception& e) {
3450 errorOut(e, "MothurOut", "getAverage");
3455 /**************************************************************************************************/
3456 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3459 vector<double> averages = getAverages(dists);
3461 //find standard deviation
3462 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3463 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3465 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3466 for (int j = 0; j < dists[thisIter].size(); j++) {
3467 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3470 for (int i = 0; i < stdDev.size(); i++) {
3471 stdDev[i] /= (double) dists.size();
3472 stdDev[i] = sqrt(stdDev[i]);
3477 catch(exception& e) {
3478 errorOut(e, "MothurOut", "getAverages");
3482 /**************************************************************************************************/
3483 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3485 //find standard deviation
3486 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3487 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3489 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3490 for (int j = 0; j < dists[thisIter].size(); j++) {
3491 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3494 for (int i = 0; i < stdDev.size(); i++) {
3495 stdDev[i] /= (double) dists.size();
3496 stdDev[i] = sqrt(stdDev[i]);
3501 catch(exception& e) {
3502 errorOut(e, "MothurOut", "getAverages");
3506 /**************************************************************************************************/
3507 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3510 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3511 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3512 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3513 vector<seqDist> temp;
3514 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3516 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3517 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3518 tempDist.dist = 0.0;
3519 temp.push_back(tempDist);
3521 calcAverages.push_back(temp);
3524 if (mode == "average") {
3525 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3526 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3527 for (int j = 0; j < calcAverages[i].size(); j++) {
3528 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3533 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3534 for (int j = 0; j < calcAverages[i].size(); j++) {
3535 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3538 }else { //find median
3539 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3540 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3541 vector<double> dists;
3542 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3543 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3545 sort(dists.begin(), dists.end());
3546 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3551 return calcAverages;
3553 catch(exception& e) {
3554 errorOut(e, "MothurOut", "getAverages");
3558 /**************************************************************************************************/
3559 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3562 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3563 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3564 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3565 vector<seqDist> temp;
3566 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3568 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3569 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3570 tempDist.dist = 0.0;
3571 temp.push_back(tempDist);
3573 calcAverages.push_back(temp);
3577 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3578 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3579 for (int j = 0; j < calcAverages[i].size(); j++) {
3580 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3585 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3586 for (int j = 0; j < calcAverages[i].size(); j++) {
3587 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3591 return calcAverages;
3593 catch(exception& e) {
3594 errorOut(e, "MothurOut", "getAverages");
3598 /**************************************************************************************************/
3599 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3602 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3604 //find standard deviation
3605 vector< vector<seqDist> > stdDev;
3606 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3607 vector<seqDist> temp;
3608 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3610 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3611 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3612 tempDist.dist = 0.0;
3613 temp.push_back(tempDist);
3615 stdDev.push_back(temp);
3618 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3619 for (int i = 0; i < stdDev.size(); i++) {
3620 for (int j = 0; j < stdDev[i].size(); j++) {
3621 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3626 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3627 for (int j = 0; j < stdDev[i].size(); j++) {
3628 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3629 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3635 catch(exception& e) {
3636 errorOut(e, "MothurOut", "getAverages");
3640 /**************************************************************************************************/
3641 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3643 //find standard deviation
3644 vector< vector<seqDist> > stdDev;
3645 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3646 vector<seqDist> temp;
3647 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3649 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3650 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3651 tempDist.dist = 0.0;
3652 temp.push_back(tempDist);
3654 stdDev.push_back(temp);
3657 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3658 for (int i = 0; i < stdDev.size(); i++) {
3659 for (int j = 0; j < stdDev[i].size(); j++) {
3660 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3665 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3666 for (int j = 0; j < stdDev[i].size(); j++) {
3667 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3668 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3674 catch(exception& e) {
3675 errorOut(e, "MothurOut", "getAverages");
3680 /**************************************************************************************************/
3681 bool MothurOut::isContainingOnlyDigits(string input) {
3684 //are you a digit in ascii code
3685 for (int i = 0;i < input.length(); i++){
3686 if( input[i]>47 && input[i]<58){}
3687 else { return false; }
3692 catch(exception& e) {
3693 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3697 /**************************************************************************************************/
// Walks a ';'-delimited taxonomy string one taxon at a time and strips a trailing
// parenthesised confidence score, e.g. "(100)", from each taxon.
// A parenthesised part is removed only when isNumeric1 accepts its contents, so names that
// end in non-numeric parentheses are left alone.
// NOTE(review): the declaration of 'taxon', the checks on pos/pos2, the code that rebuilds
// 'tax' from the cleaned taxa and the normal return value are not visible in this view.
3698 int MothurOut::removeConfidences(string& tax) {
// each pass consumes the text up to and including the next ';'
3704 while (tax.find_first_of(';') != -1) {
// give up early when the user has asked to stop
3706 if (control_pressed) { return 0; }
3709 taxon = tax.substr(0,tax.find_first_of(';'));
// the last '(' and last ')' delimit the candidate confidence score
3711 int pos = taxon.find_last_of('(');
3714 int pos2 = taxon.find_last_of(')');
3716 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
3717 if (isNumeric1(confidenceScore)) {
3718 taxon = taxon.substr(0, pos); //rip off confidence
3724 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
3732 catch(exception& e) {
3733 errorOut(e, "MothurOut", "removeConfidences");
3737 /**************************************************************************************************/
3738 string MothurOut::removeQuotes(string tax) {
3744 for (int i = 0; i < tax.length(); i++) {
3746 if (control_pressed) { return newTax; }
3748 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3754 catch(exception& e) {
3755 errorOut(e, "MothurOut", "removeQuotes");
3759 /**************************************************************************************************/
3760 // function for calculating standard deviation
3761 double MothurOut::getStandardDeviation(vector<int>& featureVector){
3765 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3766 average /= (double) featureVector.size();
3768 //find standard deviation
3770 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3771 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
3774 stdDev /= (double) featureVector.size();
3775 stdDev = sqrt(stdDev);
3779 catch(exception& e) {
3780 errorOut(e, "MothurOut", "getStandardDeviation");
3784 /**************************************************************************************************/