5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Collects the names of the "current" settings into `types`: file-type keys
// such as "fasta" or "shared", plus the "processors" setting.
// NOTE(review): the declaration of `types` and the return statement are not
// visible in this view; presumably `types` is the set<string> returned - confirm.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("accnos");
27 types.insert("column");
28 types.insert("design");
29 types.insert("group");
32 types.insert("oligos");
33 types.insert("order");
34 types.insert("ordergroup");
35 types.insert("phylip");
36 types.insert("qfile");
37 types.insert("relabund");
38 types.insert("sabund");
39 types.insert("rabund");
41 types.insert("shared");
42 types.insert("taxonomy");
46 types.insert("count");
47 types.insert("processors");
// failures are reported with the class and function name
52 errorOut(e, "MothurOut", "getCurrentTypes");
56 /*********************************************************************************************/
// Prints one "key=value" line through mothurOut for every current-file member
// that is not the empty string, and for `processors` when it differs from "1".
57 void MothurOut::printCurrentFiles() {
// each member is reported under the option name shown in the string literal
61 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
62 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
63 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
64 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
65 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
66 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
67 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
68 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
69 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
70 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
71 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
// note: the quality file member is printed under the key "qfile"
72 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
73 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
74 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
75 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
76 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
77 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
78 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
79 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
80 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
81 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
82 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
// processors is kept as a string; "1" is treated as the value not worth printing
83 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
87 errorOut(e, "MothurOut", "printCurrentFiles");
91 /*********************************************************************************************/
// Returns true as soon as any current-file member is non-empty, or when
// `processors` is not "1".
// NOTE(review): the final return is not visible here; presumably it returns
// `hasCurrent`, which is never changed from false by the visible lines - confirm.
92 bool MothurOut::hasCurrentFiles() {
94 bool hasCurrent = false;
// each test returns immediately, so the order of the checks does not affect the result
96 if (accnosfile != "") { return true; }
97 if (columnfile != "") { return true; }
98 if (designfile != "") { return true; }
99 if (fastafile != "") { return true; }
100 if (groupfile != "") { return true; }
101 if (listfile != "") { return true; }
102 if (namefile != "") { return true; }
103 if (oligosfile != "") { return true; }
104 if (orderfile != "") { return true; }
105 if (ordergroupfile != "") { return true; }
106 if (phylipfile != "") { return true; }
107 if (qualfile != "") { return true; }
108 if (rabundfile != "") { return true; }
109 if (relabundfile != "") { return true; }
110 if (sabundfile != "") { return true; }
111 if (sfffile != "") { return true; }
112 if (sharedfile != "") { return true; }
113 if (taxonomyfile != "") { return true; }
114 if (treefile != "") { return true; }
115 if (flowfile != "") { return true; }
116 if (biomfile != "") { return true; }
117 if (counttablefile != "") { return true; }
118 if (processors != "1") { return true; }
123 catch(exception& e) {
124 errorOut(e, "MothurOut", "hasCurrentFiles");
129 /*********************************************************************************************/
// NOTE(review): the body of this function is not visible in this view; from the
// name it presumably resets the current-file members - confirm against the full file.
130 void MothurOut::clearCurrentFiles() {
// exceptions are reported with the class and function name
156 catch(exception& e) {
157 errorOut(e, "MothurOut", "clearCurrentFiles");
161 /***********************************************************************/
// Looks for programName using the directories listed in the PATH environment
// variable. First pass: a directory whose lower-cased name contains programName.
// Second pass: a directory in which a file named programName can be opened.
// Emits [DEBUG] lines through mothurOut when `debug` is set.
162 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv returns NULL when PATH is unset, and building a
// std::string from NULL is undefined behaviour - consider checking first.
165 string envPath = getenv("PATH");
168 //delimiting path char
170 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
176 //break apart path variable by ':'
178 splitAtChar(envPath, dirs, delim);
180 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
182 //get path related to mothur
183 for (int i = 0; i < dirs.size(); i++) {
185 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
187 //to lower so we can find it
188 string tempLower = "";
189 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
191 //is this mothurs path?
// NOTE(review): only the directory is lower-cased, so a programName containing
// upper-case letters can never match here - confirm callers pass lower case.
// The comparison with -1 relies on string::npos converting to -1.
192 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
195 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
198 //add programName so it looks like what argv would look like
199 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
200 pPath += "/" + programName;
202 pPath += "\\" + programName;
205 //okay programName is not in the path, so the folder programName is in must be in the path
206 //lets find out which one
208 //get path related to the program
209 for (int i = 0; i < dirs.size(); i++) {
211 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
213 //is this the programs path?
215 string tempIn = dirs[i];
216 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
217 tempIn += "/" + programName;
219 tempIn += "\\" + programName;
// the third argument selects the openInputFile overload that does not print an error
221 openInputFile(tempIn, in, "");
223 //if this file exists
224 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
231 catch(exception& e) {
232 errorOut(e, "MothurOut", "findProgramPath");
236 /*********************************************************************************************/
// Records filename as the log file name and opens it for output.
// NOTE(review): the MPI lines are presumably inside a USE_MPI guard that is
// not visible in this view - confirm.
237 void MothurOut::setFileName(string filename) {
239 logFileName = filename;
// find out which MPI process this is
243 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
245 if (pid == 0) { //only one process should output to screen
// `out` is the stream the log is written to
248 openOutputFile(filename, out);
254 catch(exception& e) {
255 errorOut(e, "MothurOut", "setFileName");
259 /*********************************************************************************************/
260 void MothurOut::setDefaultPath(string pathname) {
263 //add / to name if needed
264 string lastChar = pathname.substr(pathname.length()-1);
265 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
266 if (lastChar != "/") { pathname += "/"; }
268 if (lastChar != "\\") { pathname += "\\"; }
271 defaultPath = pathname;
274 catch(exception& e) {
275 errorOut(e, "MothurOut", "setDefaultPath");
279 /*********************************************************************************************/
// Stores pathname in outputDir exactly as given (no separator is appended here).
280 void MothurOut::setOutputDir(string pathname) {
282 outputDir = pathname;
284 catch(exception& e) {
285 errorOut(e, "MothurOut", "setOutputDir");
289 /*********************************************************************************************/
// NOTE(review): the statements that close the log are not visible in this view;
// only the MPI rank check that guards them is shown.
290 void MothurOut::closeLog() {
// find out which MPI process this is
295 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
297 if (pid == 0) { //only one process should output to screen
306 catch(exception& e) {
307 errorOut(e, "MothurOut", "closeLog");
312 /*********************************************************************************************/
// Destructor. NOTE(review): its clean-up statements are not visible in this view.
313 MothurOut::~MothurOut() {
318 catch(exception& e) {
// the function label passed here is the class name
319 errorOut(e, "MothurOut", "MothurOut");
323 /*********************************************************************************************/
// Writes `output` as program output.
// NOTE(review): the statements that do the writing are not visible in this view;
// only the MPI rank check that restricts output to process 0 is shown.
324 void MothurOut::mothurOut(string output) {
329 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
331 if (pid == 0) { //only one process should output to screen
341 catch(exception& e) {
342 errorOut(e, "MothurOut", "MothurOut");
346 /*********************************************************************************************/
// Ends the current output line.
// NOTE(review): the statements that do the writing are not visible in this view;
// only the MPI rank check that restricts output to process 0 is shown.
347 void MothurOut::mothurOutEndLine() {
351 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
353 if (pid == 0) { //only one process should output to screen
363 catch(exception& e) {
364 errorOut(e, "MothurOut", "MothurOutEndLine");
368 /*********************************************************************************************/
// Overload that also writes `output` to the caller-supplied outputFile.
// NOTE(review): the other output statements of this function are not visible in this view.
369 void MothurOut::mothurOut(string output, ofstream& outputFile) {
374 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
376 if (pid == 0) { //only one process should output to screen
// copy the text into the caller's file
381 outputFile << output;
389 catch(exception& e) {
390 errorOut(e, "MothurOut", "MothurOut");
394 /*********************************************************************************************/
// Overload of mothurOutEndLine that takes the caller-supplied outputFile.
// NOTE(review): the statements that write the line ending are not visible in this view.
395 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
412 catch(exception& e) {
413 errorOut(e, "MothurOut", "MothurOutEndLine");
417 /*********************************************************************************************/
// NOTE(review): judging by the name this writes `output` to the log file only;
// the writing statements are not visible in this view - confirm.
418 void MothurOut::mothurOutJustToLog(string output) {
422 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
424 if (pid == 0) { //only one process should output to screen
433 catch(exception& e) {
434 errorOut(e, "MothurOut", "MothurOutJustToLog");
438 /*********************************************************************************************/
// Reports an exception through mothurOut: an "[ERROR]: " prefix, the text of
// e.what(), then the class name (object) and function name where it was caught,
// followed by instructions for contacting the maintainers.
439 void MothurOut::errorOut(exception& e, string object, string function) {
// disabled memory-usage report
441 //mem_usage(vm, rss);
443 mothurOut("[ERROR]: ");
444 mothurOut(toString(e.what()));
445 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
448 /*********************************************************************************************/
449 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
450 // process_mem_usage(double &, double &) - takes two doubles by reference,
451 // attempts to read the system-dependent data for a process' virtual memory
452 // size and resident set size, and return the results in KB.
454 // On failure, returns 0.0, 0.0
// Reads /proc/self/stat, stores the process' virtual memory size (vsize / 1024.0)
// in vm_usage and its resident set size (rss * page size in KB) in resident_set,
// then prints both through mothurOut.
// NOTE(review): the declarations of vsize and rss and the return statements are
// not visible in this view.
// NOTE(review): the Unix guard also matches __APPLE__, where /proc/self/stat
// presumably does not exist - confirm what is reported there.
455 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
456 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
461 // 'file' stat seems to give the most reliable results
463 ifstream stat_stream("/proc/self/stat",ios_base::in);
465 // dummy vars for leading entries in stat that we don't care about
467 string pid, comm, state, ppid, pgrp, session, tty_nr;
468 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
469 string utime, stime, cutime, cstime, priority, nice;
470 string O, itrealvalue, starttime;
472 // the two fields we want
// the 22 leading fields are skipped; vsize and rss are read as the 23rd and 24th
477 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
478 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
479 >> utime >> stime >> cutime >> cstime >> priority >> nice
480 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
482 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
483 vm_usage = vsize / 1024.0;
484 resident_set = rss * page_size_kb;
486 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
490 /* //windows memory usage
491 // Get the list of process identifiers.
492 DWORD aProcesses[1024], cbNeeded, cProcesses;
494 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
496 // Calculate how many process identifiers were returned.
497 cProcesses = cbNeeded / sizeof(DWORD);
499 // Print the memory usage for each process
500 for (int i = 0; i < cProcesses; i++ ) {
501 DWORD processID = aProcesses[i];
503 PROCESS_MEMORY_COUNTERS pmc;
505 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
507 // Print the process identifier.
508 printf( "\nProcess ID: %u\n", processID);
510 if (NULL != hProcess) {
512 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
513 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
514 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
515 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
516 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
517 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
518 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
519 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
520 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
521 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
523 CloseHandle(hProcess);
533 /***********************************************************************/
// Opens fileName for appending on fileHandle after expanding it with
// getFullPathName, and prints an [ERROR] message when it cannot be opened.
// NOTE(review): the failure test and the return values are not visible in this view.
534 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
536 fileName = getFullPathName(fileName);
// ios::app: every write goes to the end of the existing file
538 fileHandle.open(fileName.c_str(), ios::app);
540 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
547 catch(exception& e) {
548 errorOut(e, "MothurOut", "openOutputFileAppend");
552 /***********************************************************************/
553 void MothurOut::gobble(istream& f){
557 while(isspace(d=f.get())) { ;}
558 if(!f.eof()) { f.putback(d); }
560 catch(exception& e) {
561 errorOut(e, "MothurOut", "gobble");
565 /***********************************************************************/
566 void MothurOut::gobble(istringstream& f){
569 while(isspace(d=f.get())) {;}
570 if(!f.eof()) { f.putback(d); }
572 catch(exception& e) {
573 errorOut(e, "MothurOut", "gobble");
578 /***********************************************************************/
580 string MothurOut::getline(istringstream& fileHandle) {
585 while (!fileHandle.eof()) {
587 char c = fileHandle.get();
589 //are you at the end of the line
590 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
597 catch(exception& e) {
598 errorOut(e, "MothurOut", "getline");
602 /***********************************************************************/
// Reads characters from fileHandle until a line terminator ('\n', '\r', '\f')
// or the end-of-file marker is met.
// NOTE(review): the loop header, the accumulation of the line and the return
// statement are not visible in this view.
604 string MothurOut::getline(ifstream& fileHandle) {
611 char c = fileHandle.get();
613 //are you at the end of the line
// NOTE(review): c is a char, so where char is signed a data byte of 0xFF also
// compares equal to EOF and would end the line early - confirm this is acceptable.
614 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
621 catch(exception& e) {
622 errorOut(e, "MothurOut", "getline");
626 /***********************************************************************/
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#ifdef USE_COMPRESSION
//Returns true when s ends with the NUL-terminated suffix; an empty suffix always matches.
//s is taken by const reference and compared in place, so no temporary strings are built.
inline bool endsWith(const std::string& s, const char * suffix){
	const size_t suffixLength = strlen(suffix);
	return s.size() >= suffixLength && s.compare(s.size() - suffixLength, suffixLength, suffix) == 0;
}
#endif
#endif
637 string MothurOut::getRootName(string longName){
640 string rootName = longName;
642 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
643 #ifdef USE_COMPRESSION
644 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
645 int pos = rootName.find_last_of('.');
646 rootName = rootName.substr(0, pos);
647 cerr << "shortening " << longName << " to " << rootName << "\n";
651 if(rootName.find_last_of(".") != rootName.npos){
652 int pos = rootName.find_last_of('.')+1;
653 rootName = rootName.substr(0, pos);
658 catch(exception& e) {
659 errorOut(e, "MothurOut", "getRootName");
663 /***********************************************************************/
665 string MothurOut::getSimpleName(string longName){
667 string simpleName = longName;
670 found=longName.find_last_of("/\\");
672 if(found != longName.npos){
673 simpleName = longName.substr(found+1);
678 catch(exception& e) {
679 errorOut(e, "MothurOut", "getSimpleName");
684 /***********************************************************************/
// Scales a rand() value into an index: rand() / (RAND_MAX + 1) is multiplied
// by (highest + 1) and truncated to int.
// NOTE(review): the return statement is not visible here; presumably `random` is returned.
686 int MothurOut::getRandomIndex(int highest){
// NOTE(review): highest+1 and rand() are converted to float (24-bit mantissa)
// before multiplying, so for large values precision may be lost - confirm the
// result cannot exceed `highest` for the ranges callers use.
689 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
693 catch(exception& e) {
694 errorOut(e, "MothurOut", "getRandomIndex");
699 /**********************************************************************/
701 string MothurOut::getPathName(string longName){
703 string rootPathName = longName;
705 if(longName.find_last_of("/\\") != longName.npos){
706 int pos = longName.find_last_of("/\\")+1;
707 rootPathName = longName.substr(0, pos);
712 catch(exception& e) {
713 errorOut(e, "MothurOut", "getPathName");
718 /***********************************************************************/
// Checks that dirName can be written to by creating and then removing a
// temporary file inside it. dirName is modified in place: a trailing separator
// is appended if missing and the name is expanded with getFullPathName.
// NOTE(review): the declaration of `tag`, the failure test and the return
// values are not visible in this view.
720 bool MothurOut::dirCheck(string& dirName){
726 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
731 //add / to name if needed
// NOTE(review): for an empty dirName, length()-1 wraps around and substr()
// throws out_of_range - confirm callers never pass "".
732 string lastChar = dirName.substr(dirName.length()-1);
733 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
734 if (lastChar != "/") { dirName += "/"; }
736 if (lastChar != "\\") { dirName += "\\"; }
739 //test to make sure directory exists
740 dirName = getFullPathName(dirName);
// name of the probe file created inside the directory
741 string outTemp = dirName + tag + "temp";
743 out.open(outTemp.c_str(), ios::trunc);
745 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
// remove the probe file again
748 mothurRemove(outTemp);
754 catch(exception& e) {
755 errorOut(e, "MothurOut", "dirCheck");
760 /***********************************************************************/
762 string MothurOut::hasPath(string longName){
767 found=longName.find_last_of("~/\\");
769 if(found != longName.npos){
770 path = longName.substr(0, found+1);
775 catch(exception& e) {
776 errorOut(e, "MothurOut", "hasPath");
781 /***********************************************************************/
783 string MothurOut::getExtension(string longName){
785 string extension = "";
787 if(longName.find_last_of('.') != longName.npos){
788 int pos = longName.find_last_of('.');
789 extension = longName.substr(pos, longName.length());
794 catch(exception& e) {
795 errorOut(e, "MothurOut", "getExtension");
799 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) and returns true when the
// stream is already at end-of-file, i.e. the file has no content to read.
// NOTE(review): the declaration of fileHandle, the open-failure test, the step
// that precedes the eof() check and the remaining returns are not visible in this view.
800 bool MothurOut::isBlank(string fileName){
803 fileName = getFullPathName(fileName);
806 fileHandle.open(fileName.c_str());
808 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
811 //check for blank file
813 if (fileHandle.eof()) { fileHandle.close(); return true; }
818 catch(exception& e) {
819 errorOut(e, "MothurOut", "isBlank");
823 /***********************************************************************/
// Expands fileName into a complete path name.
//  - a name without any path part (hasPath returns "") is returned unchanged;
//  - a '~' is replaced by the home directory taken from the environment;
//  - "./" and "../" prefixes ('\' on Windows) are resolved against the current
//    working directory;
//  - any other name is returned unchanged as already complete.
// NOTE(review): several declarations (newFileName, cwd, dirs, pos, ...), the
// #else/#endif lines and the final return are not visible in this view.
825 string MothurOut::getFullPathName(string fileName){
828 string path = hasPath(fileName);
832 if (path == "") { return fileName; } //its a simple name
833 else { //we need to complete the pathname
834 // ex. ../../../filename
835 // cwd = /user/work/desktop
838 //get current working directory
839 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// the comparison with -1 relies on string::npos converting to -1
841 if (path.find("~") != -1) { //go to home directory
844 char *homepath = NULL;
845 homepath = getenv ("HOME");
// an unset HOME is treated as an empty home directory
846 if ( homepath != NULL) { homeDir = homepath; }
847 else { homeDir = ""; }
// keep everything after the first '~' and put the home directory in front of it
849 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
852 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
853 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
855 //char* cwdpath = new char[1024];
857 //cwdpath=getcwd(cwdpath,size);
860 char *cwdpath = NULL;
// NOTE(review): getcwd(NULL, 0) hands back a buffer it allocated itself on the
// platforms that support this form; no matching free() is visible here - confirm.
861 cwdpath = getcwd(NULL, 0); // or _getcwd
862 if ( cwdpath != NULL) { cwd = cwdpath; }
// drop the leading '/' so the directory names can be split on '/'
868 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
870 //break apart the current working directory
872 while (simpleCWD.find_first_of('/') != string::npos) {
873 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
874 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
877 //get last one // ex. ../../../filename = /user/work/desktop/filename
878 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index of the deepest working-directory component that is kept in the result
881 int index = dirs.size()-1;
// walk the relative prefix from right to left, one "./" or "../" at a time
883 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
884 if (pos == 0) { break; //you are at the end
885 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
886 path = path.substr(0, pos-1);
888 if (index == 0) { break; }
889 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
890 path = path.substr(0, pos);
891 }else if (pos == 1) { break; //you are at the end
892 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// prepend the remaining working-directory components, deepest first
895 for (int i = index; i >= 0; i--) {
896 newFileName = dirs[i] + "/" + newFileName;
899 newFileName = "/" + newFileName;
903 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): getenv returns NULL when HOMEPATH is unset, and building a
// std::string from NULL is undefined behaviour - the Unix branch checks for this.
904 string homeDir = getenv ("HOMEPATH");
905 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
908 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
909 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
911 char *cwdpath = NULL;
912 cwdpath = getcwd(NULL, 0); // or _getcwd
913 if ( cwdpath != NULL) { cwd = cwdpath; }
916 //break apart the current working directory
918 while (cwd.find_first_of('\\') != -1) {
919 string dir = cwd.substr(0,cwd.find_first_of('\\'));
920 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
925 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
927 int index = dirs.size()-1;
929 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
930 if (pos == 0) { break; //you are at the end
931 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
932 path = path.substr(0, pos-1);
934 if (index == 0) { break; }
935 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
936 path = path.substr(0, pos);
937 }else if (pos == 1) { break; //you are at the end
938 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
941 for (int i = index; i >= 0; i--) {
// NOTE(review): this literal is two backslash characters, so components are
// joined with "\\" rather than a single '\' - confirm this is intended.
942 newFileName = dirs[i] + "\\\\" + newFileName;
951 catch(exception& e) {
952 errorOut(e, "MothurOut", "getFullPathName");
956 /***********************************************************************/
// Opens fileName for reading on fileHandle without printing an error message
// when it cannot be opened (the message is commented out below).
// With USE_COMPRESSION on Unix-like systems, a ".gz" or ".bz2" file is
// decompressed by a forked child running zcat/bzcat into a temporary named
// pipe, and the pipe is opened instead of the file.
// NOTE(review): the parameter m is not used by any visible line; the open
// test and the return values are not visible in this view.
958 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
961 string completeFileName = getFullPathName(fileName);
962 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
963 #ifdef USE_COMPRESSION
964 // check for gzipped or bzipped file
965 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process could create the
// same path before mkfifo runs - confirm this race is acceptable.
966 string tempName = string(tmpnam(0));
967 mkfifo(tempName.c_str(), 0666);
968 int fork_result = fork();
969 if (fork_result < 0) {
970 cerr << "Error forking.\n";
// fork() returns 0 in the child process
972 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into the shell command unquoted, so
// names containing spaces or shell metacharacters will not work as intended.
973 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
974 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
975 system(command.c_str());
976 cerr << "Done decompressing " << completeFileName << "\n";
977 mothurRemove(tempName);
980 cerr << "waiting on child process " << fork_result << "\n";
// the parent reads from the pipe instead of the compressed file
981 completeFileName = tempName;
986 fileHandle.open(completeFileName.c_str());
988 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
991 //check for blank file
996 catch(exception& e) {
997 errorOut(e, "MothurOut", "openInputFile - no Error");
1001 /***********************************************************************/
// Opens fileName for reading on fileHandle, printing an [ERROR] message when
// the file cannot be opened or is blank.
// With USE_COMPRESSION on Unix-like systems, a ".gz" or ".bz2" file is
// decompressed by a forked child running zcat/bzcat into a temporary named
// pipe, and the pipe is opened instead of the file.
// NOTE(review): the open test and the return values are not visible in this view.
1003 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1006 //get full path name
1007 string completeFileName = getFullPathName(fileName);
1008 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1009 #ifdef USE_COMPRESSION
1010 // check for gzipped or bzipped file
1011 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process could create the
// same path before mkfifo runs - confirm this race is acceptable.
1012 string tempName = string(tmpnam(0));
1013 mkfifo(tempName.c_str(), 0666);
1014 int fork_result = fork();
1015 if (fork_result < 0) {
1016 cerr << "Error forking.\n";
// fork() returns 0 in the child process
1018 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into the shell command unquoted, so
// names containing spaces or shell metacharacters will not work as intended.
1019 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1020 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1021 system(command.c_str());
1022 cerr << "Done decompressing " << completeFileName << "\n";
1023 mothurRemove(tempName);
1026 cerr << "waiting on child process " << fork_result << "\n";
// the parent reads from the pipe instead of the compressed file
1027 completeFileName = tempName;
1033 fileHandle.open(completeFileName.c_str());
1035 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1039 //check for blank file
1041 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1046 catch(exception& e) {
1047 errorOut(e, "MothurOut", "openInputFile");
1051 /***********************************************************************/
// Renames oldName to newName, replacing an existing newName.
// Unix-like systems shell out to rm and mv; other systems call mothurRemove
// followed by rename(). Identical names are a no-op that returns 0.
// NOTE(review): the declaration of inTest, the #else/#endif lines and the
// final return are not visible in this view.
1053 int MothurOut::renameFile(string oldName, string newName){
1056 if (oldName == newName) { return 0; }
// probe for an existing target using the overload that prints no error
1059 int exist = openInputFile(newName, inTest, "");
1062 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1063 if (exist == 0) { //you could open it so you want to delete it
// NOTE(review): both file names are pasted into shell commands unquoted, so
// names containing spaces or shell metacharacters will not work as intended.
1064 string command = "rm " + newName;
1065 system(command.c_str());
1068 string command = "mv " + oldName + " " + newName;
1069 system(command.c_str());
1071 mothurRemove(newName);
// NOTE(review): no visible line uses renameOk - confirm the result is checked.
1072 int renameOk = rename(oldName.c_str(), newName.c_str());
1077 catch(exception& e) {
1078 errorOut(e, "MothurOut", "renameFile");
1083 /***********************************************************************/
// Opens fileName for writing on fileHandle, truncating existing content, and
// prints an [ERROR] message when it cannot be opened.
// With USE_COMPRESSION on Unix-like systems, output to a ".gz" or ".bz2" name
// goes to a temporary named pipe that a forked child compresses into the file
// with gzip/bzip2.
// NOTE(review): the open test and the return values are not visible in this view.
1085 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1088 string completeFileName = getFullPathName(fileName);
1089 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1090 #ifdef USE_COMPRESSION
1091 // check for gzipped file
1092 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process could create the
// same path before mkfifo runs - confirm this race is acceptable.
1093 string tempName = string(tmpnam(0));
1094 mkfifo(tempName.c_str(), 0666);
1095 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1096 int fork_result = fork();
1097 if (fork_result < 0) {
1098 cerr << "Error forking.\n";
// fork() returns 0 in the child process
1100 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into the shell command unquoted, so
// names containing spaces or shell metacharacters will not work as intended.
1101 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1102 system(command.c_str());
// the parent writes to the pipe instead of the compressed file
1105 completeFileName = tempName;
1110 fileHandle.open(completeFileName.c_str(), ios::trunc);
1112 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1119 catch(exception& e) {
1120 errorOut(e, "MothurOut", "openOutputFile");
1126 /**************************************************************************************************/
// Appends the content of the file `temp` to the end of `filename`, copying it
// in blocks and counting the '\n' characters that were copied.
// NOTE(review): the declarations of input, output, buffer and numLines, the
// clean-up and the return value are not visible in this view; presumably
// numLines is returned - confirm.
1127 int MothurOut::appendFiles(string temp, string filename) {
1132 //open output file in append mode
1133 openOutputFileAppend(filename, output);
// the three-argument overload does not print an error when temp cannot be opened
1134 int ableToOpen = openInputFile(temp, input, "no error");
1135 //int ableToOpen = openInputFile(temp, input);
1138 if (ableToOpen == 0) { //you opened it
1141 while (!input.eof()) {
// read up to 4096 bytes; gcount() is the number actually read
1142 input.read(buffer, 4096);
1143 output.write(buffer, input.gcount());
1144 //count number of lines
1145 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1154 catch(exception& e) {
1155 errorOut(e, "MothurOut", "appendFiles");
1160 /**************************************************************************************************/
// Sorts the column-formatted distance file distFile by its distance column and
// writes the result to <root of distFile>sorted.dist.
// Unix-like systems call the system sort; elsewhere the distance is moved to
// the front of each line, the file is sorted with the Windows sort command,
// and the distance is moved back to the end.
// NOTE(review): no visible line uses outputDir; the declarations of the
// streams and of dist, the clean-up and the return value are not visible in
// this view; presumably outfile is returned - confirm.
1161 string MothurOut::sortFile(string distFile, string outputDir){
1164 //if (outputDir == "") { outputDir += hasPath(distFile); }
1165 string outfile = getRootName(distFile) + "sorted.dist";
1168 //if you can, use the unix sort since its been optimized for years
1169 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// NOTE(review): "-k +3" is an unusual key specification and the file names are
// pasted into the command unquoted - confirm both behave as intended.
1170 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1171 system(command.c_str());
1172 #else //you are stuck with my best attempt...
1173 //windows sort does not have a way to specify a column, only a character in the line
1174 //since we cannot assume that the distance will always be at the same character location on each line
1175 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1177 //read in the file line by line and put the distance first
1178 string tempDistFile = distFile + ".temp";
1181 openInputFile(distFile, input);
1182 openOutputFile(tempDistFile, output);
1184 string firstName, secondName;
1186 while (!input.eof()) {
1187 input >> firstName >> secondName >> dist;
1188 output << dist << '\t' << firstName << '\t' << secondName << endl;
1195 //sort using windows sort
1196 string tempOutfile = outfile + ".temp";
1197 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1198 system(command.c_str());
1200 //read in sorted file and put distance at end again
1203 openInputFile(tempOutfile, input2);
1204 openOutputFile(outfile, output2);
1206 while (!input2.eof()) {
1207 input2 >> dist >> firstName >> secondName;
1208 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
//remove the two intermediate files
1215 mothurRemove(tempDistFile);
1216 mothurRemove(tempOutfile);
1221 catch(exception& e) {
1222 errorOut(e, "MothurOut", "sortFile");
1226 /**************************************************************************************************/
// Scans filename byte by byte, collecting byte offsets in `positions`, and
// stores the number of collected offsets in num.
// NOTE(review): the condition under which an offset is recorded, the
// declarations of inFASTA, pFile and in, and the return statement are not
// visible in this view; from the name the offsets presumably mark the start of
// each FASTA record - confirm.
1227 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1229 vector<unsigned long long> positions;
1231 //openInputFile(filename, inFASTA);
// binary mode: no translation of line endings while reading
1232 inFASTA.open(filename.c_str(), ios::binary);
// number of characters read so far
1235 unsigned long long count = 0;
1236 while(!inFASTA.eof()){
1237 //input = getline(inFASTA);
1238 //cout << input << '\t' << inFASTA.tellg() << endl;
1239 //if (input.length() != 0) {
1240 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1242 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1243 char c = inFASTA.get(); count++;
// count was already advanced past c, so count-1 is the offset of c itself
1245 positions.push_back(count-1);
1246 //cout << count << endl;
1251 num = positions.size();
1256 //get num bytes in file
1257 pFile = fopen (filename.c_str(),"rb");
1258 if (pFile==NULL) perror ("Error opening file");
1260 fseek (pFile, 0, SEEK_END);
// NOTE(review): this reads the last element, which is invalid when positions is empty - confirm it is guarded.
1265 unsigned long long size = positions[(positions.size()-1)];
1267 openInputFile(filename, in);
1272 if(in.eof()) { break; }
// a final entry is appended after the collected offsets
1277 positions.push_back(size);
1282 catch(exception& e) {
1283 errorOut(e, "MothurOut", "setFilePosFasta");
1287 /**************************************************************************************************/
// Scans filename byte by byte and collects in `positions` the byte offset at
// which each line starts (beginning with 0). num receives the number of
// entries minus one, and the last entry is overwritten with `size`.
// NOTE(review): the outer loop, the declarations of in and pFile, the
// assignment of size and the return statement are not visible in this view.
1288 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1290 filename = getFullPathName(filename);
1292 vector<unsigned long long> positions;
1294 //openInputFile(filename, in);
// binary mode: no translation of line endings while reading
1295 in.open(filename.c_str(), ios::binary);
1298 unsigned long long count = 0;
1299 positions.push_back(0);
1302 //getline counting reads
1303 char d = in.get(); count++;
// NOTE(review): `d != in.eof()` compares the character with a bool (0 or 1),
// not with an end-of-file marker, so it cannot detect the end of the input -
// confirm how this loop terminates for a file without a final line terminator.
1304 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1305 //get next character
1311 d=in.get(); count++;
// skip whitespace after the line terminator (same bool comparison as above)
1312 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
// count-1 is the offset of the last character read, i.e. the start of the next line
1314 positions.push_back(count-1);
1315 //cout << count-1 << endl;
1319 num = positions.size()-1;
1322 unsigned long long size;
1324 //get num bytes in file
1325 pFile = fopen (filename.c_str(),"rb");
1326 if (pFile==NULL) perror ("Error opening file");
1328 fseek (pFile, 0, SEEK_END);
1333 positions[(positions.size()-1)] = size;
1337 catch(exception& e) {
1338 errorOut(e, "MothurOut", "setFilePosEachLine");
1342 /**************************************************************************************************/
// Computes byte offsets that cut `filename` into pieces, one per process.
// The returned vector starts with 0 and ends with `size`; on the Unix-like branch `proc`
// is rewritten to the number of pieces actually produced (filePos.size() - 1).
// NOTE(review): declarations, the inner search loop header, the `#else`/`#endif` lines and
// the return are not visible in this listing.
1344 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1346 vector<unsigned long long> filePos;
1347 filePos.push_back(0);
1350 unsigned long long size;
1352 filename = getFullPathName(filename);
1354 //get num bytes in file
1355 pFile = fopen (filename.c_str(),"rb");
1356 if (pFile==NULL) perror ("Error opening file");
1358 fseek (pFile, 0, SEEK_END);
1363 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1365 //estimate file breaks
1366 unsigned long long chunkSize = 0;
1367 chunkSize = size / proc;
1369 //file too small to divide by processors
// With fewer bytes than processes the whole file becomes a single piece and proc is forced to 1.
1370 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1372 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1373 for (int i = 0; i < proc; i++) {
1374 unsigned long long spot = (i+1) * chunkSize;
1377 openInputFile(filename, in);
1381 unsigned long long newSpot = spot;
// A '>' is pushed back so that tellg() reports the offset of the '>' itself.
1385 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
// int(c) == -1 is used as the end-of-input test for the character read.
1386 else if (int(c) == -1) { break; }
1390 //there was not another sequence before the end of the file
1391 unsigned long long sanityPos = in.tellg();
// NOTE(review): sanityPos is unsigned, so this comparison relies on -1 converting to the
// all-ones value that a failed tellg() also converts to.
1393 if (sanityPos == -1) { break; }
1394 else { filePos.push_back(newSpot); }
1400 filePos.push_back(size);
1402 //sanity check filePos
// Drops any boundary that is not strictly greater than its predecessor; i-- re-tests the same index.
1403 for (int i = 0; i < (filePos.size()-1); i++) {
1404 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1407 proc = (filePos.size() - 1);
// The remaining statements report that this function is not meant for Windows builds
// (the preprocessor line that introduces this branch is not visible here).
1409 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1411 filePos.push_back(size);
1415 catch(exception& e) {
1416 errorOut(e, "MothurOut", "divideFile");
1420 /**************************************************************************************************/
1421 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1424 vector<unsigned long long> filePos = divideFile(filename, proc);
1426 for (int i = 0; i < (filePos.size()-1); i++) {
1430 openInputFile(filename, in);
1431 in.seekg(filePos[i]);
1432 unsigned long long size = filePos[(i+1)] - filePos[i];
1433 char* chunk = new char[size];
1434 in.read(chunk, size);
1438 string fileChunkName = filename + "." + toString(i) + ".tmp";
1440 openOutputFile(fileChunkName, out);
1442 out << chunk << endl;
1447 files.push_back(fileChunkName);
1452 catch(exception& e) {
1453 errorOut(e, "MothurOut", "divideFile");
1457 /***********************************************************************/
1459 bool MothurOut::isTrue(string f){
1462 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1464 if ((f == "TRUE") || (f == "T")) { return true; }
1465 else { return false; }
1467 catch(exception& e) {
1468 errorOut(e, "MothurOut", "isTrue");
1473 /***********************************************************************/
// Rounds `dist` to the nearest multiple of 1/precision.
// `precision` is used as a scale factor (dist is multiplied by it), not as a digit count.
// NOTE(review): int() truncates toward zero, so adding 0.5 rounds to nearest only for
// non-negative values, and dist * precision must fit in an int.
1475 float MothurOut::roundDist(float dist, int precision){
1477 return int(dist * precision + 0.5)/float(precision);
1479 catch(exception& e) {
1480 errorOut(e, "MothurOut", "roundDist");
1484 /***********************************************************************/
// Rounds `dist` up to the next multiple of 1/precision: scales by `precision`, applies
// ceil(), then divides by `precision` again.
// `precision` is used as a scale factor (dist is multiplied by it), not as a digit count.
1486 float MothurOut::ceilDist(float dist, int precision){
1488 return int(ceil(dist * precision))/float(precision);
1490 catch(exception& e) {
1491 errorOut(e, "MothurOut", "ceilDist");
1495 /***********************************************************************/
// Splits the first `size` characters of `buffer` into whitespace-delimited tokens.
// `rest` is an in/out accumulator: non-space characters are appended to whatever it holds
// on entry, and a completed token is moved from `rest` into `pieces`.
// NOTE(review): unlike the string overload, no final push of `rest` is visible here, so a
// token not yet followed by whitespace presumably stays in `rest` for the caller's next
// block - the tail of the function (and the return) is not visible; confirm.
1497 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1499 vector<string> pieces;
1501 for (int i = 0; i < size; i++) {
1502 if (!isspace(buffer[i])) { rest += buffer[i]; }
// Whitespace reached: emit the token gathered so far, if any.
1504 if (rest != "") { pieces.push_back(rest); rest = ""; }
1505 while (i < size) { //gobble white space
1506 if (isspace(buffer[i])) { i++; }
// First character of the next token starts a fresh `rest`.
1507 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1514 catch(exception& e) {
1515 errorOut(e, "MothurOut", "splitWhiteSpace");
1519 /***********************************************************************/
// Splits `input` into whitespace-delimited tokens.
// Characters are gathered in `rest`; each run of whitespace ends the current token, and
// whatever remains in `rest` after the scan is added as the last token.
// NOTE(review): the declaration of `rest`, some braces and the return are not visible here.
1520 vector<string> MothurOut::splitWhiteSpace(string input){
1522 vector<string> pieces;
1525 for (int i = 0; i < input.length(); i++) {
1526 if (!isspace(input[i])) { rest += input[i]; }
1528 if (rest != "") { pieces.push_back(rest); rest = ""; }
1529 while (i < input.length()) { //gobble white space
1530 if (isspace(input[i])) { i++; }
1531 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// Trailing token that was not followed by whitespace.
1536 if (rest != "") { pieces.push_back(rest); }
1540 catch(exception& e) {
1541 errorOut(e, "MothurOut", "splitWhiteSpace");
1545 /***********************************************************************/
// Splits `input` into whitespace-delimited tokens, treating text that follows a single or
// double quote as one unit up to the next quote character of either kind.
// When `input` contains no quote at all the work is delegated to splitWhiteSpace(input).
// NOTE(review): the declaration of `rest`, the statements executed when the closing quote
// is found, several braces and the return are not visible in this listing.
1546 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1548 vector<string> pieces;
// find() results are stored in int; the npos comparisons below rely on -1 converting back to npos.
1551 int pos = input.find('\'');
1552 int pos2 = input.find('\"');
1554 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1556 for (int i = 0; i < input.length(); i++) {
// `rest` can itself be a lone quote because the whitespace-gobbling loop below seeds it
// with the first character of the next token.
1557 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1559 for (int j = i+1; j < input.length(); j++) {
1560 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
1564 }else { rest += input[j]; }
1566 }else if (!isspace(input[i])) { rest += input[i]; }
1568 if (rest != "") { pieces.push_back(rest); rest = ""; }
1569 while (i < input.length()) { //gobble white space
1570 if (isspace(input[i])) { i++; }
1571 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1576 if (rest != "") { pieces.push_back(rest); }
1580 catch(exception& e) {
// NOTE(review): the name reported here is "splitWhiteSpace", not this function's name.
1581 errorOut(e, "MothurOut", "splitWhiteSpace");
1585 //**********************************************************************************************************************
1586 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
1590 openInputFile(namefile, in);
1594 bool pairDone = false;
1595 bool columnOne = true;
1596 string firstCol, secondCol;
1599 if (control_pressed) { break; }
1601 in.read(buffer, 4096);
1602 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1604 for (int i = 0; i < pieces.size(); i++) {
1605 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1606 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1609 //are there confidence scores, if so remove them
1610 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1611 map<string, string>::iterator itTax = taxMap.find(firstCol);
1613 if(itTax == taxMap.end()) {
1614 bool ignore = false;
1615 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1617 if (!ignore) { taxMap[firstCol] = secondCol; }
1618 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1620 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1629 vector<string> pieces = splitWhiteSpace(rest);
1631 for (int i = 0; i < pieces.size(); i++) {
1632 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1633 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1636 //are there confidence scores, if so remove them
1637 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
1638 map<string, string>::iterator itTax = taxMap.find(firstCol);
1640 if(itTax == taxMap.end()) {
1641 bool ignore = false;
1642 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
1644 if (!ignore) { taxMap[firstCol] = secondCol; }
1645 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
1647 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); control_pressed = true;
1655 return taxMap.size();
1658 catch(exception& e) {
1659 errorOut(e, "MothurOut", "readTax");
1663 /**********************************************************************************************************************/
// Reads a two-column names file and maps every name of the comma-separated second column
// to the first-column name: nameMap[name] = firstCol. Returns nameMap.size().
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(rest, buffer, n);
// a second pass over splitWhiteSpace(rest) handles the text left in `rest`.
// NOTE(review): `redund` is not used in any visible line; declarations, the loop header
// and the `pairDone` guard are not visible in this listing.
1664 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
1668 openInputFile(namefile, in);
1672 bool pairDone = false;
1673 bool columnOne = true;
1674 string firstCol, secondCol;
1677 if (control_pressed) { break; }
1679 in.read(buffer, 4096);
1680 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1682 for (int i = 0; i < pieces.size(); i++) {
// Tokens alternate: odd tokens are the first column, even tokens complete a pair.
1683 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1684 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1687 //parse names into vector
1688 vector<string> theseNames;
1689 splitAtComma(secondCol, theseNames);
// This inner `i` shadows the index of the enclosing loop over pieces.
1690 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1698 vector<string> pieces = splitWhiteSpace(rest);
1700 for (int i = 0; i < pieces.size(); i++) {
1701 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1702 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1705 //parse names into vector
1706 vector<string> theseNames;
1707 splitAtComma(secondCol, theseNames);
1708 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1714 return nameMap.size();
1717 catch(exception& e) {
1718 errorOut(e, "MothurOut", "readNames");
1722 /**********************************************************************************************************************/
// Reads a two-column file and stores the columns reversed: nameMap[secondCol] = firstCol.
// The second column is used as a single key (it is not split at commas here).
// Returns nameMap.size().
// NOTE(review): `flip` is not used in any visible line; declarations, the loop header and
// the `pairDone` guard are not visible in this listing.
1723 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
1727 openInputFile(namefile, in);
1731 bool pairDone = false;
1732 bool columnOne = true;
1733 string firstCol, secondCol;
1736 if (control_pressed) { break; }
1738 in.read(buffer, 4096);
1739 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1741 for (int i = 0; i < pieces.size(); i++) {
1742 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1743 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1746 nameMap[secondCol] = firstCol;
// Second pass: tokens still held in `rest` after the block reads.
1754 vector<string> pieces = splitWhiteSpace(rest);
1756 for (int i = 0; i < pieces.size(); i++) {
1757 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1758 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1761 nameMap[secondCol] = firstCol;
1767 return nameMap.size();
1770 catch(exception& e) {
1771 errorOut(e, "MothurOut", "readNames");
1775 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, both cleared first:
//   nameMap[name]       = firstCol, for every name of the comma-separated second column;
//   nameCount[firstCol] = number of names in that second column.
// Returns nameMap.size().
// NOTE(review): declarations, the loop header and the `pairDone` guard are not visible here.
1776 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
1778 nameMap.clear(); nameCount.clear();
1781 openInputFile(namefile, in);
1785 bool pairDone = false;
1786 bool columnOne = true;
1787 string firstCol, secondCol;
1790 if (control_pressed) { break; }
1792 in.read(buffer, 4096);
1793 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1795 for (int i = 0; i < pieces.size(); i++) {
1796 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1797 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1800 //parse names into vector
1801 vector<string> theseNames;
1802 splitAtComma(secondCol, theseNames);
1803 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1804 nameCount[firstCol] = theseNames.size();
// Second pass: tokens still held in `rest` after the block reads.
1812 vector<string> pieces = splitWhiteSpace(rest);
1814 for (int i = 0; i < pieces.size(); i++) {
1815 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1816 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1819 //parse names into vector
1820 vector<string> theseNames;
1821 splitAtComma(secondCol, theseNames);
1822 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
1823 nameCount[firstCol] = theseNames.size();
1829 return nameMap.size();
1832 catch(exception& e) {
1833 errorOut(e, "MothurOut", "readNames");
1837 /**********************************************************************************************************************/
// Reads a two-column file verbatim: nameMap[firstCol] = secondCol (the second column is
// stored unsplit). Returns nameMap.size().
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(rest, buffer, n);
// a second pass over splitWhiteSpace(rest) handles the text left in `rest`.
// NOTE(review): declarations and the loop header are not visible in this listing.
1838 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
1842 openInputFile(namefile, in);
1846 bool pairDone = false;
1847 bool columnOne = true;
1848 string firstCol, secondCol;
1851 if (control_pressed) { break; }
1853 in.read(buffer, 4096);
1854 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1856 for (int i = 0; i < pieces.size(); i++) {
1857 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1858 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
// Store the pair once both columns have been seen, then re-arm for the next pair.
1860 if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
1866 vector<string> pieces = splitWhiteSpace(rest);
1868 for (int i = 0; i < pieces.size(); i++) {
1869 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1870 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1872 if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
1876 return nameMap.size();
1879 catch(exception& e) {
1880 errorOut(e, "MothurOut", "readNames");
1884 /**********************************************************************************************************************/
// Reads a two-column names file into nameMap[firstCol] = the second column split at
// commas (one vector element per name). Returns nameMap.size().
// NOTE(review): declarations, the loop header and the `pairDone` guard are not visible here.
1885 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
1889 openInputFile(namefile, in);
1893 bool pairDone = false;
1894 bool columnOne = true;
1895 string firstCol, secondCol;
1898 if (control_pressed) { break; }
1900 in.read(buffer, 4096);
1901 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1903 for (int i = 0; i < pieces.size(); i++) {
1904 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1905 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1908 vector<string> temp;
1909 splitAtComma(secondCol, temp);
1910 nameMap[firstCol] = temp;
// Second pass: tokens still held in `rest` after the block reads.
1918 vector<string> pieces = splitWhiteSpace(rest);
1920 for (int i = 0; i < pieces.size(); i++) {
1921 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1922 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1925 vector<string> temp;
1926 splitAtComma(secondCol, temp);
1927 nameMap[firstCol] = temp;
1933 return nameMap.size();
1935 catch(exception& e) {
1936 errorOut(e, "MothurOut", "readNames");
1940 /**********************************************************************************************************************/
// Reads a two-column names file into a map from the first-column name to
// getNumNames(secondCol), i.e. a count derived from the second column.
// NOTE(review): declarations, the loop header, the `pairDone` guard and the return
// statement are not visible in this listing (presumably nameMap is returned).
1941 map<string, int> MothurOut::readNames(string namefile) {
1943 map<string, int> nameMap;
1947 openInputFile(namefile, in);
1951 bool pairDone = false;
1952 bool columnOne = true;
1953 string firstCol, secondCol;
1956 if (control_pressed) { break; }
1958 in.read(buffer, 4096);
1959 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
1961 for (int i = 0; i < pieces.size(); i++) {
1962 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1963 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1966 int num = getNumNames(secondCol);
1967 nameMap[firstCol] = num;
// Second pass: tokens still held in `rest` after the block reads.
1975 vector<string> pieces = splitWhiteSpace(rest);
1976 for (int i = 0; i < pieces.size(); i++) {
1977 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
1978 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
1981 int num = getNumNames(secondCol);
1982 nameMap[firstCol] = num;
1991 catch(exception& e) {
1992 errorOut(e, "MothurOut", "readNames");
1996 /**********************************************************************************************************************/
// Reads a two-column names file and, for each pair, looks the first-column name up in
// `fastamap`. A name found there is appended to nameVector as
// seqPriorityNode(getNumNames(secondCol), it->second, firstCol); a name that is missing
// is reported with an [ERROR] message.
// NOTE(review): declarations, the loop header, the `pairDone` guard, the else that
// separates the error report from the push_back, whatever else the error branch does,
// and the return value are not visible in this listing.
1997 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2003 openInputFile(namefile, in);
2007 bool pairDone = false;
2008 bool columnOne = true;
2009 string firstCol, secondCol;
2012 if (control_pressed) { break; }
2014 in.read(buffer, 4096);
2015 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2017 for (int i = 0; i < pieces.size(); i++) {
2018 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2019 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2022 int num = getNumNames(secondCol);
2024 map<string, string>::iterator it = fastamap.find(firstCol);
2025 if (it == fastamap.end()) {
2027 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2029 seqPriorityNode temp(num, it->second, firstCol);
2030 nameVector.push_back(temp);
// Second pass: tokens still held in `rest` after the block reads.
2040 vector<string> pieces = splitWhiteSpace(rest);
2042 for (int i = 0; i < pieces.size(); i++) {
2043 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2044 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2047 int num = getNumNames(secondCol);
2049 map<string, string>::iterator it = fastamap.find(firstCol);
2050 if (it == fastamap.end()) {
2052 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2054 seqPriorityNode temp(num, it->second, firstCol);
2055 nameVector.push_back(temp);
2064 catch(exception& e) {
2065 errorOut(e, "MothurOut", "readNames");
2069 //**********************************************************************************************************************
// Reads every whitespace-delimited token of `accnosfile` into the set `names`
// (duplicates collapse because a set is used).
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(rest, buffer, n);
// a second pass over splitWhiteSpace(rest) handles the text left in `rest`.
// NOTE(review): declarations (including `names`), the loop header and the return are not
// visible in this listing.
2070 set<string> MothurOut::readAccnos(string accnosfile){
2074 openInputFile(accnosfile, in);
2081 if (control_pressed) { break; }
2083 in.read(buffer, 4096);
2084 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2086 for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
2091 vector<string> pieces = splitWhiteSpace(rest);
2092 for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
2096 catch(exception& e) {
2097 errorOut(e, "MothurOut", "readAccnos");
2101 //**********************************************************************************************************************
// Appends every whitespace-delimited token of `accnosfile` to `names`, in file order.
// Tokens are appended with push_back, so entries already in `names` are kept and
// duplicates are not removed.
// NOTE(review): declarations, the loop header and the return value are not visible in
// this listing.
2102 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2106 openInputFile(accnosfile, in);
2113 if (control_pressed) { break; }
2115 in.read(buffer, 4096);
2116 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2118 for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
2123 vector<string> pieces = splitWhiteSpace(rest);
2124 for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
2129 catch(exception& e) {
2130 errorOut(e, "MothurOut", "readAccnos");
2134 /***********************************************************************/
// Derives a count from `names` by scanning it for ',' characters.
// NOTE(review): the counter's initial value, what happens on each comma and the return are
// not visible in this listing - presumably the result is the number of comma-separated
// names; confirm.
2136 int MothurOut::getNumNames(string names){
2142 for(int i=0;i<names.size();i++){
2143 if(names[i] == ','){
2151 catch(exception& e) {
2152 errorOut(e, "MothurOut", "getNumNames");
2156 /***********************************************************************/
// Walks over every character of `line`.
// NOTE(review): the loop body and the return are not visible in this listing - presumably
// the function counts the occurrences of `c` in `line`; confirm.
2158 int MothurOut::getNumChar(string line, char c){
2163 for(int i=0;i<line.size();i++){
2172 catch(exception& e) {
2173 errorOut(e, "MothurOut", "getNumChar");
2177 //**********************************************************************************************************************
// Tests whether every element of `subset` also occurs in `bigset`.
// Returns false immediately when subset has more elements than bigset, and false as soon
// as one element of subset has no equal element in bigset. Each element is searched
// linearly, so the cost is subset.size() * bigset.size() comparisons in the worst case.
// NOTE(review): the declaration of `match` and the final return are not visible here.
2178 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2182 if (subset.size() > bigset.size()) { return false; }
2184 //check if each guy in subset is also in bigset
2185 for (int i = 0; i < subset.size(); i++) {
2187 for (int j = 0; j < bigset.size(); j++) {
2188 if (subset[i] == bigset[j]) { match = true; break; }
2191 //you have a guy in subset that had no match in bigset
2192 if (match == false) { return false; }
2198 catch(exception& e) {
2199 errorOut(e, "MothurOut", "isSubset");
2203 /***********************************************************************/
// Deletes `filename` after expanding it with getFullPathName(); the result of remove()
// is stored in `error`.
// Failure reporting is disabled: the errno/perror handling below is commented out.
// NOTE(review): the return statement is not visible in this listing.
2204 int MothurOut::mothurRemove(string filename){
2206 filename = getFullPathName(filename);
2207 int error = remove(filename.c_str());
2209 // if (errno != ENOENT) { //ENOENT == file does not exist
2210 // string message = "Error deleting file " + filename;
2211 // perror(message.c_str());
2216 catch(exception& e) {
2217 errorOut(e, "MothurOut", "mothurRemove");
2221 /***********************************************************************/
// Converts `item` to an int stored in `num`, using isNumeric1() as the validity test.
// The visible failure path logs "cannot convert ... to an integer." and sets
// commandInputsConvertError.
// NOTE(review): the conversion statement, the else that introduces the failure path and
// the returned value are not visible in this listing.
2222 bool MothurOut::mothurConvert(string item, int& num){
2226 if (isNumeric1(item)) {
2231 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2232 commandInputsConvertError = true;
2237 catch(exception& e) {
2238 errorOut(e, "MothurOut", "mothurConvert");
2242 /***********************************************************************/
// Converts `item` to an intDist stored in `num`, using isNumeric1() as the validity test.
// The visible failure path logs "cannot convert ... to an integer." and sets
// commandInputsConvertError.
// NOTE(review): the conversion statement, the else that introduces the failure path and
// the returned value are not visible in this listing.
2243 bool MothurOut::mothurConvert(string item, intDist& num){
2247 if (isNumeric1(item)) {
2252 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2253 commandInputsConvertError = true;
2258 catch(exception& e) {
2259 errorOut(e, "MothurOut", "mothurConvert");
2264 /***********************************************************************/
// Character-set test used before numeric conversion: `numeric` becomes true when
// `stringToCheck` contains no character outside "0123456789.-".
// NOTE(review): this checks characters only, not number syntax - an empty string, "1-2"
// or "..." also satisfy it. The return statement is not visible (presumably `numeric`).
2265 bool MothurOut::isNumeric1(string stringToCheck){
2267 bool numeric = false;
2269 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2273 catch(exception& e) {
2274 errorOut(e, "MothurOut", "isNumeric1");
2279 /***********************************************************************/
// Converts `item` to a float stored in `num`, using isNumeric1() as the validity test.
// The visible failure path logs "cannot convert ... to a float." and sets
// commandInputsConvertError.
// NOTE(review): the conversion statement, the else that introduces the failure path and
// the returned value are not visible in this listing.
2280 bool MothurOut::mothurConvert(string item, float& num){
2284 if (isNumeric1(item)) {
2289 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2290 commandInputsConvertError = true;
2295 catch(exception& e) {
2296 errorOut(e, "MothurOut", "mothurConvert");
2300 /***********************************************************************/
// Converts `item` to a double stored in `num`, using isNumeric1() as the validity test.
// The visible failure path logs "cannot convert ... to a double." and sets
// commandInputsConvertError.
// NOTE(review): the conversion statement, the else that introduces the failure path and
// the returned value are not visible in this listing.
2301 bool MothurOut::mothurConvert(string item, double& num){
2305 if (isNumeric1(item)) {
2310 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2311 commandInputsConvertError = true;
2316 catch(exception& e) {
2317 errorOut(e, "MothurOut", "mothurConvert");
2321 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of binomial coefficients using the
// recurrence binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j].
// NOTE(review): the statements that seed rows 0/1 and column 0, the body of the first
// i>=2 loop and the return are not visible in this listing.
2323 vector<vector<double> > MothurOut::binomial(int maxOrder){
2325 vector<vector<double> > binomial(maxOrder+1);
2327 for(int i=0;i<=maxOrder;i++){
2328 binomial[i].resize(maxOrder+1);
2337 for(int i=2;i<=maxOrder;i++){
2341 for(int i=2;i<=maxOrder;i++){
2342 for(int j=1;j<=maxOrder;j++){
// The i==j assignment is not chained with `else`, so for i==j the if/else below runs too
// and its else-branch overwrites the value with the recurrence.
2343 if(i==j){ binomial[i][j]=1; }
2344 if(j>i) { binomial[i][j]=0; }
2345 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2352 catch(exception& e) {
2353 errorOut(e, "MothurOut", "binomial");
2357 /**************************************************************************************************/
// Interprets `base36` as a base-36 number: digits are processed left to right and
// accumulated as num = 36 * num + digitValue, where digitValue comes from the `converts`
// lookup table.
// NOTE(review): the statements that fill `converts`, the extraction of `c`, the index
// update and the return are not visible in this listing, so the digit alphabet cannot be
// confirmed from here. A character missing from the map would be looked up with
// operator[], which inserts it with value 0.
2358 unsigned int MothurOut::fromBase36(string base36){
2360 unsigned int num = 0;
2362 map<char, int> converts;
2427 while (i < base36.length()) {
2429 num = 36 * num + converts[c];
2436 catch(exception& e) {
2437 errorOut(e, "MothurOut", "fromBase36");
2441 /***********************************************************************/
// Iterates i from 1 to `num` inclusive.
// NOTE(review): the accumulator and the return are not visible in this listing -
// presumably the loop multiplies a running product to compute num!. If that product is
// an int, it overflows for num > 12 with a 32-bit int; confirm.
2443 int MothurOut::factorial(int num){
2447 for (int i = 1; i <= num; i++) {
2453 catch(exception& e) {
2454 errorOut(e, "MothurOut", "factorial");
2458 /***********************************************************************/
// Counts the '>' characters between the current read position of `file` and its end.
// The stream is consumed by the count. Every '>' byte is counted, not only those that
// begin a line.
// NOTE(review): the return statement is not visible (presumably numSeqs).
2460 int MothurOut::getNumSeqs(ifstream& file){
2462 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
2466 catch(exception& e) {
2467 errorOut(e, "MothurOut", "getNumSeqs");
2471 /***********************************************************************/
// Line-based variant: reads lines with getline(file) and increments `numSeqs` for each
// non-empty line whose first character is '>'.
// NOTE(review): the declaration of `input`, any initialisation of numSeqs and the loop
// header are not visible in this listing.
2472 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
2477 input = getline(file);
2478 if (input.length() != 0) {
2479 if(input[0] == '>'){ numSeqs++; }
2483 catch(exception& e) {
2484 errorOut(e, "MothurOut", "getNumSeqs");
2488 /***********************************************************************/
2490 //This function parses the estimator options and puts them in a vector
// Splits `estim` at every occurrence of `symbol` and appends the pieces to `container`.
// A '-' separator is delegated to splitAtDash(estim, container). Otherwise characters are
// gathered in `individual`, which is pushed at each separator and once more after the
// loop, so the text after the last separator (possibly empty) is always appended.
// NOTE(review): the reset of `individual` after a push and the else that guards the
// append are not visible in this listing.
2491 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
2494 if (symbol == '-') { splitAtDash(estim, container); return; }
2496 string individual = "";
2497 int estimLength = estim.size();
2498 for(int i=0;i<estimLength;i++){
2499 if(estim[i] == symbol){
2500 container.push_back(individual);
2504 individual += estim[i];
2507 container.push_back(individual);
2510 catch(exception& e) {
2511 errorOut(e, "MothurOut", "splitAtChar");
2516 /***********************************************************************/
2518 //This function parses the estimator options and puts them in a vector
// Splits `estim` at '-' characters and appends the pieces to `container`; the text after
// the last dash is appended after the loop.
// A backslash is handled specially together with the `prevEscape` flag.
// NOTE(review): the conditions around `prevEscape` and the reset of `individual` are not
// visible in this listing - presumably "\-" keeps a literal dash inside a piece; confirm.
2519 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
2521 string individual = "";
2522 int estimLength = estim.size();
2523 bool prevEscape = false;
2524 for(int i=0;i<estimLength;i++){
2526 individual += estim[i];
2530 if(estim[i] == '\\'){
2533 else if(estim[i] == '-'){
2534 container.push_back(individual);
2539 individual += estim[i];
2544 container.push_back(individual);
2546 catch(exception& e) {
2547 errorOut(e, "MothurOut", "splitAtDash");
2552 /***********************************************************************/
2553 //This function parses the label options and puts them in a set
// Set variant: splits `estim` at '-' characters and inserts the pieces into `container`
// (duplicate pieces collapse); the text after the last dash is inserted after the loop.
// A backslash is handled specially together with the `prevEscape` flag.
// NOTE(review): the conditions around `prevEscape` and the reset of `individual` are not
// visible in this listing - presumably "\-" keeps a literal dash inside a piece; confirm.
2554 void MothurOut::splitAtDash(string& estim, set<string>& container) {
2556 string individual = "";
2557 int estimLength = estim.size();
2558 bool prevEscape = false;
2559 for(int i=0;i<estimLength;i++){
2561 individual += estim[i];
2565 if(estim[i] == '\\'){
2568 else if(estim[i] == '-'){
2569 container.insert(individual);
2574 individual += estim[i];
2579 container.insert(individual);
2582 catch(exception& e) {
2583 errorOut(e, "MothurOut", "splitAtDash");
2587 /***********************************************************************/
2588 //This function parses the line options and puts them in a set
// Integer variant: splits `estim` at '-' characters, converts each piece with
// convert(individual, lineNum) and inserts the resulting int into `container`; the text
// after the last dash is converted and inserted after the loop.
// NOTE(review): the declaration of `lineNum`, the conditions around `prevEscape` and the
// reset of `individual` are not visible in this listing.
2589 void MothurOut::splitAtDash(string& estim, set<int>& container) {
2591 string individual = "";
2593 int estimLength = estim.size();
2594 bool prevEscape = false;
2595 for(int i=0;i<estimLength;i++){
2597 individual += estim[i];
2601 if(estim[i] == '\\'){
2604 else if(estim[i] == '-'){
2605 convert(individual, lineNum); //convert the string to int
2606 container.insert(lineNum);
2611 individual += estim[i];
2616 convert(individual, lineNum); //convert the string to int
2617 container.insert(lineNum);
2619 catch(exception& e) {
2620 errorOut(e, "MothurOut", "splitAtDash");
2624 /***********************************************************************/
// Joins `names` into one comma-separated string without a trailing comma.
// An empty vector returns `list` unchanged from its initial value (declaration not visible
// here). The empty check must stay ahead of the loop: names.size()-1 is unsigned and
// would wrap around for an empty vector.
// NOTE(review): the final return is not visible in this listing.
2625 string MothurOut::makeList(vector<string>& names) {
2629 if (names.size() == 0) { return list; }
2631 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
2634 list += names[names.size()-1];
2638 catch(exception& e) {
2639 errorOut(e, "MothurOut", "makeList");
2644 /***********************************************************************/
2645 //This function parses a string and puts the pieces in a vector
// Splits `estim` at every ',' and appends the pieces to `container`.
// Characters are gathered in `individual`, which is pushed at each comma and once more
// after the loop, so the text after the last comma (possibly empty) is always appended.
// `estim` itself is only read by the visible statements.
// NOTE(review): the reset of `individual` after a push and the else that guards the
// append are not visible in this listing.
2646 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
2648 string individual = "";
2649 int estimLength = estim.size();
2650 for(int i=0;i<estimLength;i++){
2651 if(estim[i] == ','){
2652 container.push_back(individual);
2656 individual += estim[i];
2659 container.push_back(individual);
// Earlier substr-based implementation, kept commented out:
2664 // string individual;
2666 // while (estim.find_first_of(',') != -1) {
2667 // individual = estim.substr(0,estim.find_first_of(','));
2668 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
2669 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
2670 // container.push_back(individual);
2674 // container.push_back(estim);
2676 catch(exception& e) {
2677 errorOut(e, "MothurOut", "splitAtComma");
2681 /***********************************************************************/
2682 //This function splits up the various option parameters
2683 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
2685 prefix = suffix.substr(0,suffix.find_first_of(c));
2686 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
2687 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
2689 while(suffix.at(0) == ' ')
2690 suffix = suffix.substr(1, suffix.length());
2694 catch(exception& e) {
2695 errorOut(e, "MothurOut", "splitAtComma");
2700 /***********************************************************************/
2702 //This function splits up the various option parameters
2703 void MothurOut::splitAtComma(string& prefix, string& suffix){
2705 prefix = suffix.substr(0,suffix.find_first_of(','));
2706 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
2707 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
2709 while(suffix.at(0) == ' ')
2710 suffix = suffix.substr(1, suffix.length());
2714 catch(exception& e) {
2715 errorOut(e, "MothurOut", "splitAtComma");
2719 /***********************************************************************/
2721 //This function separates the key value from the option value i.e. dist=96_...
// Splits a "key=value" string held in `value`: when it contains '=', `key` receives the
// text before the first '=' and `value` is replaced by the text after it.
// NOTE(review): what happens when `value` contains no '=' is not visible in this listing.
2722 void MothurOut::splitAtEquals(string& key, string& value){
// find_first_of returns npos when '=' is absent; the comparison with -1 relies on -1
// converting to npos.
2724 if(value.find_first_of('=') != -1){
2725 key = value.substr(0,value.find_first_of('='));
2726 if ((value.find_first_of('=')+1) <= value.length()) {
2727 value = value.substr(value.find_first_of('=')+1, value.length());
2734 catch(exception& e) {
2735 errorOut(e, "MothurOut", "splitAtEquals");
2740 /**************************************************************************************************/
// Returns true when `groupname` equals one of the entries of `Groups` (linear search).
// `Groups` is taken by value, so the vector is copied on every call.
// NOTE(review): the return for the not-found case is not visible (presumably false).
2742 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
2744 for (int i = 0; i < Groups.size(); i++) {
2745 if (groupname == Groups[i]) { return true; }
2749 catch(exception& e) {
2750 errorOut(e, "MothurOut", "inUsersGroups");
2754 /**************************************************************************************************/
// Returns true when the vector `set` compares equal (same elements in the same order) to
// one of the vectors in `sets`. Both arguments are taken by value and therefore copied.
// NOTE(review): the return for the not-found case is not visible (presumably false).
2756 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
2758 for (int i = 0; i < sets.size(); i++) {
2759 if (set == sets[i]) { return true; }
2763 catch(exception& e) {
2764 errorOut(e, "MothurOut", "inUsersGroups");
2768 /**************************************************************************************************/
// Returns true when `groupname` equals one of the ints in `Groups` (linear search).
// `Groups` is taken by value, so the vector is copied on every call.
// NOTE(review): the return for the not-found case is not visible (presumably false).
2770 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
2772 for (int i = 0; i < Groups.size(); i++) {
2773 if (groupname == Groups[i]) { return true; }
2777 catch(exception& e) {
2778 errorOut(e, "MothurOut", "inUsersGroups");
2783 /**************************************************************************************************/
2784 //returns true if any of the strings in first vector are in second vector
// Returns true as soon as one entry of `groupnames` is found in `Groups`, using the
// single-name overload for each lookup (which copies `Groups` on every call).
// NOTE(review): the return for the no-match case is not visible (presumably false).
2785 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
2788 for (int i = 0; i < groupnames.size(); i++) {
2789 if (inUsersGroups(groupnames[i], Groups)) { return true; }
2793 catch(exception& e) {
2794 errorOut(e, "MothurOut", "inUsersGroups");
2798 /***********************************************************************/
2799 //this function determines if the user has given us labels that are smaller than the given label.
2800 //if so then it returns true so that the calling function can run the previous valid distance.
2801 //it's a "smart" distance function. It also checks for invalid labels.
// Compares `label` with the labels the user asked for and prunes `userLabels`:
//  - "unique" as `label` returns false immediately;
//  - user labels that are neither "unique" nor convertible to float are reported (unless
//    `errorOff` is non-empty) and erased from userLabels;
//  - user labels numerically smaller than `label` are reported as missing from the file
//    and erased; "unique" is represented by -1.0 so it sorts before every distance.
// Messages are printed only when `errorOff` is the empty string.
// NOTE(review): the declarations of labelFloat/temp/s, the handling of an unconvertible
// `label`, the iterator increments, the places where `smaller` is set and the return are
// not visible in this listing.
2802 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
2805 set<string>::iterator it;
2806 vector<float> orderFloat;
2807 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
2808 map<string, float>::iterator it2;
2810 bool smaller = false;
2812 //unique is the smallest line
2813 if (label == "unique") { return false; }
2815 if (convertTestFloat(label, labelFloat)) {
2816 convert(label, labelFloat);
2817 }else { //cant convert
2822 //go through users set and make them floats
// The for statement has no increment expression; the iterator is advanced inside the body.
2823 for(it = userLabels.begin(); it != userLabels.end();) {
2826 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
2828 orderFloat.push_back(temp);
2829 userMap[*it] = temp;
2831 }else if (*it == "unique") {
2832 orderFloat.push_back(-1.0);
2833 userMap["unique"] = -1.0;
2836 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
// erase(it++) advances the iterator before the erased element is invalidated.
2837 userLabels.erase(it++);
2842 sort(orderFloat.begin(), orderFloat.end());
2844 /*************************************************/
2845 //is this label bigger than any of the users labels
2846 /*************************************************/
2848 //loop through order until you find a label greater than label
2849 for (int i = 0; i < orderFloat.size(); i++) {
2850 if (orderFloat[i] < labelFloat) {
2852 if (orderFloat[i] == -1) {
2853 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
2854 userLabels.erase("unique");
2857 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// userMap is searched by value to recover the user's original spelling of the label.
2859 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
2860 if (it2->second == orderFloat[i]) {
2862 //remove small labels
2863 userLabels.erase(s);
2867 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
2869 //since they are sorted once you find a bigger one stop looking
2876 catch(exception& e) {
2877 errorOut(e, "MothurOut", "anyLabelsToProcess");
2882 /**************************************************************************************************/
2883 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
2888 string line = getline(file);
2890 //before we added this check
2891 if (line[0] != '#') { good = false; }
2894 line = line.substr(1);
2896 vector<string> versionVector;
2897 splitAtChar(version, versionVector, '.');
2899 //check file version
2900 vector<string> linesVector;
2901 splitAtChar(line, linesVector, '.');
2903 if (versionVector.size() != linesVector.size()) { good = false; }
2905 for (int j = 0; j < versionVector.size(); j++) {
2907 convert(versionVector[j], num1);
2908 convert(linesVector[j], num2);
2910 //if mothurs version is newer than this files version, then we want to remake it
2911 if (num1 > num2) { good = false; break; }
2917 if (!good) { file.close(); }
2918 else { file.seekg(0); }
2922 catch(exception& e) {
2923 errorOut(e, "MothurOut", "checkReleaseVersion");
2927 /**************************************************************************************************/
2928 bool MothurOut::isContainingOnlyDigits(string input) {
2931 //are you a digit in ascii code
2932 for (int i = 0;i < input.length(); i++){
2933 if( input[i]>47 && input[i]<58){}
2934 else { return false; }
2939 catch(exception& e) {
2940 errorOut(e, "MothurOut", "isContainingOnlyDigits");
2944 /**************************************************************************************************/
2945 int MothurOut::removeConfidences(string& tax) {
2951 while (tax.find_first_of(';') != -1) {
2953 if (control_pressed) { return 0; }
2956 taxon = tax.substr(0,tax.find_first_of(';'));
2958 int pos = taxon.find_last_of('(');
2961 int pos2 = taxon.find_last_of(')');
2963 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
2964 if (isNumeric1(confidenceScore)) {
2965 taxon = taxon.substr(0, pos); //rip off confidence
2971 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
2979 catch(exception& e) {
2980 errorOut(e, "MothurOut", "removeConfidences");
2984 /**************************************************************************************************/
2985 string MothurOut::removeQuotes(string tax) {
2991 for (int i = 0; i < tax.length(); i++) {
2993 if (control_pressed) { return newTax; }
2995 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
3001 catch(exception& e) {
3002 errorOut(e, "MothurOut", "removeQuotes");
3006 /**************************************************************************************************/
3007 // function for calculating standard deviation
3008 double MothurOut::getStandardDeviation(vector<int>& featureVector){
3012 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
3013 average /= (double) featureVector.size();
3015 //find standard deviation
3017 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
3018 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
3021 stdDev /= (double) featureVector.size();
3022 stdDev = sqrt(stdDev);
3026 catch(exception& e) {
3027 errorOut(e, "MothurOut", "getStandardDeviation");
3031 /**************************************************************************************************/