5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
// Returns the shared MothurOut object. The first call (while _uniqueInstance
// is still 0) allocates it with `new`; every call returns _uniqueInstance.
// NOTE(review): no locking is visible around the check-then-create — confirm
// the first call happens before any worker threads exist.
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Collects the tags of the "current" settings by inserting each tag string
// into `types`. Failures are reported through errorOut.
// Presumably `types` is the set<string> that is returned — its declaration and
// the return statement are not visible in this excerpt.
// NOTE(review): printCurrentFiles also reports list, name, sff, tree, flow and
// biom, but no insert for those tags is visible here — confirm they exist.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
// "processors" is a setting rather than a file, but is tracked with the same tags.
48 types.insert("processors");
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
// Prints one "tag=value" line (mothurOut followed by mothurOutEndLine) for
// every current-file member that is not the empty string. `processors` is
// printed only when it differs from "1". Failures are reported through errorOut.
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
// Note the tag is "qfile" while the member is named qualfile.
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
// processors is stored as a string; "1" is treated as the unset value.
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
// Returns true as soon as any current-file member is non-empty, or when
// `processors` differs from "1". Standard exceptions go to errorOut.
// `hasCurrent` starts as false; presumably it is what gets returned when no
// test matches — that return statement is not visible in this excerpt.
94 bool MothurOut::hasCurrentFiles() {
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
// A non-default processors value also counts as having a current setting.
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// The body of this function is not visible in this excerpt. Going by the name
// it presumably resets the current-file members — TODO confirm against the
// full file. Standard exceptions are reported through errorOut.
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Looks for programName in the directories listed in the PATH environment
// variable (PATH is split into `dirs` with splitAtChar on `delim`).
//   Pass 1: each directory string is lower-cased; the first one that contains
//           programName as a substring becomes pPath, and a path separator
//           plus programName is appended so it resembles argv[0].
//   Pass 2: a separator plus programName is appended to each directory and the
//           first candidate that openInputFile manages to open becomes pPath.
// How the two passes are gated and what is returned are not visible in this
// excerpt; presumably pPath is returned.
// NOTE(review): getenv("PATH") yields a null pointer when PATH is unset, and
// constructing a std::string from a null pointer is undefined — confirm a guard.
// NOTE(review): pass 1 matches against a lower-cased directory, so a
// programName containing upper-case letters cannot match there.
166 string MothurOut::findProgramPath(string programName){
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
// find() returns size_t; the comparison with -1 relies on -1 converting to npos.
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// The three-argument openInputFile overload has its "Could not open" message commented out.
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Remembers `filename` as logFileName and opens it for writing on `out` via
// openOutputFile. The MPI rank query and the `pid == 0` test indicate that
// only rank 0 opens the log; any build guard around the MPI lines is not
// visible in this excerpt. Standard exceptions are reported through errorOut.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
// Stores `pathname` in defaultPath after making sure it ends with a path
// separator: "/" on the Unix-like branch, "\\" on the other branch (the
// #else/#endif lines are not visible in this excerpt).
// NOTE(review): for an empty pathname, length()-1 wraps around and substr()
// throws std::out_of_range, which ends up in the catch below — confirm callers
// never pass "".
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores `pathname` in outputDir exactly as given. Unlike setDefaultPath, no
// trailing separator is added in the visible code.
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// Only the MPI rank check and the error handler are visible in this excerpt.
// Presumably the log stream is closed on rank 0 — TODO confirm against the
// full file.
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. Its body is not visible in this excerpt; only the error handler is.
// NOTE(review): the handler ends up in errorOut, and the rest of the handler is
// not visible — confirm nothing here can throw out of a destructor.
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Presumably writes `output` to the screen and the log — the statements that
// do the writing are not visible in this excerpt. The visible MPI rank check
// carries the comment that only one process should output to screen.
// NOTE(review): the handler labels the failing function "MothurOut" rather
// than "mothurOut"; the same label is reused by several sibling functions.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Going by the name, writes `output` to the screen only (not the log); the
// statements that do so are not visible in this excerpt — TODO confirm.
// NOTE(review): the handler reports the function name as "MothurOut", which
// does not identify this function in an error report.
351 void MothurOut::mothurOutJustToScreen(string output) {
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
366 catch(exception& e) {
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Going by the name, ends the current output line; the statements that do so
// are not visible in this excerpt — TODO confirm. The visible MPI rank check
// is commented as limiting output to one process.
372 void MothurOut::mothurOutEndLine() {
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
388 catch(exception& e) {
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload that also streams `output` into the caller-supplied outputFile.
// Any additional screen/log writes are not visible in this excerpt. The MPI
// rank check is commented as limiting output to one process.
// NOTE(review): the handler labels the failing function "MothurOut".
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
406 outputFile << output;
414 catch(exception& e) {
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload taking the caller-supplied outputFile; going by the name it ends
// the current line there as well. The statements that do so are not visible
// in this excerpt — TODO confirm.
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// Going by the name, writes `output` to the log file only (not the screen);
// the statements that do so are not visible in this excerpt — TODO confirm.
443 void MothurOut::mothurOutJustToLog(string output) {
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
458 catch(exception& e) {
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Central error reporter used by the catch handlers in this class.
//   e        - the caught exception; its what() text is printed
//   object   - name of the class or command where the error occurred
//   function - name of the function where the error occurred
// Prints "[ERROR]: " and e.what(), then a follow-up message:
//   - what() does not contain "bad_alloc": a generic "please contact" message;
//   - otherwise an out-of-memory explanation, specialised for object ==
//     "cluster" and object == "shhh.flows", with a general text for the rest.
// The else lines joining these branches are not visible in this excerpt.
// NOTE(review): the memory messages spell "unable" as "uable"; they are
// runtime strings and are left untouched here.
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// find() returns size_t; storing it in an int and comparing with string::npos
// below relies on -1 converting back to npos.
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // process_mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// Reports this process's memory use through the two reference parameters and
// prints it with mothurOut.
//   vm_usage     - set to vsize / 1024.0
//   resident_set - set to rss * (page size in KB)
// On the Unix-like branch the values come from /proc/self/stat: the first 22
// whitespace-separated fields are read into throwaway strings and the next two
// are vsize and rss (their declarations are not visible in this excerpt).
// The int return value and the failure path are not visible either.
// NOTE(review): the guard also matches __APPLE__/__MACH__; macOS has no /proc,
// so presumably the read fails there — confirm the outputs are still defined.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
// Opens `fileName` on `fileHandle` in append mode (ios::app) after expanding
// the name with getFullPathName. The "[ERROR]: Could not open" message is
// printed on failure — the test guarding it and the int return values are not
// visible in this excerpt.
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
586 catch(exception& e) {
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
// Binary counterpart of openOutputFileAppend: opens `fileName` on
// `fileHandle` with ios::app | ios::binary after expanding the name with
// getFullPathName. The failure test guarding the "[ERROR]: Could not open"
// message and the int return values are not visible in this excerpt.
// NOTE(review): the handler reports "openOutputFileAppend" as the function
// name — looks like a copy/paste label; confirm and correct.
592 int MothurOut::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){
594 fileName = getFullPathName(fileName);
596 fileHandle.open(fileName.c_str(), ios::app | ios::binary);
598 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
605 catch(exception& e) {
606 errorOut(e, "MothurOut", "openOutputFileAppend");
611 /***********************************************************************/
// Skips leading whitespace on `f`: characters are read while isspace() holds,
// then the first non-whitespace character is put back unless the stream has
// reached end of file.
// NOTE(review): the declaration of `d` is not visible here. If it is a plain
// char, isspace() may receive a negative value, which is undefined — confirm
// `d` is an int or is cast to unsigned char.
612 void MothurOut::gobble(istream& f){
616 while(isspace(d=f.get())) { ;}
617 if(!f.eof()) { f.putback(d); }
619 catch(exception& e) {
620 errorOut(e, "MothurOut", "gobble");
624 /***********************************************************************/
// istringstream overload of gobble: reads characters while isspace() holds,
// then puts the first non-whitespace character back unless the stream has
// reached end of file. The visible statements match the istream overload.
// NOTE(review): the declaration of `d` is not visible here — confirm it is not
// a plain char being passed to isspace().
625 void MothurOut::gobble(istringstream& f){
628 while(isspace(d=f.get())) {;}
629 if(!f.eof()) { f.putback(d); }
631 catch(exception& e) {
632 errorOut(e, "MothurOut", "gobble");
637 /***********************************************************************/
// Reads characters from `fileHandle` up to the next line terminator ('\n',
// '\r' or '\f') or end of stream. Presumably the other characters are
// accumulated into the returned string — that part is not visible here.
// NOTE(review): eof() is tested before get(), so the last get() at end of input
// yields EOF, which is not one of the break characters in this overload (the
// ifstream overload does test for it) — confirm it is not appended.
639 string MothurOut::getline(istringstream& fileHandle) {
644 while (!fileHandle.eof()) {
646 char c = fileHandle.get();
648 //are you at the end of the line
649 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
656 catch(exception& e) {
657 errorOut(e, "MothurOut", "getline");
661 /***********************************************************************/
// ifstream overload: reads characters until a line terminator ('\n', '\r',
// '\f') or EOF is returned by get(). The loop header and the accumulation of
// the line are not visible in this excerpt.
// NOTE(review): the result of get() is stored in a char before being compared
// with EOF; where char is unsigned that comparison can never be true — confirm
// on the supported platforms.
663 string MothurOut::getline(ifstream& fileHandle) {
670 char c = fileHandle.get();
672 //are you at the end of the line
673 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
680 catch(exception& e) {
681 errorOut(e, "MothurOut", "getline");
685 /***********************************************************************/
687 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
688 #ifdef USE_COMPRESSION
// Reports whether `s` ends with the C string `suffix`.
//   s      - string to inspect
//   suffix - NUL-terminated suffix to look for; a null pointer never matches
// Returns true when the last strlen(suffix) characters of `s` equal `suffix`;
// an empty suffix matches every string.
inline bool endsWith(std::string s, const char * suffix){
	// strlen on a null pointer is undefined, so reject it up front.
	if (suffix == 0) { return false; }

	const size_t suffixLength = strlen(suffix);
	if (s.size() < suffixLength) { return false; }

	// Compare the tail in place rather than building a temporary substring.
	return s.compare(s.size() - suffixLength, suffixLength, suffix) == 0;
}
// Derives the "root" of a file name: everything up to and including the last
// '.' of longName (pos is find_last_of('.') + 1, so the dot is kept). When
// there is no '.', rootName stays equal to longName.
// On Unix-like builds with USE_COMPRESSION, a trailing ".gz"/".bz2" is removed
// first and a "shortening ..." notice is written to cerr.
// Presumably rootName is returned — the return statement is not visible here.
696 string MothurOut::getRootName(string longName){
699 string rootName = longName;
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
702 #ifdef USE_COMPRESSION
703 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
// Here the dot itself is dropped along with the compression extension.
704 int pos = rootName.find_last_of('.');
705 rootName = rootName.substr(0, pos);
706 cerr << "shortening " << longName << " to " << rootName << "\n";
710 if(rootName.find_last_of(".") != rootName.npos){
711 int pos = rootName.find_last_of('.')+1;
712 rootName = rootName.substr(0, pos);
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getRootName");
722 /***********************************************************************/
// Strips any directory part from longName: simpleName becomes the text after
// the last '/' or '\\'. When neither separator occurs, simpleName stays equal
// to longName. Presumably simpleName is returned — the declaration of `found`
// and the return statement are not visible in this excerpt.
724 string MothurOut::getSimpleName(string longName){
726 string simpleName = longName;
729 found=longName.find_last_of("/\\");
731 if(found != longName.npos){
732 simpleName = longName.substr(found+1);
737 catch(exception& e) {
738 errorOut(e, "MothurOut", "getSimpleName");
743 /***********************************************************************/
// Maps rand() onto an integer index: (highest+1) * rand() / (RAND_MAX + 1.0),
// truncated to int, which is intended to land in [0, highest]. Presumably
// `random` is returned — the return statement is not visible here.
// NOTE(review): highest+1 and rand() are cast to float (24-bit mantissa) before
// multiplying, so large values are rounded; for large `highest` this may skew
// or overshoot the range — consider double and confirm.
745 int MothurOut::getRandomIndex(int highest){
748 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
752 catch(exception& e) {
753 errorOut(e, "MothurOut", "getRandomIndex");
758 /**********************************************************************/
// Extracts the directory part of longName: everything up to and including the
// last '/' or '\\'. When no separator occurs, rootPathName stays equal to
// longName. Presumably rootPathName is returned — the return statement is not
// visible in this excerpt.
760 string MothurOut::getPathName(string longName){
762 string rootPathName = longName;
764 if(longName.find_last_of("/\\") != longName.npos){
765 int pos = longName.find_last_of("/\\")+1;
766 rootPathName = longName.substr(0, pos);
771 catch(exception& e) {
772 errorOut(e, "MothurOut", "getPathName");
777 /***********************************************************************/
// Checks that dirName names a usable directory by creating a scratch file in it.
//   dirName - in/out: gains a trailing separator if missing and is replaced by
//             its getFullPathName() expansion
// Returns false immediately for an empty dirName. Otherwise a file named
// dirName + tag + "temp" + time(NULL) is opened with ios::trunc; the
// "does not exist or is not writable" message is printed on failure (the test
// guarding it is not visible), and the scratch file is removed with mothurRemove.
// Where `tag` comes from is not visible; the MPI rank query suggests it is
// derived from the process id — TODO confirm.
779 bool MothurOut::dirCheck(string& dirName){
782 if (dirName == "") { return false; }
787 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
792 //add / to name if needed
// Safe here because the empty string was rejected above.
793 string lastChar = dirName.substr(dirName.length()-1);
794 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
795 if (lastChar != "/") { dirName += "/"; }
797 if (lastChar != "\\") { dirName += "\\"; }
800 //test to make sure directory exists
801 dirName = getFullPathName(dirName);
802 string outTemp = dirName + tag + "temp"+ toString(time(NULL));
804 out.open(outTemp.c_str(), ios::trunc);
806 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
809 mothurRemove(outTemp);
815 catch(exception& e) {
816 errorOut(e, "MothurOut", "dirCheck");
821 //**********************************************************************************************************************
// Parses a class description such as "treatment<Early|Late>-age<young|old>"
// into a map from category name to its list of values.
// The input is split at '-' into pieces. Within a piece, characters before
// '<' build `category`, characters after it build `value`, and '>' ends the
// scan; `value` is then split at '|'.
// Returns the partially filled map early when control_pressed is set.
// The final return and the line between the '>' test and the appends are not
// visible; presumably an else keeps the brackets out of the strings.
823 map<string, vector<string> > MothurOut::parseClasses(string classes){
825 map<string, vector<string> > parts;
827 //treatment<Early|Late>-age<young|old>
828 vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
830 for (int i = 0; i < pieces.size(); i++) {
831 string category = ""; string value = "";
832 bool foundOpen = false;
833 for (int j = 0; j < pieces[i].length(); j++) {
834 if (control_pressed) { return parts; }
836 if (pieces[i][j] == '<') { foundOpen = true; }
// Pushing j past the end of the piece terminates the inner loop.
837 else if (pieces[i][j] == '>') { j += pieces[i].length(); }
839 if (!foundOpen) { category += pieces[i][j]; }
840 else { value += pieces[i][j]; }
843 vector<string> values; splitAtChar(value, values, '|');
// A repeated category name overwrites the earlier entry.
844 parts[category] = values;
849 catch(exception& e) {
850 errorOut(e, "MothurOut", "parseClasses");
854 /***********************************************************************/
// Extracts the path prefix of longName: everything up to and including the
// last '~', '/' or '\\'. getFullPathName treats an empty result as "no path",
// so presumably `path` starts out empty and is returned — its declaration and
// the return statement are not visible in this excerpt.
856 string MothurOut::hasPath(string longName){
861 found=longName.find_last_of("~/\\");
863 if(found != longName.npos){
864 path = longName.substr(0, found+1);
869 catch(exception& e) {
870 errorOut(e, "MothurOut", "hasPath");
875 /***********************************************************************/
// Extracts the extension of longName: the text from the last '.' to the end,
// dot included. `extension` stays "" when longName contains no '.'.
// Presumably `extension` is returned — the return statement is not visible.
877 string MothurOut::getExtension(string longName){
879 string extension = "";
881 if(longName.find_last_of('.') != longName.npos){
882 int pos = longName.find_last_of('.');
// The count argument exceeds what is left; substr clamps it to the end of the string.
883 extension = longName.substr(pos, longName.length());
888 catch(exception& e) {
889 errorOut(e, "MothurOut", "getExtension");
893 /***********************************************************************/
// Tests whether the file named fileName is blank. The name is expanded with
// getFullPathName and opened; true is returned (after closing the stream) when
// the stream is at end of file.
// The failure test guarding the "[ERROR]: Could not open" message, whatever
// read precedes the eof() check, and the non-blank return path are not visible
// in this excerpt.
894 bool MothurOut::isBlank(string fileName){
897 fileName = getFullPathName(fileName);
900 fileHandle.open(fileName.c_str());
902 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
905 //check for blank file
907 if (fileHandle.eof()) { fileHandle.close(); return true; }
912 catch(exception& e) {
913 errorOut(e, "MothurOut", "isBlank");
917 /***********************************************************************/
// Expands a possibly relative file name into a full path.
//   - hasPath(fileName) == "": the name is returned unchanged.
//   - Unix-like branch: a '~' in the path is replaced by $HOME (empty when
//     HOME is unset). Otherwise, if the path holds no "./", the name is
//     returned unchanged. Otherwise the current working directory is split on
//     '/' into `dirs`, the "../" and "./" components are consumed from the
//     right, and the remaining directories are joined in front of the text
//     that follows the last "./" in fileName.
//   - Other branch: the same algorithm with '\\' and %HOMEPATH%.
// An unresolvable path prints "cannot resolve path for ..." and returns
// fileName unchanged. Several linking lines (#else/#endif, the dirs push and
// index decrement on the Unix-like branch, the final return) are not visible.
// NOTE(review): getcwd(NULL, 0) hands back a buffer the caller owns on the
// platforms that support that form; no free() is visible — confirm.
// NOTE(review): getenv("HOMEPATH") is used without the null check that the
// Unix-like branch applies to HOME.
919 string MothurOut::getFullPathName(string fileName){
922 string path = hasPath(fileName);
926 if (path == "") { return fileName; } //its a simple name
927 else { //we need to complete the pathname
928 // ex. ../../../filename
929 // cwd = /user/work/desktop
932 //get current working directory
933 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
935 if (path.find("~") != -1) { //go to home directory
938 char *homepath = NULL;
939 homepath = getenv ("HOME");
940 if ( homepath != NULL) { homeDir = homepath; }
941 else { homeDir = ""; }
943 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
946 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
947 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
949 //char* cwdpath = new char[1024];
951 //cwdpath=getcwd(cwdpath,size);
954 char *cwdpath = NULL;
955 cwdpath = getcwd(NULL, 0); // or _getcwd
956 if ( cwdpath != NULL) { cwd = cwdpath; }
// Drop the leading '/' so that splitting does not yield an empty first entry.
962 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
964 //break apart the current working directory
966 while (simpleCWD.find_first_of('/') != string::npos) {
967 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
968 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
971 //get last one // ex. ../../../filename = /user/work/desktop/filename
972 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index marks the deepest cwd component that is still part of the result.
975 int index = dirs.size()-1;
977 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
978 if (pos == 0) { break; //you are at the end
979 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
980 path = path.substr(0, pos-1);
982 if (index == 0) { break; }
983 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
984 path = path.substr(0, pos);
985 }else if (pos == 1) { break; //you are at the end
986 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// Rebuild the absolute path from the surviving cwd components.
989 for (int i = index; i >= 0; i--) {
990 newFileName = dirs[i] + "/" + newFileName;
993 newFileName = "/" + newFileName;
997 if (path.find("~") != string::npos) { //go to home directory
998 string homeDir = getenv ("HOMEPATH");
999 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
1002 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
1003 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
1005 char *cwdpath = NULL;
1006 cwdpath = getcwd(NULL, 0); // or _getcwd
1007 if ( cwdpath != NULL) { cwd = cwdpath; }
1010 //break apart the current working directory
1011 vector<string> dirs;
1012 while (cwd.find_first_of('\\') != -1) {
1013 string dir = cwd.substr(0,cwd.find_first_of('\\'));
1014 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
1015 dirs.push_back(dir);
1019 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
1021 int index = dirs.size()-1;
1023 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
1024 if (pos == 0) { break; //you are at the end
1025 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
1026 path = path.substr(0, pos-1);
1028 if (index == 0) { break; }
1029 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
1030 path = path.substr(0, pos);
1031 }else if (pos == 1) { break; //you are at the end
1032 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
1035 for (int i = index; i >= 0; i--) {
1036 newFileName = dirs[i] + "\\" + newFileName;
1045 catch(exception& e) {
1046 errorOut(e, "MothurOut", "getFullPathName");
1050 /***********************************************************************/
// Quiet variant of openInputFile: opens fileName (expanded with
// getFullPathName) on fileHandle; its "Could not open" message is commented
// out. The third parameter `m` is not used in the visible lines and appears to
// exist only to select this overload.
// On Unix-like builds with USE_COMPRESSION, ".gz"/".bz2" inputs are read
// through a named pipe: a FIFO is created at a tmpnam() path, a forked child
// runs zcat/bzcat into it via system(), and the parent opens the FIFO instead.
// The int return values are not visible in this excerpt.
// NOTE(review): the file name is pasted unquoted into a shell command, so
// spaces or shell metacharacters break or subvert it — quote it or avoid system().
// NOTE(review): tmpnam() is open to name races — confirm this is acceptable.
1052 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1054 //get full path name
1055 string completeFileName = getFullPathName(fileName);
1056 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1057 #ifdef USE_COMPRESSION
1058 // check for gzipped or bzipped file
1059 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1060 string tempName = string(tmpnam(0));
1061 mkfifo(tempName.c_str(), 0666);
1062 int fork_result = fork();
1063 if (fork_result < 0) {
1064 cerr << "Error forking.\n";
1066 } else if (fork_result == 0) {
1067 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1068 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1069 system(command.c_str());
1070 cerr << "Done decompressing " << completeFileName << "\n";
1071 mothurRemove(tempName);
1074 cerr << "waiting on child process " << fork_result << "\n";
// From here on the parent reads the decompressed bytes from the FIFO.
1075 completeFileName = tempName;
1080 fileHandle.open(completeFileName.c_str());
1082 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1085 //check for blank file
1090 catch(exception& e) {
1091 errorOut(e, "MothurOut", "openInputFile - no Error");
1095 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) for reading on fileHandle.
// Prints "[ERROR]: Could not open ..." on failure, and "... is blank. Please
// correct." when the stream is at end of file. The test guarding the first
// message and the int return values are not visible in this excerpt.
// On Unix-like builds with USE_COMPRESSION, ".gz"/".bz2" inputs are read
// through a named pipe: a FIFO is created at a tmpnam() path, a forked child
// runs zcat/bzcat into it via system(), and the parent opens the FIFO instead.
// NOTE(review): the file name is pasted unquoted into a shell command, so
// spaces or shell metacharacters break or subvert it — quote it or avoid system().
// NOTE(review): tmpnam() is open to name races — confirm this is acceptable.
1097 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1100 //get full path name
1101 string completeFileName = getFullPathName(fileName);
1102 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1103 #ifdef USE_COMPRESSION
1104 // check for gzipped or bzipped file
1105 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1106 string tempName = string(tmpnam(0));
1107 mkfifo(tempName.c_str(), 0666);
1108 int fork_result = fork();
1109 if (fork_result < 0) {
1110 cerr << "Error forking.\n";
1112 } else if (fork_result == 0) {
1113 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1114 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1115 system(command.c_str());
1116 cerr << "Done decompressing " << completeFileName << "\n";
1117 mothurRemove(tempName);
1120 cerr << "waiting on child process " << fork_result << "\n";
// From here on the parent reads the decompressed bytes from the FIFO.
1121 completeFileName = tempName;
1127 fileHandle.open(completeFileName.c_str());
1129 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1133 //check for blank file
1135 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1140 catch(exception& e) {
1141 errorOut(e, "MothurOut", "openInputFile");
1145 /***********************************************************************/
// Binary counterpart of openInputFile: opens fileName (expanded with
// getFullPathName) on fileHandle with ios::binary. Prints "[ERROR]: Could not
// open ..." on failure, and "... is blank. Please correct." when the stream is
// at end of file. The test guarding the first message and the int return
// values are not visible in this excerpt.
// On Unix-like builds with USE_COMPRESSION, ".gz"/".bz2" inputs are read
// through a named pipe: a FIFO is created at a tmpnam() path, a forked child
// runs zcat/bzcat into it via system(), and the parent opens the FIFO instead.
// NOTE(review): the file name is pasted unquoted into a shell command, so
// spaces or shell metacharacters break or subvert it — quote it or avoid system().
// NOTE(review): tmpnam() is open to name races — confirm this is acceptable.
1146 int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle){
1149 //get full path name
1150 string completeFileName = getFullPathName(fileName);
1151 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1152 #ifdef USE_COMPRESSION
1153 // check for gzipped or bzipped file
1154 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1155 string tempName = string(tmpnam(0));
1156 mkfifo(tempName.c_str(), 0666);
1157 int fork_result = fork();
1158 if (fork_result < 0) {
1159 cerr << "Error forking.\n";
1161 } else if (fork_result == 0) {
1162 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1163 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1164 system(command.c_str());
1165 cerr << "Done decompressing " << completeFileName << "\n";
1166 mothurRemove(tempName);
1169 cerr << "waiting on child process " << fork_result << "\n";
// From here on the parent reads the decompressed bytes from the FIFO.
1170 completeFileName = tempName;
1176 fileHandle.open(completeFileName.c_str(), ios::binary);
1178 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1182 //check for blank file
1184 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1189 catch(exception& e) {
1190 errorOut(e, "MothurOut", "openInputFileBinary");
1194 /***********************************************************************/
// Quiet overload of openInputFileBinary: same path resolution, optional .gz/.bz2 named-pipe
// decompression and ios::binary open, but the "Could not open" and "is blank" messages are
// commented out, so nothing is reported through mothurOut.
// The noerror argument is not referenced in the visible lines; it appears to exist only to select this overload.
// NOTE(review): the return statements are not visible in this excerpt - confirm the status codes in the full source.
1195 int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
1198 //get full path name
1199 string completeFileName = getFullPathName(fileName);
1200 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1201 #ifdef USE_COMPRESSION
1202 // check for gzipped or bzipped file
1203 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; the FIFO is created afterwards, so the name can be raced
1204 string tempName = string(tmpnam(0));
1205 mkfifo(tempName.c_str(), 0666);
1206 int fork_result = fork();
1207 if (fork_result < 0) {
1208 cerr << "Error forking.\n";
1210 } else if (fork_result == 0) {
// child process: decompress into the FIFO; the file name is pasted into the shell command unquoted
1211 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1212 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1213 system(command.c_str());
1214 cerr << "Done decompressing " << completeFileName << "\n";
1215 mothurRemove(tempName);
1218 cerr << "waiting on child process " << fork_result << "\n";
// from here on the stream is opened on the FIFO instead of the compressed file
1219 completeFileName = tempName;
1225 fileHandle.open(completeFileName.c_str(), ios::binary);
1227 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1231 //check for blank file
1233 //if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1238 catch(exception& e) {
1239 errorOut(e, "MothurOut", "openInputFileBinary - no error");
1244 /***********************************************************************/
// Renames oldName to newName, removing an existing newName first.
// Returns 0 immediately when both names are equal.
// NOTE(review): the #else/#endif lines and the remaining return statements are not visible in this excerpt.
1246 int MothurOut::renameFile(string oldName, string newName){
1249 if (oldName == newName) { return 0; }
// probe the destination with the quiet open; a result of 0 is treated below as "file exists"
1252 int exist = openInputFile(newName, inTest, "");
1255 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1256 if (exist == 0) { //you could open it so you want to delete it
// NOTE(review): both shell commands paste the file names unquoted - names containing spaces or shell metacharacters will misbehave
1257 string command = "rm " + newName;
1258 system(command.c_str());
1261 string command = "mv " + oldName + " " + newName;
1262 system(command.c_str());
// presumably the non-unix branch: drop the destination, then use the C library rename()
1264 mothurRemove(newName);
1265 int renameOk = rename(oldName.c_str(), newName.c_str());
1270 catch(exception& e) {
1271 errorOut(e, "MothurOut", "renameFile");
1276 /***********************************************************************/
// Opens fileName for writing on fileHandle (ios::trunc) after resolving it with getFullPathName().
// On unix-like builds compiled with USE_COMPRESSION, a name ending in ".gz" or ".bz2" is written
// through a named pipe: a forked child runs gzip/bzip2 reading from the FIFO and writing the
// real file, and the stream is opened on the FIFO instead.
// Emits "[ERROR]: Could not open ..." through mothurOut.
// NOTE(review): the return statements are not visible in this excerpt - confirm the status codes in the full source.
1278 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1281 string completeFileName = getFullPathName(fileName);
1282 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1283 #ifdef USE_COMPRESSION
1284 // check for gzipped file
1285 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; the FIFO is created afterwards, so the name can be raced
1286 string tempName = string(tmpnam(0));
1287 mkfifo(tempName.c_str(), 0666);
1288 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1289 int fork_result = fork();
1290 if (fork_result < 0) {
1291 cerr << "Error forking.\n";
1293 } else if (fork_result == 0) {
// child process: compress FIFO contents into the target; the file name is pasted into the shell command unquoted
1294 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1295 system(command.c_str());
// from here on the stream is opened on the FIFO instead of the compressed file
1298 completeFileName = tempName;
1303 fileHandle.open(completeFileName.c_str(), ios::trunc);
1305 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1312 catch(exception& e) {
1313 errorOut(e, "MothurOut", "openOutputFile");
1318 /***********************************************************************/
// Binary-mode variant of openOutputFile: same path resolution and optional gzip/bzip2
// named-pipe compression, but fileHandle is opened with ios::trunc | ios::binary.
// Emits "[ERROR]: Could not open ..." through mothurOut.
// NOTE(review): the return statements are not visible in this excerpt - confirm the status codes in the full source.
1320 int MothurOut::openOutputFileBinary(string fileName, ofstream& fileHandle){
1323 string completeFileName = getFullPathName(fileName);
1324 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1325 #ifdef USE_COMPRESSION
1326 // check for gzipped file
1327 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; the FIFO is created afterwards, so the name can be raced
1328 string tempName = string(tmpnam(0));
1329 mkfifo(tempName.c_str(), 0666);
1330 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1331 int fork_result = fork();
1332 if (fork_result < 0) {
1333 cerr << "Error forking.\n";
1335 } else if (fork_result == 0) {
// child process: compress FIFO contents into the target; the file name is pasted into the shell command unquoted
1336 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1337 system(command.c_str());
// from here on the stream is opened on the FIFO instead of the compressed file
1340 completeFileName = tempName;
1345 fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary);
1347 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1354 catch(exception& e) {
1355 errorOut(e, "MothurOut", "openOutputFileBinary");
1360 /**************************************************************************************************/
// Appends the contents of temp to filename.
// The output is opened in binary append mode and the input with the quiet binary open; when
// the input opens (result 0) it is copied in reads of up to 4096 bytes while '\n' bytes are
// tallied in numLines.
// NOTE(review): the declarations, stream closes and return statement are not visible in this excerpt.
1361 int MothurOut::appendFiles(string temp, string filename) {
1366 //open output file in append mode
1367 openOutputFileBinaryAppend(filename, output);
1368 int ableToOpen = openInputFileBinary(temp, input, "no error");
1369 //int ableToOpen = openInputFile(temp, input);
1372 if (ableToOpen == 0) { //you opened it
1375 while (!input.eof()) {
1376 input.read(buffer, 4096);
// gcount() is the number of bytes the last read() actually delivered, so a short final block is copied correctly
1377 output.write(buffer, input.gcount());
1378 //count number of lines
1379 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1388 catch(exception& e) {
1389 errorOut(e, "MothurOut", "appendFiles");
1393 /**************************************************************************************************/
// Appends the raw bytes of temp to filename without counting lines.
// The output is opened in binary append mode and the input with the quiet binary open; when
// the input opens (result 0) it is copied in reads of up to 4096 bytes.
// NOTE(review): the declarations, stream closes and return statement are not visible in this excerpt.
1394 int MothurOut::appendBinaryFiles(string temp, string filename) {
1399 //open output file in append mode
1400 openOutputFileBinaryAppend(filename, output);
1401 int ableToOpen = openInputFileBinary(temp, input, "no error");
1403 if (ableToOpen == 0) { //you opened it
1406 while (!input.eof()) {
1407 input.read(buffer, 4096);
// gcount() is the number of bytes the last read() actually delivered, so a short final block is copied correctly
1408 output.write(buffer, input.gcount());
1417 catch(exception& e) {
1418 errorOut(e, "MothurOut", "appendBinaryFiles");
1423 /**************************************************************************************************/
// Appends temp to filename while dropping temp's first line.
// The first line is read with getline() as "headers" (logged when debug is set) and not
// written; the remainder is copied in reads of up to 4096 bytes while '\n' bytes are tallied
// in numLines.
// NOTE(review): the declarations, stream closes and return statement are not visible in this excerpt.
1424 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1429 //open output file in append mode
1430 openOutputFileAppend(filename, output);
1431 int ableToOpen = openInputFile(temp, input, "no error");
1432 //int ableToOpen = openInputFile(temp, input);
1435 if (ableToOpen == 0) { //you opened it
// consume the header line so that it is not copied to the output
1437 string headers = getline(input); gobble(input);
1438 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1441 while (!input.eof()) {
1442 input.read(buffer, 4096);
1443 output.write(buffer, input.gcount());
1444 //count number of lines
1445 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1454 catch(exception& e) {
// NOTE(review): reports "appendFiles" rather than this function's own name - looks like a copy/paste leftover
1455 errorOut(e, "MothurOut", "appendFiles");
1459 /**************************************************************************************************/
// Sorts a three-column distance file numerically by its distance column and writes the result
// to getRootName(distFile) + "sorted.dist".
// Unix-like builds shell out to `sort -n -k +3`. The fallback branch moves the distance to the
// first column, runs the Windows `sort`, moves the distance back, and removes both temp files.
// outputDir is not referenced in the visible lines (its only use is commented out).
// NOTE(review): the stream declarations/closes and the return statement are not visible in this excerpt.
1460 string MothurOut::sortFile(string distFile, string outputDir){
1463 //if (outputDir == "") { outputDir += hasPath(distFile); }
1464 string outfile = getRootName(distFile) + "sorted.dist";
1467 //if you can, use the unix sort since its been optimized for years
1468 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// NOTE(review): file names are pasted into the shell command unquoted
1469 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1470 system(command.c_str());
1471 #else //you are stuck with my best attempt...
1472 //windows sort does not have a way to specify a column, only a character in the line
1473 //since we cannot assume that the distance will always be at the the same character location on each line
1474 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1476 //read in file line by file and put distance first
1477 string tempDistFile = distFile + ".temp";
1480 openInputFile(distFile, input);
1481 openOutputFile(tempDistFile, output);
1483 string firstName, secondName;
1485 while (!input.eof()) {
1486 input >> firstName >> secondName >> dist;
1487 output << dist << '\t' << firstName << '\t' << secondName << endl;
1494 //sort using windows sort
1495 string tempOutfile = outfile + ".temp";
1496 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1497 system(command.c_str());
1499 //read in sorted file and put distance at end again
1502 openInputFile(tempOutfile, input2);
1503 openOutputFile(outfile, output2);
1505 while (!input2.eof()) {
1506 input2 >> dist >> firstName >> secondName;
1507 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
1514 mothurRemove(tempDistFile);
1515 mothurRemove(tempOutfile);
1520 catch(exception& e) {
1521 errorOut(e, "MothurOut", "sortFile");
1525 /**************************************************************************************************/
// Builds a table of byte offsets into a fasta file.
// The file is opened in binary mode and read one character at a time while `count` tracks the
// number of characters consumed; selected offsets (count-1) are appended to `positions`.
// `num` receives positions.size() before a final entry holding `size` is appended; `size` is
// obtained through fopen()/fseek(), per the "get num bytes in file" comment.
// NOTE(review): the condition guarding the push_back (presumably a test for '>') and the
// return statement are not visible in this excerpt.
1526 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1528 vector<unsigned long long> positions;
1530 //openInputFile(filename, inFASTA);
1531 inFASTA.open(filename.c_str(), ios::binary);
1534 unsigned long long count = 0;
1535 while(!inFASTA.eof()){
1536 //input = getline(inFASTA);
1537 //cout << input << '\t' << inFASTA.tellg() << endl;
1538 //if (input.length() != 0) {
1539 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1541 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1542 char c = inFASTA.get(); count++;
// count was already advanced past c, hence the -1 to get c's own offset
1544 positions.push_back(count-1);
1545 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
1550 num = positions.size();
1551 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1553 unsigned long long size;
1555 //get num bytes in file
1556 pFile = fopen (filename.c_str(),"rb");
1557 if (pFile==NULL) perror ("Error opening file");
1559 fseek (pFile, 0, SEEK_END);
1564 /*unsigned long long size = positions[(positions.size()-1)];
1566 openInputFile(filename, in);
1571 if(in.eof()) { break; }
1576 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1578 positions.push_back(size);
1583 catch(exception& e) {
1584 errorOut(e, "MothurOut", "setFilePosFasta");
1588 //**********************************************************************************************************************
// Reads a consensus-taxonomy file into a vector of consTax records.
// Each record is parsed as three whitespace-separated fields (otu, size, tax) and stored as
// consTax(otu, tax, size); tax defaults to "unknown" before the read. Reading stops early
// when control_pressed is set.
// NOTE(review): the loop header, any header-line skipping and the return statement are not visible in this excerpt.
1589 vector<consTax> MothurOut::readConsTax(string inputfile){
1592 vector<consTax> taxes;
1595 openInputFile(inputfile, in);
1602 if (control_pressed) { break; }
1604 string otu = ""; string tax = "unknown";
1607 in >> otu >> size >> tax; gobble(in);
1608 consTax temp(otu, tax, size);
1609 taxes.push_back(temp);
1615 catch(exception& e) {
1616 errorOut(e, "MothurOut", "readConsTax");
1620 //**********************************************************************************************************************
// Map-filling overload of readConsTax.
// Each record is parsed as three whitespace-separated fields (otu, size, tax) and turned into
// consTax2(tax, size); tax defaults to "unknown" before the read. Reading stops early when
// control_pressed is set.
// NOTE(review): the insertion into `taxes` (presumably keyed by otu), the loop header and the
// return value are not visible in this excerpt.
1621 int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
1624 openInputFile(inputfile, in);
1631 if (control_pressed) { break; }
1633 string otu = ""; string tax = "unknown";
1636 in >> otu >> size >> tax; gobble(in);
1637 consTax2 temp(tax, size);
1644 catch(exception& e) {
1645 errorOut(e, "MothurOut", "readConsTax");
1649 /**************************************************************************************************/
// Builds a table of byte offsets for the lines of a file.
// The file (full path via getFullPathName) is opened in binary mode and read one character at
// a time: characters are consumed up to a '\n', '\r' or '\f', following whitespace is skipped,
// and count-1 is recorded as an offset. positions starts with 0, `num` is set to
// positions.size()-1, and the last entry is overwritten with `size`, obtained through
// fopen()/fseek() per the "get num bytes in file" comment.
// NOTE(review): the outer loop header and the return statement are not visible in this excerpt.
1650 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1652 filename = getFullPathName(filename);
1654 vector<unsigned long long> positions;
1656 //openInputFile(filename, in);
1657 in.open(filename.c_str(), ios::binary);
1660 unsigned long long count = 0;
1661 positions.push_back(0);
1664 //getline counting reads
1665 char d = in.get(); count++;
// NOTE(review): `d != in.eof()` compares a char with the bool returned by eof(); this looks
// unintended (it is not an end-of-file test on d) - confirm before relying on it
1666 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1667 //get next character
1673 d=in.get(); count++;
1674 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
// count was already advanced past d, hence the -1
1676 positions.push_back(count-1);
1677 //cout << count-1 << endl;
1681 num = positions.size()-1;
1684 unsigned long long size;
1686 //get num bytes in file
1687 pFile = fopen (filename.c_str(),"rb");
1688 if (pFile==NULL) perror ("Error opening file");
1690 fseek (pFile, 0, SEEK_END);
// replace the final recorded offset with the file size
1695 positions[(positions.size()-1)] = size;
1699 catch(exception& e) {
1700 errorOut(e, "MothurOut", "setFilePosEachLine");
1704 /**************************************************************************************************/
// Splits a file into byte ranges for `proc` workers, with breaks placed at '>' characters.
// filePos starts with 0. On unix-like builds chunkSize = size / proc; if that is 0 the file is
// treated as a single chunk (proc = 1). Otherwise, for each chunk the file is reopened and
// scanned for the next '>', whose position becomes the break. `size` is appended, entries that
// are not strictly increasing are erased, and proc is reset to filePos.size() - 1.
// The other branch logs that the Windows version should not call this and appends `size`.
// NOTE(review): the ftell/seekg calls, the scan loop header, #else/#endif and the return
// statement are not visible in this excerpt.
1706 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1708 vector<unsigned long long> filePos;
1709 filePos.push_back(0);
1712 unsigned long long size;
1714 filename = getFullPathName(filename);
1716 //get num bytes in file
1717 pFile = fopen (filename.c_str(),"rb");
1718 if (pFile==NULL) perror ("Error opening file");
1720 fseek (pFile, 0, SEEK_END);
1725 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1727 //estimate file breaks
1728 unsigned long long chunkSize = 0;
1729 chunkSize = size / proc;
1731 //file to small to divide by processors
1732 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1734 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1735 for (int i = 0; i < proc; i++) {
1736 unsigned long long spot = (i+1) * chunkSize;
1739 openInputFile(filename, in);
1743 unsigned long long newSpot = spot;
// put the '>' back so that tellg() reports the offset of the '>' itself
1747 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1748 else if (int(c) == -1) { break; }
1752 //there was not another sequence before the end of the file
// tellg() returns -1 on a failed stream; stored in an unsigned value it still compares equal to -1
1753 unsigned long long sanityPos = in.tellg();
1755 if (sanityPos == -1) { break; }
1756 else { filePos.push_back(newSpot); }
1762 filePos.push_back(size);
1764 //sanity check filePos
1765 for (int i = 0; i < (filePos.size()-1); i++) {
// i-- so that the element shifted into slot i+1 is checked against filePos[i] as well
1766 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1769 proc = (filePos.size() - 1);
1771 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1773 filePos.push_back(size);
1777 catch(exception& e) {
1778 errorOut(e, "MothurOut", "divideFile");
1782 /**************************************************************************************************/
// Splits a file into byte ranges for `proc` workers, with breaks placed after line breaks.
// Same scheme as divideFile, except that each chunk is scanned for the next '\n', '\r' or
// '\f'; gobble() is called and the stream position after it becomes the break. `size` is
// appended, entries that are not strictly increasing are erased, and proc is reset to
// filePos.size() - 1.
// NOTE(review): the ftell/seekg calls, the scan loop header, #else/#endif and the return
// statement are not visible in this excerpt.
1784 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1786 vector<unsigned long long> filePos;
1787 filePos.push_back(0);
1790 unsigned long long size;
1792 filename = getFullPathName(filename);
1794 //get num bytes in file
1795 pFile = fopen (filename.c_str(),"rb");
1796 if (pFile==NULL) perror ("Error opening file");
1798 fseek (pFile, 0, SEEK_END);
1803 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1805 //estimate file breaks
1806 unsigned long long chunkSize = 0;
1807 chunkSize = size / proc;
1809 //file to small to divide by processors
1810 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
// NOTE(review): the next comment mentions '>' but this function actually searches for a line break
1812 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1813 for (int i = 0; i < proc; i++) {
1814 unsigned long long spot = (i+1) * chunkSize;
1817 openInputFile(filename, in);
1820 //look for next line break
1821 unsigned long long newSpot = spot;
1825 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1826 else if (int(c) == -1) { break; }
1829 //there was not another line before the end of the file
// tellg() returns -1 on a failed stream; stored in an unsigned value it still compares equal to -1
1830 unsigned long long sanityPos = in.tellg();
1832 if (sanityPos == -1) { break; }
1833 else { filePos.push_back(newSpot); }
1839 filePos.push_back(size);
1841 //sanity check filePos
1842 for (int i = 0; i < (filePos.size()-1); i++) {
// i-- so that the element shifted into slot i+1 is checked against filePos[i] as well
1843 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1846 proc = (filePos.size() - 1);
// NOTE(review): this message and the errorOut below both name "divideFile" rather than divideFilePerLine
1848 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1850 filePos.push_back(size);
1854 catch(exception& e) {
1855 errorOut(e, "MothurOut", "divideFile");
1859 /**************************************************************************************************/
// Physically splits `filename` into per-process chunk files.
// The break offsets come from divideFile(filename, proc). For each range the bytes
// [filePos[i], filePos[i+1]) are read into a heap buffer and written to
// "<filename>.<i>.tmp", whose name is appended to `files`.
// NOTE(review): the stream declarations/closes, the buffer release and the return statement
// are not visible in this excerpt.
1860 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1863 vector<unsigned long long> filePos = divideFile(filename, proc);
1865 for (int i = 0; i < (filePos.size()-1); i++) {
1869 openInputFile(filename, in);
1870 in.seekg(filePos[i]);
1871 unsigned long long size = filePos[(i+1)] - filePos[i];
1872 char* chunk = new char[size];
1873 in.read(chunk, size);
1877 string fileChunkName = filename + "." + toString(i) + ".tmp";
1879 openOutputFile(fileChunkName, out);
// NOTE(review): operator<< on a char* expects a NUL-terminated string, but read() does not
// terminate `chunk` and the buffer has no spare byte - confirm; out.write(chunk, size) would avoid this
1881 out << chunk << endl;
1886 files.push_back(fileChunkName);
1891 catch(exception& e) {
1892 errorOut(e, "MothurOut", "divideFile");
1896 /***********************************************************************/
1898 bool MothurOut::isTrue(string f){
1901 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1903 if ((f == "TRUE") || (f == "T")) { return true; }
1904 else { return false; }
1906 catch(exception& e) {
1907 errorOut(e, "MothurOut", "isTrue");
1912 /***********************************************************************/
1914 float MothurOut::roundDist(float dist, int precision){
1916 return int(dist * precision + 0.5)/float(precision);
1918 catch(exception& e) {
1919 errorOut(e, "MothurOut", "roundDist");
1923 /***********************************************************************/
1925 float MothurOut::ceilDist(float dist, int precision){
1927 return int(ceil(dist * precision))/float(precision);
1929 catch(exception& e) {
1930 errorOut(e, "MothurOut", "ceilDist");
1934 /***********************************************************************/
1936 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1938 vector<string> pieces;
1940 for (int i = 0; i < size; i++) {
1941 if (!isspace(buffer[i])) { rest += buffer[i]; }
1943 if (rest != "") { pieces.push_back(rest); rest = ""; }
1944 while (i < size) { //gobble white space
1945 if (isspace(buffer[i])) { i++; }
1946 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1953 catch(exception& e) {
1954 errorOut(e, "MothurOut", "splitWhiteSpace");
1958 /***********************************************************************/
1959 vector<string> MothurOut::splitWhiteSpace(string input){
1961 vector<string> pieces;
1964 for (int i = 0; i < input.length(); i++) {
1965 if (!isspace(input[i])) { rest += input[i]; }
1967 if (rest != "") { pieces.push_back(rest); rest = ""; }
1968 while (i < input.length()) { //gobble white space
1969 if (isspace(input[i])) { i++; }
1970 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1975 if (rest != "") { pieces.push_back(rest); }
1979 catch(exception& e) {
1980 errorOut(e, "MothurOut", "splitWhiteSpace");
1984 /***********************************************************************/
// Splits `input` on whitespace while treating text that starts at a ' or " as part of one piece.
// When the input contains neither quote character the work is delegated to splitWhiteSpace(input).
// Otherwise a quote character (or a pending piece consisting of a single quote) starts a scan
// forward until the next ' or ", appending characters to the current piece along the way.
// NOTE(review): the statements executed when the closing quote is found, and the return
// statement, are not visible in this excerpt.
1985 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1987 vector<string> pieces;
// find() returns string::npos when absent; the int results are converted back for the comparison below
1990 int pos = input.find('\'');
1991 int pos2 = input.find('\"');
1993 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1995 for (int i = 0; i < input.length(); i++) {
1996 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
1998 for (int j = i+1; j < input.length(); j++) {
1999 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
2003 }else { rest += input[j]; }
2005 }else if (!isspace(input[i])) { rest += input[i]; }
2007 if (rest != "") { pieces.push_back(rest); rest = ""; }
2008 while (i < input.length()) { //gobble white space
2009 if (isspace(input[i])) { i++; }
2010 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// flush a piece that runs to the end of the input
2015 if (rest != "") { pieces.push_back(rest); }
2019 catch(exception& e) {
// NOTE(review): reports "splitWhiteSpace" rather than this function's own name - looks like a copy/paste leftover
2020 errorOut(e, "MothurOut", "splitWhiteSpace");
2024 //**********************************************************************************************************************
2025 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
2029 openInputFile(namefile, in);
2033 bool pairDone = false;
2034 bool columnOne = true;
2035 string firstCol, secondCol;
2039 if (control_pressed) { break; }
2041 in.read(buffer, 4096);
2042 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2044 for (int i = 0; i < pieces.size(); i++) {
2045 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2046 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2049 checkName(firstCol);
2050 //are there confidence scores, if so remove them
2051 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
2052 map<string, string>::iterator itTax = taxMap.find(firstCol);
2054 if(itTax == taxMap.end()) {
2055 bool ignore = false;
2056 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
2058 if (!ignore) { taxMap[firstCol] = secondCol; }
2059 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
2061 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true;
2070 vector<string> pieces = splitWhiteSpace(rest);
2072 for (int i = 0; i < pieces.size(); i++) {
2073 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2074 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2077 checkName(firstCol);
2078 //are there confidence scores, if so remove them
2079 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
2080 map<string, string>::iterator itTax = taxMap.find(firstCol);
2082 if(itTax == taxMap.end()) {
2083 bool ignore = false;
2084 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
2086 if (!ignore) { taxMap[firstCol] = secondCol; }
2087 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
2089 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); error = true;
2097 if (error) { control_pressed = true; }
2098 if (debug) { mothurOut("[DEBUG]: numSeqs saved = '" + toString(taxMap.size()) + "'\n"); }
2099 return taxMap.size();
2102 catch(exception& e) {
2103 errorOut(e, "MothurOut", "readTax");
2107 /**********************************************************************************************************************/
// Reads a two-column names file and maps every name in the second column to its first-column name.
// The file is consumed in 4096-byte blocks and tokenised with splitWhiteSpace(); tokens are
// paired as (firstCol, secondCol), both are passed through checkName(), secondCol is split at
// commas, and nameMap[name] = firstCol is stored for each resulting name. Text left in `rest`
// after the last block is processed the same way. Returns nameMap.size().
// `redund` is not referenced in the visible lines; it appears to exist only to select this overload.
// NOTE(review): the loop header, the pairDone guard and the declarations are not visible in this excerpt.
2108 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
2112 openInputFile(namefile, in);
2116 bool pairDone = false;
2117 bool columnOne = true;
2118 string firstCol, secondCol;
2121 if (control_pressed) { break; }
2123 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2124 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2126 for (int i = 0; i < pieces.size(); i++) {
2127 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2128 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2131 checkName(firstCol);
2132 checkName(secondCol);
2134 //parse names into vector
2135 vector<string> theseNames;
2136 splitAtComma(secondCol, theseNames);
2137 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
// leftover text after the last block
2145 vector<string> pieces = splitWhiteSpace(rest);
2147 for (int i = 0; i < pieces.size(); i++) {
2148 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2149 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2152 checkName(firstCol);
2153 checkName(secondCol);
2155 //parse names into vector
2156 vector<string> theseNames;
2157 splitAtComma(secondCol, theseNames);
2158 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2164 return nameMap.size();
2167 catch(exception& e) {
2168 errorOut(e, "MothurOut", "readNames");
2172 /**********************************************************************************************************************/
// Reads a two-column names file into a reversed map: nameMap[secondCol] = firstCol.
// The file is consumed in 4096-byte blocks and tokenised with splitWhiteSpace(); tokens are
// paired as (firstCol, secondCol) and both are passed through checkName() before storing.
// Text left in `rest` after the last block is processed the same way. Returns nameMap.size().
// `flip` is not referenced in the visible lines; it appears to exist only to select this overload.
// NOTE(review): the loop header, the pairDone guard and the declarations are not visible in this excerpt.
2173 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
2177 openInputFile(namefile, in);
2181 bool pairDone = false;
2182 bool columnOne = true;
2183 string firstCol, secondCol;
2186 if (control_pressed) { break; }
2188 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2189 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2191 for (int i = 0; i < pieces.size(); i++) {
2192 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2193 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2196 checkName(firstCol);
2197 checkName(secondCol);
// the whole second column (not split at commas) is the key
2198 nameMap[secondCol] = firstCol;
// leftover text after the last block
2206 vector<string> pieces = splitWhiteSpace(rest);
2208 for (int i = 0; i < pieces.size(); i++) {
2209 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2210 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2213 checkName(firstCol);
2214 checkName(secondCol);
2215 nameMap[secondCol] = firstCol;
2221 return nameMap.size();
2224 catch(exception& e) {
2225 errorOut(e, "MothurOut", "readNames");
2229 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, clearing both first.
// For each (firstCol, secondCol) pair, secondCol is split at commas; every resulting name is
// mapped to firstCol in nameMap, and nameCount[firstCol] records how many names were listed.
// The file is consumed in 4096-byte blocks via splitWhiteSpace(); text left in `rest` after
// the last block is processed the same way. Returns nameMap.size().
// NOTE(review): the loop header, the pairDone guard and the declarations are not visible in this excerpt.
2230 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
2232 nameMap.clear(); nameCount.clear();
2235 openInputFile(namefile, in);
2239 bool pairDone = false;
2240 bool columnOne = true;
2241 string firstCol, secondCol;
2244 if (control_pressed) { break; }
2246 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2247 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2249 for (int i = 0; i < pieces.size(); i++) {
2250 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2251 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2254 checkName(firstCol);
2255 checkName(secondCol);
2256 //parse names into vector
2257 vector<string> theseNames;
2258 splitAtComma(secondCol, theseNames);
2259 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2260 nameCount[firstCol] = theseNames.size();
// leftover text after the last block
2268 vector<string> pieces = splitWhiteSpace(rest);
2270 for (int i = 0; i < pieces.size(); i++) {
2271 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2272 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2275 checkName(firstCol);
2276 checkName(secondCol);
2277 //parse names into vector
2278 vector<string> theseNames;
2279 splitAtComma(secondCol, theseNames);
2280 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2281 nameCount[firstCol] = theseNames.size();
2287 return nameMap.size();
2290 catch(exception& e) {
2291 errorOut(e, "MothurOut", "readNames");
2295 /**********************************************************************************************************************/
// Reads a two-column names file verbatim: nameMap[firstCol] = secondCol.
// The file is consumed in 4096-byte blocks and tokenised with splitWhiteSpace(); tokens are
// paired as (firstCol, secondCol) and both are passed through checkName() before storing.
// Text left in `rest` after the last block is processed the same way. Returns nameMap.size().
// NOTE(review): the loop header, the pairDone guard and the declarations are not visible in this excerpt.
2296 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2300 openInputFile(namefile, in);
2304 bool pairDone = false;
2305 bool columnOne = true;
2306 string firstCol, secondCol;
2309 if (control_pressed) { break; }
2311 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2312 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2314 for (int i = 0; i < pieces.size(); i++) {
2315 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2316 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2319 checkName(firstCol);
2320 checkName(secondCol);
// the second column is stored unsplit; pairDone is reset for the next pair
2321 nameMap[firstCol] = secondCol; pairDone = false; }
// leftover text after the last block
2327 vector<string> pieces = splitWhiteSpace(rest);
2329 for (int i = 0; i < pieces.size(); i++) {
2330 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2331 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2334 checkName(firstCol);
2335 checkName(secondCol);
2336 nameMap[firstCol] = secondCol; pairDone = false; }
2340 return nameMap.size();
2343 catch(exception& e) {
2344 errorOut(e, "MothurOut", "readNames");
2348 /**********************************************************************************************************************/
// Reads a two-column names file into a map from first-column name to its list of names.
// For each (firstCol, secondCol) pair, secondCol is split at commas and the resulting vector
// is stored as nameMap[firstCol]. The file is consumed in 4096-byte blocks via
// splitWhiteSpace(); text left in `rest` after the last block is processed the same way.
// Returns nameMap.size().
// NOTE(review): the loop header, the pairDone guard and the declarations are not visible in this excerpt.
2349 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2353 openInputFile(namefile, in);
2357 bool pairDone = false;
2358 bool columnOne = true;
2359 string firstCol, secondCol;
2362 if (control_pressed) { break; }
2364 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2365 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2367 for (int i = 0; i < pieces.size(); i++) {
2368 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2369 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2372 checkName(firstCol);
2373 checkName(secondCol);
2374 vector<string> temp;
2375 splitAtComma(secondCol, temp);
2376 nameMap[firstCol] = temp;
// leftover text after the last block
2384 vector<string> pieces = splitWhiteSpace(rest);
2386 for (int i = 0; i < pieces.size(); i++) {
2387 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2388 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2391 checkName(firstCol);
2392 checkName(secondCol);
2393 vector<string> temp;
2394 splitAtComma(secondCol, temp);
2395 nameMap[firstCol] = temp;
2401 return nameMap.size();
2403 catch(exception& e) {
2404 errorOut(e, "MothurOut", "readNames");
2408 /**********************************************************************************************************************/
// Reads a two-column names file into a map from first-column name to a count.
// For each (firstCol, secondCol) pair the count is getNumNames(secondCol) and is stored as
// nameMap[firstCol]. The file is consumed in 4096-byte blocks via splitWhiteSpace(); text
// left in `rest` after the last block is processed the same way.
// NOTE(review): the loop header, the pairDone guard, the declarations and the return
// statement are not visible in this excerpt.
2409 map<string, int> MothurOut::readNames(string namefile) {
2411 map<string, int> nameMap;
2415 openInputFile(namefile, in);
2419 bool pairDone = false;
2420 bool columnOne = true;
2421 string firstCol, secondCol;
2424 if (control_pressed) { break; }
2426 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2427 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2429 for (int i = 0; i < pieces.size(); i++) {
2430 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2431 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2434 checkName(firstCol);
2435 checkName(secondCol);
2436 int num = getNumNames(secondCol);
2437 nameMap[firstCol] = num;
// leftover text after the last block
2445 vector<string> pieces = splitWhiteSpace(rest);
2446 for (int i = 0; i < pieces.size(); i++) {
2447 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2448 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2451 checkName(firstCol);
2452 checkName(secondCol);
2453 int num = getNumNames(secondCol);
2454 nameMap[firstCol] = num;
2463 catch(exception& e) {
2464 errorOut(e, "MothurOut", "readNames");
2468 /**********************************************************************************************************************/
// Reads a two-column names file into a map from first-column name to a count, with an
// additional numSeqs out-parameter.
// For each (firstCol, secondCol) pair the count is getNumNames(secondCol) and is stored as
// nameMap[firstCol]. The file is consumed in 4096-byte blocks via splitWhiteSpace(); text
// left in `rest` after the last block is processed the same way.
// NOTE(review): no visible line writes numSeqs (presumably it accumulates the counts -
// confirm in the full source); the loop header, the pairDone guard, the declarations and the
// return statement are also not visible.
2469 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2471 map<string, int> nameMap;
2476 openInputFile(namefile, in);
2480 bool pairDone = false;
2481 bool columnOne = true;
2482 string firstCol, secondCol;
2485 if (control_pressed) { break; }
2487 in.read(buffer, 4096);
// `rest` carries a token that straddles two blocks over to the next call
2488 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2490 for (int i = 0; i < pieces.size(); i++) {
2491 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2492 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2495 checkName(firstCol);
2496 checkName(secondCol);
2497 int num = getNumNames(secondCol);
2498 nameMap[firstCol] = num;
// leftover text after the last block
2507 vector<string> pieces = splitWhiteSpace(rest);
2508 for (int i = 0; i < pieces.size(); i++) {
2509 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2510 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2513 checkName(firstCol);
2514 checkName(secondCol);
2515 int num = getNumNames(secondCol);
2516 nameMap[firstCol] = num;
2526 catch(exception& e) {
2527 errorOut(e, "MothurOut", "readNames");
2531 /************************************************************/
// Sanitizes a name in place: every ':' becomes '_', and the member flag
// changedSeqNames is set whenever at least one replacement is made.
// The returned int is produced on a line not shown in this listing.
2532 int MothurOut::checkName(string& name) {
2535 for (int i = 0; i < name.length(); i++) {
2536 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2541 catch(exception& e) {
2542 errorOut(e, "MothurOut", "checkName");
2546 /**********************************************************************************************************************/
// Reads a names file and appends one seqPriorityNode per name pair to
// nameVector. Each node is built from the getNumNames() value of the second
// column, the fastamap entry found for the first column, and the first-column
// name itself.
// A first-column name missing from fastamap is reported with an [ERROR]
// message. The returned int is produced on a line not shown in this listing.
2547 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2553 openInputFile(namefile, in);
2557 bool pairDone = false;
2558 bool columnOne = true;
2559 string firstCol, secondCol;
// Leave the read loop early when control_pressed is set.
2562 if (control_pressed) { break; }
// Read up to 4096 bytes and split them, together with `rest`, into tokens.
2564 in.read(buffer, 4096);
2565 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2567 for (int i = 0; i < pieces.size(); i++) {
// Tokens alternate between the first and second column.
2568 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2569 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
// Both columns have ':' replaced by '_' before the fastamap lookup.
2572 checkName(firstCol);
2573 checkName(secondCol);
2574 int num = getNumNames(secondCol);
2576 map<string, string>::iterator it = fastamap.find(firstCol);
2577 if (it == fastamap.end()) {
2579 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
// NOTE(review): `it` is dereferenced here; presumably this sits in the
// else-branch of the lookup check above (braces not visible) -- confirm.
2581 seqPriorityNode temp(num, it->second, firstCol);
2582 nameVector.push_back(temp);
// Second pass over the tokens produced from `rest` alone, same logic.
2592 vector<string> pieces = splitWhiteSpace(rest);
2594 for (int i = 0; i < pieces.size(); i++) {
2595 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2596 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2599 checkName(firstCol);
2600 checkName(secondCol);
2601 int num = getNumNames(secondCol);
2603 map<string, string>::iterator it = fastamap.find(firstCol);
2604 if (it == fastamap.end()) {
2606 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2608 seqPriorityNode temp(num, it->second, firstCol);
2609 nameVector.push_back(temp);
2618 catch(exception& e) {
2619 errorOut(e, "MothurOut", "readNames");
2623 //**********************************************************************************************************************
// Reads an accnos file and collects every whitespace-separated token into a
// set, after passing each token through checkName (':' -> '_').
// Because the result is a set, duplicate names collapse to one entry.
2624 set<string> MothurOut::readAccnos(string accnosfile){
2628 openInputFile(accnosfile, in);
// Leave the read loop early when control_pressed is set.
2635 if (control_pressed) { break; }
// Read up to 4096 bytes and split them, together with `rest`, into tokens.
2637 in.read(buffer, 4096);
2638 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2640 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
2641 names.insert(pieces[i]);
// Second pass over the tokens produced from `rest` alone.
2647 vector<string> pieces = splitWhiteSpace(rest);
2648 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2652 catch(exception& e) {
2653 errorOut(e, "MothurOut", "readAccnos");
2657 //**********************************************************************************************************************
// Reads an accnos file and appends every whitespace-separated token to
// `names`, after passing each token through checkName (':' -> '_').
// Unlike the set-returning overload, file order and duplicates are kept.
// The returned int is produced on a line not shown in this listing.
2658 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2662 openInputFile(accnosfile, in);
// Leave the read loop early when control_pressed is set.
2669 if (control_pressed) { break; }
// Read up to 4096 bytes and split them, together with `rest`, into tokens.
2671 in.read(buffer, 4096);
2672 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2674 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// Second pass over the tokens produced from `rest` alone.
2679 vector<string> pieces = splitWhiteSpace(rest);
2680 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2685 catch(exception& e) {
2686 errorOut(e, "MothurOut", "readAccnos");
2690 /***********************************************************************/
// Scans `names` character by character looking for ',' separators.
// NOTE(review): the counter and the return statement are on lines omitted
// from this listing; presumably the result is the number of comma-separated
// names -- confirm, including what is returned for an empty string.
2692 int MothurOut::getNumNames(string names){
2698 for(int i=0;i<names.size();i++){
2699 if(names[i] == ','){
2707 catch(exception& e) {
2708 errorOut(e, "MothurOut", "getNumNames");
2712 /***********************************************************************/
// Walks every character of `line`; the loop body and return are not visible.
// NOTE(review): presumably counts occurrences of `c` in `line` -- confirm.
2714 int MothurOut::getNumChar(string line, char c){
2719 for(int i=0;i<line.size();i++){
2728 catch(exception& e) {
2729 errorOut(e, "MothurOut", "getNumChar");
2733 /***********************************************************************/
// Reduces a label to its numeric part: all characters other than ASCII digits
// are dropped, the remaining digits are converted to an int with
// mothurConvert, and that int is turned back into a string with toString.
// Fix: the catch handler used to report "isLabelEquivalent" as the failing
// function, which sent error reports to the wrong place.
2734 string MothurOut::getSimpleLabel(string label){
2738 //remove OTU or phylo tag
2739 string newLabel1 = "";
2740 for (int i = 0; i < label.length(); i++) {
2741 if(label[i]>47 && label[i]<58) { //is a digit
2742 newLabel1 += label[i];
// Round-tripping through an int normalises the digit string.
2747 mothurConvert(newLabel1, num1);
2749 simple = toString(num1);
2753 catch(exception& e) {
2754 errorOut(e, "MothurOut", "getSimpleLabel");
2758 /***********************************************************************/
// Builds an identifier string from the process id and the given threadID.
// On the platforms selected by the #if below, getpid() is appended to `pid`
// (and echoed when `debug` is set), `pid` is scanned for digit characters,
// and finally threadID is appended.
// NOTE(review): only the first test in the #if uses defined(); the remaining
// names are evaluated by value. Confirm that is intended.
2759 string MothurOut::mothurGetpid(int threadID){
2763 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
2765 pid += toString(getpid()); if(debug) { mothurOut("[DEBUG]: " + pid + "\n"); }
2766 //remove any weird chars
2768 for (int i = 0; i < pid.length(); i++) {
2769 if(pid[i]>47 && pid[i]<58) { //is a digit
2775 pid += toString(threadID);
2779 catch(exception& e) {
2780 errorOut(e, "MothurOut", "mothurGetpid");
2785 /***********************************************************************/
// Compares two labels by their digits only: every non-digit character is
// dropped from each label, the digit strings are converted to ints with
// mothurConvert, and `same` is set when the two ints are equal.
// Labels that differ only in their non-digit parts therefore compare equal.
2787 bool MothurOut::isLabelEquivalent(string label1, string label2){
2791 //remove OTU or phylo tag
2792 string newLabel1 = "";
2793 for (int i = 0; i < label1.length(); i++) {
2794 if(label1[i]>47 && label1[i]<58) { //is a digit
2795 newLabel1 += label1[i];
2799 string newLabel2 = "";
2800 for (int i = 0; i < label2.length(); i++) {
2801 if(label2[i]>47 && label2[i]<58) { //is a digit
2802 newLabel2 += label2[i];
// Numeric comparison, so digit strings such as "01" and "1" match.
2807 mothurConvert(newLabel1, num1);
2808 mothurConvert(newLabel2, num2);
2810 if (num1 == num2) { same = true; }
2814 catch(exception& e) {
2815 errorOut(e, "MothurOut", "isLabelEquivalent");
2819 //**********************************************************************************************************************
// Tests whether every element of `subset` also occurs in `bigset`.
// Returns false immediately when subset is larger than bigset, or as soon as
// one element of subset has no equal element in bigset.
// Uses a nested linear scan, so the cost grows with subset.size() * bigset.size().
2820 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2824 if (subset.size() > bigset.size()) { return false; }
2826 //check if each guy in subset is also in bigset
2827 for (int i = 0; i < subset.size(); i++) {
2829 for (int j = 0; j < bigset.size(); j++) {
2830 if (subset[i] == bigset[j]) { match = true; break; }
2833 //you have a guy in subset that had no match in bigset
2834 if (match == false) { return false; }
2840 catch(exception& e) {
2841 errorOut(e, "MothurOut", "isSubset");
2845 /***********************************************************************/
// Deletes a file: the name is expanded with getFullPathName and passed to
// remove(). The result of remove() is captured in `error`.
// The errno/perror reporting below is commented out, so a failed delete is
// not reported by the lines visible here.
2846 int MothurOut::mothurRemove(string filename){
2848 filename = getFullPathName(filename);
2849 int error = remove(filename.c_str());
2851 // if (errno != ENOENT) { //ENOENT == file does not exist
2852 // string message = "Error deleting file " + filename;
2853 // perror(message.c_str());
2858 catch(exception& e) {
2859 errorOut(e, "MothurOut", "mothurRemove");
2863 /***********************************************************************/
// Converts `item` to an int. The conversion is attempted only when
// isNumeric1(item) accepts the string.
// The visible error path prints an [ERROR] message and sets
// commandInputsConvertError (presumably in the else-branch -- the braces and
// the conversion itself are on lines omitted from this listing).
2864 bool MothurOut::mothurConvert(string item, int& num){
2868 if (isNumeric1(item)) {
2873 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2874 commandInputsConvertError = true;
2879 catch(exception& e) {
2880 errorOut(e, "MothurOut", "mothurConvert");
2884 /***********************************************************************/
// Converts `item` to an intDist. The conversion is attempted only when
// isNumeric1(item) accepts the string.
// The visible error path prints an [ERROR] message and sets
// commandInputsConvertError (presumably in the else-branch; the conversion
// itself is on lines omitted from this listing).
2885 bool MothurOut::mothurConvert(string item, intDist& num){
2889 if (isNumeric1(item)) {
2894 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2895 commandInputsConvertError = true;
2900 catch(exception& e) {
2901 errorOut(e, "MothurOut", "mothurConvert");
2906 /***********************************************************************/
// Character-level numeric check: `numeric` becomes true when the string
// contains no character outside "0123456789.-".
// This tests the character set only, not the number format, so the empty
// string, "-", "1.2.3" and "1-2" all pass as well.
2907 bool MothurOut::isNumeric1(string stringToCheck){
2909 bool numeric = false;
2911 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2915 catch(exception& e) {
2916 errorOut(e, "MothurOut", "isNumeric1");
2921 /***********************************************************************/
// Converts `item` to a float. The conversion is attempted only when
// isNumeric1(item) accepts the string.
// The visible error path prints an [ERROR] message and sets
// commandInputsConvertError (presumably in the else-branch; the conversion
// itself is on lines omitted from this listing).
2922 bool MothurOut::mothurConvert(string item, float& num){
2926 if (isNumeric1(item)) {
2931 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2932 commandInputsConvertError = true;
2937 catch(exception& e) {
2938 errorOut(e, "MothurOut", "mothurConvert");
2942 /***********************************************************************/
// Converts `item` to a double. The conversion is attempted only when
// isNumeric1(item) accepts the string.
// The visible error path prints an [ERROR] message and sets
// commandInputsConvertError (presumably in the else-branch; the conversion
// itself is on lines omitted from this listing).
2943 bool MothurOut::mothurConvert(string item, double& num){
2947 if (isNumeric1(item)) {
2952 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2953 commandInputsConvertError = true;
2958 catch(exception& e) {
2959 errorOut(e, "MothurOut", "mothurConvert");
2963 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of binomial coefficients.
// For rows i >= 2 and columns j >= 1 the visible loop stores 0 above the
// diagonal and otherwise the sum binomial[i-1][j-1] + binomial[i-1][j].
// Initialisation of rows 0 and 1 and of column 0 is on lines omitted here.
2965 vector<vector<double> > MothurOut::binomial(int maxOrder){
2967 vector<vector<double> > binomial(maxOrder+1);
2969 for(int i=0;i<=maxOrder;i++){
2970 binomial[i].resize(maxOrder+1);
2979 for(int i=2;i<=maxOrder;i++){
2983 for(int i=2;i<=maxOrder;i++){
2984 for(int j=1;j<=maxOrder;j++){
// NOTE(review): the i==j test is not chained with `else`, so on the diagonal
// the else-branch two lines below also runs and overwrites the 1 with the sum.
2985 if(i==j){ binomial[i][j]=1; }
2986 if(j>i) { binomial[i][j]=0; }
2987 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2994 catch(exception& e) {
2995 errorOut(e, "MothurOut", "binomial");
2999 /**************************************************************************************************/
// Decodes a base-36 string into an unsigned int by folding the digits from
// left to right: num = 36 * num + value(char).
// Digit values come from the `converts` map, which is filled on lines
// omitted from this listing.
// map::operator[] yields 0 for a character missing from the map, so such
// characters are not rejected by the lines shown. The arithmetic is unsigned
// and wraps on overflow.
3000 unsigned int MothurOut::fromBase36(string base36){
3002 unsigned int num = 0;
3004 map<char, int> converts;
3069 while (i < base36.length()) {
3071 num = 36 * num + converts[c];
3078 catch(exception& e) {
3079 errorOut(e, "MothurOut", "fromBase36");
3083 /***********************************************************************/
// Probes the byte order of the host by reading the two bytes {1,0} as a
// short, and reports the result as the string "BIG_ENDIAN" or "LITTLE_ENDIAN".
// NOTE(review): x == 1 means the first byte in memory is the low-order byte,
// which is conventionally called little-endian, so the two labels look
// swapped. Callers may depend on the current strings -- check them before
// changing anything here.
// NOTE(review): reading an unsigned char array through a short* relies on
// the compiler tolerating the aliasing and alignment.
3084 string MothurOut::findEdianness() {
3086 // find real endian type
3087 unsigned char EndianTest[2] = {1,0};
3088 short x = *(short *)EndianTest;
3090 string endianType = "unknown";
3091 if(x == 1) { endianType = "BIG_ENDIAN"; }
3092 else { endianType = "LITTLE_ENDIAN"; }
3096 catch(exception& e) {
3097 errorOut(e, "MothurOut", "findEdianness");
3101 /***********************************************************************/
// Returns the median of x. The vector is taken by value, so sorting it here
// does not disturb the caller's copy.
// Even sizes yield the mean of the two middle elements; odd sizes yield the
// middle element.
// Fix: an empty vector used to fall through to the even-size branch and read
// x[-1] and x[0], which is undefined behaviour. It now returns 0.0.
3102 double MothurOut::median(vector<double> x) {
3106 if (x.size() == 0) { return 0.0; } //nothing to take a median of
3108 //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
3109 sort(x.begin(), x.end());
3111 if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
3112 int midIndex1 = (x.size()/2)-1;
3113 int midIndex2 = (x.size()/2);
3114 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
3116 int midIndex = (x.size()/2);
3117 value = x[midIndex];
3122 catch(exception& e) {
3123 errorOut(e, "MothurOut", "median");
3127 /***********************************************************************/
// Iterates i from 1 to num inclusive; the loop body and the return are on
// lines omitted from this listing.
// NOTE(review): presumably multiplies a running product. The return type is
// int, so large inputs would overflow -- confirm the expected input range.
3128 int MothurOut::factorial(int num){
3132 for (int i = 1; i <= num; i++) {
3138 catch(exception& e) {
3139 errorOut(e, "MothurOut", "factorial");
3143 /***********************************************************************/
// Counts the '>' characters from the stream's current position to its end.
// Every '>' is counted, wherever it appears on a line.
// The stream is consumed to the end as a side effect.
3145 int MothurOut::getNumSeqs(ifstream& file){
3147 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
3151 catch(exception& e) {
3152 errorOut(e, "MothurOut", "getNumSeqs");
3156 /***********************************************************************/
// Line-based variant: reads lines with getline(file) and increments numSeqs
// for each non-empty line whose first character is '>'.
// The enclosing loop and any initialisation of numSeqs are on lines omitted
// from this listing.
3157 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
3162 input = getline(file);
3163 if (input.length() != 0) {
3164 if(input[0] == '>'){ numSeqs++; }
3168 catch(exception& e) {
3169 errorOut(e, "MothurOut", "getNumSeqs");
3173 /***********************************************************************/
// Tries to locate `filename` by attempting to open it in several places, in
// this order:
//   1. the name as given, expanded with getFullPathName
//   2. inputDir + simple file name, when inputDir is not empty
//   3. getDefaultPath() + simple file name, when a default path is set
//   4. the directory of the mothur executable, derived from `argv`
// ableToOpen == 1 is treated as "could not open". If every attempt fails, a
// message is printed and false is returned.
// Fix: tolower() was called on a plain char, which is undefined for negative
// values (path bytes above 0x7F where char is signed). The argument is now
// cast to unsigned char first.
3174 bool MothurOut::checkLocations(string& filename, string inputDir){
3176 filename = getFullPathName(filename);
3180 ableToOpen = openInputFile(filename, in, "noerror");
3183 //if you can't open it, try input location
3184 if (ableToOpen == 1) {
3185 if (inputDir != "") { //default path is set
3186 string tryPath = inputDir + getSimpleName(filename);
3187 mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath); mothurOutEndLine();
3189 ableToOpen = openInputFile(tryPath, in2, "noerror");
3195 //if you can't open it, try default location
3196 if (ableToOpen == 1) {
3197 if (getDefaultPath() != "") { //default path is set
3198 string tryPath = getDefaultPath() + getSimpleName(filename);
3199 mothurOut("Unable to open " + filename + ". Trying default " + tryPath); mothurOutEndLine();
3201 ableToOpen = openInputFile(tryPath, in2, "noerror");
3207 //if you can't open it, it is not in the current working directory or inputDir, so try mothur's executable location
3208 if (ableToOpen == 1) {
3209 string exepath = argv;
3210 string tempPath = exepath;
// Lower-case a copy of the path so the search for the last 'm' below is
// case-insensitive; exepath is then truncated at that position.
3211 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(static_cast<unsigned char>(exepath[i])); }
3212 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
3214 string tryPath = getFullPathName(exepath) + getSimpleName(filename);
3215 mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath); mothurOutEndLine();
3217 ableToOpen = openInputFile(tryPath, in2, "noerror");
3222 if (ableToOpen == 1) { mothurOut("Unable to open " + filename + "."); mothurOutEndLine(); return false; }
3226 catch(exception& e) {
3227 errorOut(e, "MothurOut", "checkLocations");
3231 /***********************************************************************/
3233 //This function splits a string at every occurrence of the given symbol and puts the pieces in a vector
// Splits `estim` at every occurrence of `symbol` and appends the pieces to
// `container`. A '-' symbol is delegated to splitAtDash (which also handles
// backslash-escaped dashes).
// The text after the last separator is always appended, so an input with no
// separator yields one piece and a trailing separator yields a final empty
// piece.
3234 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
3237 if (symbol == '-') { splitAtDash(estim, container); return; }
3239 string individual = "";
3240 int estimLength = estim.size();
3241 for(int i=0;i<estimLength;i++){
3242 if(estim[i] == symbol){
3243 container.push_back(individual);
3247 individual += estim[i];
// Flush the last piece, which has no separator after it.
3250 container.push_back(individual);
3253 catch(exception& e) {
3254 errorOut(e, "MothurOut", "splitAtChar");
3259 /***********************************************************************/
3261 //This function parses the estimator options and puts them in a vector
// Splits `estim` at '-' and appends the pieces to `container` (a vector, so
// order and duplicates are kept). A dash preceded by a backslash is kept as a
// literal '-' in the current piece and the backslash is dropped. A backslash
// not followed by a dash is copied through unchanged.
// The first loop opens with a block-comment marker and is an older, disabled
// version of the same logic; the second loop is the live one.
// The text after the last dash is always appended as the final piece.
3262 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
3264 string individual = "";
3265 int estimLength = estim.size();
3266 bool prevEscape = false;
3267 /*for(int i=0;i<estimLength;i++){
3269 individual += estim[i];
3273 if(estim[i] == '\\'){
3276 else if(estim[i] == '-'){
3277 container.push_back(individual);
3282 individual += estim[i];
3289 for(int i=0;i<estimLength;i++){
3290 if(estim[i] == '-'){
3291 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3293 container.push_back(individual);
3296 }else if(estim[i] == '\\'){
3297 if (i < estimLength-1) {
3298 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3299 else { individual += estim[i]; prevEscape = false; } //if no, add in
3300 }else { individual += estim[i]; }
3302 individual += estim[i];
3308 container.push_back(individual);
3310 catch(exception& e) {
3311 errorOut(e, "MothurOut", "splitAtDash");
3316 /***********************************************************************/
3317 //This function parses the label options and puts them in a set
// Splits `estim` at '-' and inserts the pieces into `container` (a set, so
// duplicates collapse and order is not kept). A dash preceded by a backslash
// is kept as a literal '-' and the backslash is dropped. A backslash not
// followed by a dash is copied through unchanged.
// NOTE(review): two loops are visible. The first mirrors the disabled loop of
// the vector overload; its comment delimiters, if any, are on lines omitted
// from this listing -- confirm it is dead code.
3318 void MothurOut::splitAtDash(string& estim, set<string>& container) {
3320 string individual = "";
3321 int estimLength = estim.size();
3322 bool prevEscape = false;
3324 for(int i=0;i<estimLength;i++){
3326 individual += estim[i];
3330 if(estim[i] == '\\'){
3333 else if(estim[i] == '-'){
3334 container.insert(individual);
3339 individual += estim[i];
3346 for(int i=0;i<estimLength;i++){
3347 if(estim[i] == '-'){
3348 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3350 container.insert(individual);
3353 }else if(estim[i] == '\\'){
3354 if (i < estimLength-1) {
3355 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3356 else { individual += estim[i]; prevEscape = false; } //if no, add in
3357 }else { individual += estim[i]; }
3359 individual += estim[i];
3362 container.insert(individual);
3365 catch(exception& e) {
3366 errorOut(e, "MothurOut", "splitAtDash");
3370 /***********************************************************************/
3371 //This function parses the line options and puts them in a set
// Splits `estim` at '-' and inserts each piece, converted to an int with
// convert(), into `container`. The backslash-escape handling matches the
// string overloads.
// The final piece after the last dash is converted and inserted as well.
// NOTE(review): two loops are visible. The first mirrors the disabled loop of
// the vector overload; its comment delimiters, if any, are on lines omitted
// from this listing -- confirm it is dead code.
3372 void MothurOut::splitAtDash(string& estim, set<int>& container) {
3374 string individual = "";
3376 int estimLength = estim.size();
3377 bool prevEscape = false;
3379 for(int i=0;i<estimLength;i++){
3381 individual += estim[i];
3385 if(estim[i] == '\\'){
3388 else if(estim[i] == '-'){
3389 convert(individual, lineNum); //convert the string to int
3390 container.insert(lineNum);
3395 individual += estim[i];
3401 for(int i=0;i<estimLength;i++){
3402 if(estim[i] == '-'){
3403 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3405 convert(individual, lineNum); //convert the string to int
3406 container.insert(lineNum);
3409 }else if(estim[i] == '\\'){
3410 if (i < estimLength-1) {
3411 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3412 else { individual += estim[i]; prevEscape = false; } //if no, add in
3413 }else { individual += estim[i]; }
3415 individual += estim[i];
3419 convert(individual, lineNum); //convert the string to int
3420 container.insert(lineNum);
3422 catch(exception& e) {
3423 errorOut(e, "MothurOut", "splitAtDash");
3428 /***********************************************************************/
// Joins `names` into a single comma-separated string with no trailing comma.
// An empty vector returns `list` as it stands at that point (its
// initialisation is on a line omitted from this listing).
3429 string MothurOut::makeList(vector<string>& names) {
// This guard also keeps names.size()-1 below from wrapping around.
3433 if (names.size() == 0) { return list; }
3435 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
// The last name is appended without a separator.
3438 list += names[names.size()-1];
3442 catch(exception& e) {
3443 errorOut(e, "MothurOut", "makeList");
3448 /***********************************************************************/
3449 //This function parses a string and puts the pieces in a vector
// Splits `estim` at every ',' and appends the pieces to `container`.
// The text after the last comma is always appended, so an input with no comma
// yields one piece and a trailing comma yields a final empty piece.
// `estim` itself is not modified by the visible lines. The commented-out code
// at the bottom is an older version that consumed `estim` while splitting.
3450 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
3452 string individual = "";
3453 int estimLength = estim.size();
3454 for(int i=0;i<estimLength;i++){
3455 if(estim[i] == ','){
3456 container.push_back(individual);
3460 individual += estim[i];
// Flush the last piece, which has no comma after it.
3463 container.push_back(individual);
3468 // string individual;
3470 // while (estim.find_first_of(',') != -1) {
3471 // individual = estim.substr(0,estim.find_first_of(','));
3472 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
3473 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
3474 // container.push_back(individual);
3478 // container.push_back(estim);
3480 catch(exception& e) {
3481 errorOut(e, "MothurOut", "splitAtComma");
3485 /***********************************************************************/
3486 //This function splits up the various option parameters
// Pops the first c-delimited field off `suffix`.
// On return, `prefix` holds the text before the first `c`. `suffix` holds the
// text after it with leading spaces removed, or "" when nothing follows the
// delimiter.
// Fix: when only spaces followed the delimiter, the stripping loop emptied
// `suffix` and then called at(0) on it, which throws std::out_of_range. The
// loop now stops once `suffix` is empty.
// NOTE(review): when `c` does not occur at all, find_first_of returns npos,
// the +2 / +1 arithmetic wraps around, and a non-empty `suffix` is left
// unchanged while `prefix` receives the whole string. Kept as is because
// callers may rely on it.
3487 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3489 prefix = suffix.substr(0,suffix.find_first_of(c));
3490 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have the delimiter at end of string
3491 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3493 while(!suffix.empty() && suffix.at(0) == ' ')
3494 suffix = suffix.substr(1, suffix.length());
3495 }else { suffix = ""; }
3498 catch(exception& e) {
3499 errorOut(e, "MothurOut", "splitAtChar");
3504 /***********************************************************************/
3506 //This function splits up the various option parameters
// Pops the first comma-delimited field off `suffix`.
// On return, `prefix` holds the text before the first ','. `suffix` holds the
// text after it with leading spaces removed, or "" when nothing follows the
// comma.
// Fix: when only spaces followed the comma, the stripping loop emptied
// `suffix` and then called at(0) on it, which throws std::out_of_range. The
// loop now stops once `suffix` is empty.
// NOTE(review): when there is no comma at all, find_first_of returns npos,
// the +2 / +1 arithmetic wraps around, and a non-empty `suffix` is left
// unchanged while `prefix` receives the whole string. Kept as is because
// callers may rely on it.
3507 void MothurOut::splitAtComma(string& prefix, string& suffix){
3509 prefix = suffix.substr(0,suffix.find_first_of(','));
3510 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3511 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3513 while(!suffix.empty() && suffix.at(0) == ' ')
3514 suffix = suffix.substr(1, suffix.length());
3515 }else { suffix = ""; }
3518 catch(exception& e) {
3519 errorOut(e, "MothurOut", "splitAtComma");
3523 /***********************************************************************/
3525 //This function separates the key value from the option value i.e. dist=96_...
// Splits "key=value" text held in `value` at the first '='.
// `key` receives the text before the '=' and `value` is replaced by the text
// after it.
// What happens when no '=' is present is decided on lines omitted from this
// listing.
3526 void MothurOut::splitAtEquals(string& key, string& value){
// The comparison against -1 works because -1 converts to string::npos.
3528 if(value.find_first_of('=') != -1){
3529 key = value.substr(0,value.find_first_of('='));
3530 if ((value.find_first_of('=')+1) <= value.length()) {
3531 value = value.substr(value.find_first_of('=')+1, value.length());
3538 catch(exception& e) {
3539 errorOut(e, "MothurOut", "splitAtEquals");
3544 /**************************************************************************************************/
// Returns true when `groupname` equals one of the entries of `Groups`
// (linear scan, exact string comparison).
// The not-found return is on a line omitted from this listing.
3546 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3548 for (int i = 0; i < Groups.size(); i++) {
3549 if (groupname == Groups[i]) { return true; }
3553 catch(exception& e) {
3554 errorOut(e, "MothurOut", "inUsersGroups");
3558 /**************************************************************************************************/
// Returns true when the vector `set` equals one of the vectors in `sets`.
// vector equality is element-wise and order-sensitive.
// The not-found return is on a line omitted from this listing.
3560 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3562 for (int i = 0; i < sets.size(); i++) {
3563 if (set == sets[i]) { return true; }
3567 catch(exception& e) {
3568 errorOut(e, "MothurOut", "inUsersGroups");
3572 /**************************************************************************************************/
// Returns true when the integer `groupname` occurs in `Groups` (linear scan).
// The not-found return is on a line omitted from this listing.
3574 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3576 for (int i = 0; i < Groups.size(); i++) {
3577 if (groupname == Groups[i]) { return true; }
3581 catch(exception& e) {
3582 errorOut(e, "MothurOut", "inUsersGroups");
3587 /**************************************************************************************************/
3588 //returns true if any of the strings in first vector are in second vector
// Returns true as soon as any entry of `groupnames` is found in `Groups`,
// using the single-name overload for each lookup.
// This is an "any of" test, not an "all of" test.
3589 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3592 for (int i = 0; i < groupnames.size(); i++) {
3593 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3597 catch(exception& e) {
3598 errorOut(e, "MothurOut", "inUsersGroups");
3602 /**************************************************************************************************/
3603 //removes entries that are only white space
// Removes every entry of tempVector that is empty or consists only of
// whitespace. The surviving entries keep their relative order, and
// tempVector is overwritten with the filtered result.
// The returned int is produced on a line omitted from this listing.
// Fix: isspace() was called on a plain char, which is undefined for negative
// values (bytes above 0x7F where char is signed). The argument is now cast
// to unsigned char first.
3604 int MothurOut::removeBlanks(vector<string>& tempVector) {
3606 vector<string> newVector;
3607 for (int i = 0; i < tempVector.size(); i++) {
3608 bool isBlank = true;
3609 for (int j = 0; j < tempVector[i].length(); j++) {
// Jumping j past the end of the string ends the scan at the first
// non-space character.
3610 if (!isspace(static_cast<unsigned char>(tempVector[i][j]))) { isBlank = false; j+= tempVector[i].length(); } //contains non space chars, break out and save
3612 if (!isBlank) { newVector.push_back(tempVector[i]); }
3614 tempVector = newVector;
3617 catch(exception& e) {
3618 errorOut(e, "MothurOut", "removeBlanks");
3622 /***********************************************************************/
3623 //this function determines if the user has given us labels that are smaller than the given label.
3624 //if so then it returns true so that the calling function can run the previous valid distance.
3625 //it's a "smart" distance function. It also checks for invalid labels.
// Checks the user's requested labels against the label currently being read.
// Visible behaviour:
//   * label == "unique" returns false immediately.
//   * Each user label that converts to a float is recorded in orderFloat and
//     userMap; "unique" is recorded as -1.0.
//   * A user label that is neither "unique" nor convertible is erased from
//     userLabels and reported, unless errorOff is non-empty.
//   * After sorting, every recorded value smaller than labelFloat is erased
//     from userLabels and reported as missing from the file.
// userLabels is modified in place. Messages are suppressed when errorOff is
// not the empty string. How `smaller` is set and returned is on lines omitted
// from this listing.
3626 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3629 set<string>::iterator it;
3630 vector<float> orderFloat;
3631 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3632 map<string, float>::iterator it2;
3634 bool smaller = false;
3636 //unique is the smallest line
3637 if (label == "unique") { return false; }
3639 if (convertTestFloat(label, labelFloat)) {
3640 convert(label, labelFloat);
3641 }else { //cant convert
3646 //go through users set and make them floats
// The loop header has no increment; the iterator is advanced inside the body
// so that entries can be erased safely while iterating.
3647 for(it = userLabels.begin(); it != userLabels.end();) {
3650 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3652 orderFloat.push_back(temp);
3653 userMap[*it] = temp;
3655 }else if (*it == "unique") {
// -1.0 makes "unique" sort ahead of every non-negative distance.
3656 orderFloat.push_back(-1.0);
3657 userMap["unique"] = -1.0;
3660 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
// Post-increment hands erase() the old position while `it` moves on.
3661 userLabels.erase(it++);
3666 sort(orderFloat.begin(), orderFloat.end());
3668 /*************************************************/
3669 //is this label bigger than any of the users labels
3670 /*************************************************/
3672 //loop through order until you find a label greater than label
3673 for (int i = 0; i < orderFloat.size(); i++) {
3674 if (orderFloat[i] < labelFloat) {
3676 if (orderFloat[i] == -1) {
3677 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3678 userLabels.erase("unique");
3681 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// userMap is searched by value to recover the user's original label text
// for this float.
3683 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3684 if (it2->second == orderFloat[i]) {
3686 //remove small labels
3687 userLabels.erase(s);
3691 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3693 //since they are sorted once you find a bigger one stop looking
3700 catch(exception& e) {
3701 errorOut(e, "MothurOut", "anyLabelsToProcess");
3706 /**************************************************************************************************/
// Checks the version stamp on the first line of `file` against `version`.
// The first line is expected to start with '#' followed by a dotted version.
// `good` becomes false when the '#' is missing, when the two versions have a
// different number of dot-separated parts, or when some part of `version` is
// numerically greater than the matching part of the file's version.
// Side effects: the stream is closed when the check fails and rewound to
// offset 0 when it passes.
// NOTE(review): line[0] is read without a visible emptiness check, and
// linesVector[j] is indexed by versionVector's size; guards may exist on
// lines omitted from this listing -- confirm.
3707 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3712 string line = getline(file);
3714 //before we added this check
3715 if (line[0] != '#') { good = false; }
// Drop the leading '#'.
3718 line = line.substr(1);
3720 vector<string> versionVector;
3721 splitAtChar(version, versionVector, '.');
3723 //check file version
3724 vector<string> linesVector;
3725 splitAtChar(line, linesVector, '.');
3727 if (versionVector.size() != linesVector.size()) { good = false; }
3729 for (int j = 0; j < versionVector.size(); j++) {
3731 convert(versionVector[j], num1);
3732 convert(linesVector[j], num2);
3734 //if mothurs version is newer than this files version, then we want to remake it
3735 if (num1 > num2) { good = false; break; }
3741 if (!good) { file.close(); }
3742 else { file.seekg(0); }
3746 catch(exception& e) {
3747 errorOut(e, "MothurOut", "checkReleaseVersion");
3751 /**************************************************************************************************/
// Column-wise mean over a set of rows: averages[i] is the sum of dists[k][i]
// over all rows k, divided by the number of rows.
// dists[0] is read unconditionally to size the result, so `dists` must not be
// empty. No row may be longer than dists[0], otherwise averages[i] is indexed
// out of range.
3752 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3754 vector<double> averages; //averages.resize(numComp, 0.0);
3755 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3757 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3758 for (int i = 0; i < dists[thisIter].size(); i++) {
3759 averages[i] += dists[thisIter][i];
3764 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3768 catch(exception& e) {
3769 errorOut(e, "MothurOut", "getAverages");
3773 /**************************************************************************************************/
// Arithmetic mean of `dists`: the elements are summed into `average`, which
// is then divided by the element count.
// NOTE(review): for an empty vector the division is by 0.0. No guard is
// visible in this listing -- confirm callers never pass an empty vector.
3774 double MothurOut::getAverage(vector<double> dists) {
3778 for (int i = 0; i < dists.size(); i++) {
3779 average += dists[i];
3783 average /= (double) dists.size();
3787 catch(exception& e) {
3788 errorOut(e, "MothurOut", "getAverage");
3793 /**************************************************************************************************/
// Column-wise standard deviation over a set of rows. The column means are
// computed with getAverages(dists).
// For each column j: sum the squared differences from the mean, divide by
// the number of rows (dists.size(), not size()-1) and take the square root.
// dists[0] is read unconditionally, so `dists` must not be empty.
// Fix: the catch handler used to report "getAverages" as the failing
// function; it now reports "getStandardDeviation".
3794 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3797 vector<double> averages = getAverages(dists);
3799 //find standard deviation
3800 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3801 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3803 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3804 for (int j = 0; j < dists[thisIter].size(); j++) {
3805 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3808 for (int i = 0; i < stdDev.size(); i++) {
3809 stdDev[i] /= (double) dists.size();
3810 stdDev[i] = sqrt(stdDev[i]);
3815 catch(exception& e) {
3816 errorOut(e, "MothurOut", "getStandardDeviation");
3820 /**************************************************************************************************/
// Column-wise standard deviation using means supplied by the caller in
// `averages`.
// For each column j: sum the squared differences from averages[j], divide by
// the number of rows (dists.size(), not size()-1) and take the square root.
// dists[0] is read unconditionally, so `dists` must not be empty. `averages`
// must have at least as many entries as the longest row.
3821 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3823 //find standard deviation
3824 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3825 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3827 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3828 for (int j = 0; j < dists[thisIter].size(); j++) {
3829 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3832 for (int i = 0; i < stdDev.size(); i++) {
3833 stdDev[i] /= (double) dists.size();
3834 stdDev[i] = sqrt(stdDev[i]);
3839 catch(exception& e) {
3840 errorOut(e, "MothurOut", "getStandardDeviation");
3844 /**************************************************************************************************/
// Combines per-iteration distance tables into a single table.
// calcDistsTotals is indexed [iteration][calculator][comparison]. The result
// has the layout of iteration 0 and copies seq1/seq2 from it.
//   mode == "average": each dist is the mean over all iterations.
//   any other mode:    each dist is the element at index size()/2 of the
//                      sorted per-iteration values. For an even number of
//                      iterations this is the upper of the two middle values,
//                      not their mean.
// calcDistsTotals[0] is read unconditionally, so the input must not be empty.
3845 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3848 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3849 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3850 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3851 vector<seqDist> temp;
3852 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3854 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3855 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3856 tempDist.dist = 0.0;
3857 temp.push_back(tempDist);
3859 calcAverages.push_back(temp);
3862 if (mode == "average") {
3863 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3864 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3865 for (int j = 0; j < calcAverages[i].size(); j++) {
3866 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3871 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3872 for (int j = 0; j < calcAverages[i].size(); j++) {
3873 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3876 }else { //find median
3877 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3878 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3879 vector<double> dists;
3880 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3881 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3883 sort(dists.begin(), dists.end());
3884 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3889 return calcAverages;
3891 catch(exception& e) {
3892 errorOut(e, "MothurOut", "getAverages");
3896 /**************************************************************************************************/
// Averages per-iteration distance tables into a single table.
// calcDistsTotals is indexed [iteration][calculator][comparison]. The result
// has the layout of iteration 0, copies seq1/seq2 from it, and sets each dist
// to the mean of that entry over all iterations.
// calcDistsTotals[0] is read unconditionally, so the input must not be empty.
// Every iteration must have at least the dimensions of iteration 0.
3897 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3900 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3901 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3902 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3903 vector<seqDist> temp;
3904 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3906 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3907 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3908 tempDist.dist = 0.0;
3909 temp.push_back(tempDist);
3911 calcAverages.push_back(temp);
3915 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3916 for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
3917 for (int j = 0; j < calcAverages[i].size(); j++) {
3918 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3923 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3924 for (int j = 0; j < calcAverages[i].size(); j++) {
3925 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3929 return calcAverages;
3931 catch(exception& e) {
3932 errorOut(e, "MothurOut", "getAverages");
3936 /**************************************************************************************************/
3937 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3940 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3942 //find standard deviation
3943 vector< vector<seqDist> > stdDev;
3944 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3945 vector<seqDist> temp;
3946 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3948 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3949 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3950 tempDist.dist = 0.0;
3951 temp.push_back(tempDist);
3953 stdDev.push_back(temp);
3956 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3957 for (int i = 0; i < stdDev.size(); i++) {
3958 for (int j = 0; j < stdDev[i].size(); j++) {
3959 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3964 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3965 for (int j = 0; j < stdDev[i].size(); j++) {
3966 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3967 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3973 catch(exception& e) {
3974 errorOut(e, "MothurOut", "getAverages");
3978 /**************************************************************************************************/
3979 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3981 //find standard deviation
3982 vector< vector<seqDist> > stdDev;
3983 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3984 vector<seqDist> temp;
3985 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3987 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3988 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3989 tempDist.dist = 0.0;
3990 temp.push_back(tempDist);
3992 stdDev.push_back(temp);
3995 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3996 for (int i = 0; i < stdDev.size(); i++) {
3997 for (int j = 0; j < stdDev[i].size(); j++) {
3998 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
4003 for (int i = 0; i < stdDev.size(); i++) { //finds average.
4004 for (int j = 0; j < stdDev[i].size(); j++) {
4005 stdDev[i][j].dist /= (float) calcDistsTotals.size();
4006 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
4012 catch(exception& e) {
4013 errorOut(e, "MothurOut", "getAverages");
4018 /**************************************************************************************************/
4019 bool MothurOut::isContainingOnlyDigits(string input) {
4022 //are you a digit in ascii code
4023 for (int i = 0;i < input.length(); i++){
4024 if( input[i]>47 && input[i]<58){}
4025 else { return false; }
4030 catch(exception& e) {
4031 errorOut(e, "MothurOut", "isContainingOnlyDigits");
4035 /**************************************************************************************************/
4036 int MothurOut::removeConfidences(string& tax) {
4042 while (tax.find_first_of(';') != -1) {
4044 if (control_pressed) { return 0; }
4047 taxon = tax.substr(0,tax.find_first_of(';'));
4049 int pos = taxon.find_last_of('(');
4052 int pos2 = taxon.find_last_of(')');
4054 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
4055 if (isNumeric1(confidenceScore)) {
4056 taxon = taxon.substr(0, pos); //rip off confidence
4062 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
4070 catch(exception& e) {
4071 errorOut(e, "MothurOut", "removeConfidences");
4075 /**************************************************************************************************/
4076 string MothurOut::removeQuotes(string tax) {
4082 for (int i = 0; i < tax.length(); i++) {
4084 if (control_pressed) { return newTax; }
4086 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
4092 catch(exception& e) {
4093 errorOut(e, "MothurOut", "removeQuotes");
4097 /**************************************************************************************************/
4098 // function for calculating standard deviation
4099 double MothurOut::getStandardDeviation(vector<int>& featureVector){
4103 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
4104 average /= (double) featureVector.size();
4106 //find standard deviation
4108 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
4109 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
4112 stdDev /= (double) featureVector.size();
4113 stdDev = sqrt(stdDev);
4117 catch(exception& e) {
4118 errorOut(e, "MothurOut", "getStandardDeviation");
4122 /**************************************************************************************************/
4123 // returns largest value in vector
4124 double MothurOut::max(vector<double>& featureVector){
4126 if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; }
4129 double largest = featureVector[0];
4130 for (int i = 1; i < featureVector.size(); i++) {
4131 if (featureVector[i] > largest) { largest = featureVector[i]; }
4136 catch(exception& e) {
4137 errorOut(e, "MothurOut", "max");
4141 /**************************************************************************************************/
4142 // returns smallest value in vector
4143 double MothurOut::min(vector<double>& featureVector){
4145 if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; }
4148 double smallest = featureVector[0];
4149 for (int i = 1; i < featureVector.size(); i++) {
4150 if (featureVector[i] < smallest) { smallest = featureVector[i]; }
4155 catch(exception& e) {
4156 errorOut(e, "MothurOut", "min");
4161 /**************************************************************************************************/