-/*
- * getrepseqscommand.cpp
- * Mothur
- *
- * Created by Sarah Westcott on 5/19/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "getrepseqscommand.h"
-
-//**********************************************************************************************************************
-GetRepSeqsCommand::GetRepSeqsCommand(){
- try {
- globaldata = GlobalData::getInstance();
- fastafile = globaldata->getFastaFile();
- namesfile = globaldata->getNameFile();
- openInputFile(fastafile, in);
-
- fasta = new FastaMap();
-
- //read in group map info.
- groupMap = new GroupMap(globaldata->getGroupFile());
- groupMap->readMap();
-
- //fill filehandles with neccessary ofstreams
- int i;
- ofstream* temp;
- //one for each group
- for (i=0; i<groupMap->getNumGroups(); i++) {
- temp = new ofstream;
- filehandles[groupMap->namesOfGroups[i]] = temp;
- }
-
- //one for shared
- temp = new ofstream;
- string s = "shared";
- filehandles[s] = temp;
-
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the GetRepSeqsCommand class function GetRepSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-GetRepSeqsCommand::~GetRepSeqsCommand(){
- delete input;
- delete read;
- delete fasta;
- delete list;
-}
-
-//**********************************************************************************************************************
-
-int GetRepSeqsCommand::execute(){
- try {
- int count = 1;
- string binnames, name, sequence;
-
- //read fastafile
- fasta->readFastaFile(in);
-
- //set format to list so input can get listvector
- globaldata->setFormat("list");
-
- //if user gave a namesfile then use it
- if (namesfile != "") {
- readNamesFile();
- }
-
- //read list file
- read = new ReadOTUFile(globaldata->getListFile());
- read->read(&*globaldata);
-
- input = globaldata->ginput;
- list = globaldata->gListVector;
-
- while(list != NULL){
-
- if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
-
- cout << list->getLabel() << '\t' << count << endl;
-
- //open output list files
- for (int i=0; i<groupMap->getNumGroups(); i++) {//opens an output file for each group
- openOutputFile(fastafile + groupMap->namesOfGroups[i] + list->getLabel() + ".fasta", *(filehandles[groupMap->namesOfGroups[i]]));
- used[groupMap->namesOfGroups[i]] = false;
- }
- string s = "shared";
- openOutputFile(fastafile + s + list->getLabel() + ".fasta", *(filehandles[s]));
- used[s] = false;
-
-
- //for each bin in the list vector
- for (int i = 0; i < list->size(); i++) {
- seq.clear();
- //uses this to determine if the bin is unique to one group or if it is shared
- map<string, string> groups;
-
- //determine if this otu is unique to one group or not
- binnames = list->get(i);
- while (binnames.find_first_of(',') != -1) {
- //parse out each name in bin
- name = binnames.substr(0,binnames.find_first_of(','));
- binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-
- //do work for that name
- sequence = fasta->getSequence(name);
- if (sequence != "not found") {
- string group = groupMap->getGroup(name);
- if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin
- else {
- cout << "error sequence " << name << " is not assigned a group in your groupfile. Please correct." << endl;
- removeFiles(list->getLabel());
- return 0;
- }
- name = ">" + name + "|" + toString(i+1);
- seq[name] = sequence;
- }else {
- cout << name << " is missing from your fasta or name file. Please correct. " << endl;
- removeFiles(list->getLabel());
- return 0;
- }
-
- }
-
- //get last name
- sequence = fasta->getSequence(binnames);
- if (sequence != "not found") {
- string group = groupMap->getGroup(binnames);
- if (group != "not found") { groups[group] = group; } //add group to list of groups in this bin
- else {
- cout << "error sequence " << binnames << " is not assigned a group in your groupfile. Please correct." << endl;
- removeFiles(list->getLabel());
- return 0;
- }
- binnames = ">" + binnames + "|" + toString(i+1); //attach bin number to name
- seq[binnames] = sequence;
- }else {
- cout << binnames << " is missing from your fasta or name file. Please correct. " << endl;
- removeFiles(list->getLabel());
- return 0;
- }
-
- //output each bin to files
- //what file does this bin need to be outputted to
- if (groups.size() == 1) { //this bin is unique to one group
- it3 = groups.begin();
- string uniqueGroup = it3->first;
- used[uniqueGroup] = true;
- //print out sequences from that bin to shared file
- for (it3 = seq.begin(); it3 != seq.end(); it3++){
- *(filehandles[uniqueGroup]) << it3->first << endl;
- *(filehandles[uniqueGroup]) << it3->second << endl;
- }
- }else {//this bin has sequences from multiple groups in it
- used[s] = true;
- //print out sequences from that bin to shared file
- for (it3 = seq.begin(); it3 != seq.end(); it3++){
- *(filehandles[s]) << it3->first << endl;
- *(filehandles[s]) << it3->second << endl;
- }
- }
- }
-
- //close ostreams and remove unused files
- for (it = filehandles.begin(); it != filehandles.end(); it++) {
- it->second->close();
- if (used[it->first] == false) { string filename = fastafile + it->first + list->getLabel() + ".fasta"; remove(filename.c_str()); }
- }
-
- }
-
- delete list;
- list = input->getListVector();
- count++;
- }
-
- return 0;
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the GetRepSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-void GetRepSeqsCommand::readNamesFile() {
- try {
- vector<string> dupNames;
- openInputFile(namesfile, inNames);
-
- string name, names, sequence;
-
- while(inNames){
- inNames >> name; //read from first column A
- inNames >> names; //read from second column A,B,C,D
-
- dupNames.clear();
-
- //parse names into vector
- splitAtComma(names, dupNames);
-
- //store names in fasta map
- sequence = fasta->getSequence(name);
- for (int i = 0; i < dupNames.size(); i++) {
- fasta->push_back(dupNames[i], sequence);
- }
-
- gobble(inNames);
- }
- inNames.close();
-
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the GetRepSeqsCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}
-//**********************************************************************************************************************
-void GetRepSeqsCommand::removeFiles(string label) {
- try {
- //close ostreams
- for (it = filehandles.begin(); it != filehandles.end(); it++) {
- it->second->close();
- }
-
- //remove output files because there was an error
- for (int i=0; i<groupMap->getNumGroups(); i++) {
- string outputFileName = fastafile + groupMap->namesOfGroups[i] + label + ".fasta";
- remove(outputFileName.c_str());
- }
- string outputFileName = fastafile + "shared"+ label + ".fasta";
- remove(outputFileName.c_str());
-
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the GetRepSeqsCommand class Function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the GetRepSeqsCommand class function removeFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-