1 #ifndef SEQUENCEPARSER_H
2 #define SEQUENCEPARSER_H
8 * Created by westcott on 9/9/11.
9 * Copyright 2011 Schloss Lab. All rights reserved.
15 #include "mothurout.h"
16 #include "sequence.hpp"
19 /* This class reads a fasta file and a group file, plus an optional names file, and parses the data by group.
21 Note: The sum of all the groups' unique sequences will be larger than the original number of unique sequences.
22 This is because when we parse the name file we make a unique for each group instead of 1 unique for all
27 class SequenceParser {
31 SequenceParser(string, string); //args in order: group file, fasta file - file mismatches will set m->control_pressed = true
32 SequenceParser(string, string, string); //args in order: group file, fasta file, name file - file mismatches will set m->control_pressed = true
37 vector<string> getNamesOfGroups(); //NOTE(review): presumably returns the names of all groups that were parsed - confirm in the implementation
38 bool isValidGroup(string); //returns true if string is a valid group
39 string getGroup(string); //returns the group of the specific sequence given by string
41 int getNumSeqs(string); //returns the number of unique sequences in the specific group given by string
42 vector<Sequence> getSeqs(string); //returns the unique sequences in the specific group given by string
43 map<string, string> getNameMap(string); //returns seqName -> namesOfRedundantSeqs (separated by commas) for the specific group given by string - same layout as the name file, but each line is parsed by group.
51 map<string, vector<Sequence> > seqs; //keyed by group: a vector of sequences for each group
52 map<string, map<string, string> > nameMapPerGroup; //keyed by group: the nameMap for each group