16 seqname = source = feature = "";
19 strand = 0; //strand is a char variable
21 gene_id = transcript_id = "";
25 bool operator<(const GTFItem& o) const {
26 if (gene_id != o.gene_id) return gene_id < o.gene_id;
27 if (transcript_id != o.transcript_id) return transcript_id < o.transcript_id;
28 return start < o.start;
// Checks a GTF format expectation and aborts with a diagnostic when it fails.
//
//   value - result of the check; non-zero means the check passed
//   line  - the GTF line being parsed, echoed in the diagnostic
//   msg   - description of the violated expectation
//
// When value is zero, the diagnostic is written to stderr and the process
// terminates via exit(-1); otherwise the call has no effect.
void my_assert(char value, std::string& line, const std::string& msg) {
  if (value) return;

  fprintf(stderr, ".gtf file might be corrupted!\n");
  fprintf(stderr, "Stop at line : %s\n", line.c_str());
  fprintf(stderr, "Error Message: %s\n", msg.c_str());
  exit(-1);
}
40 void parse(std::string line) {
41 std::istringstream strin(line);
44 getline(strin, seqname, '\t');
45 getline(strin, source, '\t');
46 getline(strin, feature, '\t');
47 getline(strin, tmp, '\t');
48 start = atoi(tmp.c_str());
49 getline(strin, tmp, '\t');
50 end = atoi(tmp.c_str());
51 getline(strin, score, '\t');
52 getline(strin, tmp, '\t');
53 my_assert((tmp.length() == 1 && (tmp[0] == '+' || tmp[0] == '-')), line, "Strand is neither '+' nor '-'!");
55 getline(strin, frame, '\t');
57 getline(strin, left); // assign attributes and possible comments into "left"
59 strin.clear(); strin.str(left);
60 bool find_gene_id = false, find_transcript_id = false;
62 while (getline(strin, tmp, ';') && (!find_gene_id || !find_transcript_id)) {
64 size_t pos = tmp.find(' ');
65 my_assert((pos != std::string::npos), line, "Cannot separate the identifier from the value for attribute " + tmp + "!");
66 std::string identifier = tmp.substr(0, pos);
68 if (identifier == "gene_id") {
69 my_assert(!find_gene_id, line, "gene_id appear more than once!");
70 tmp = cleanStr(tmp.substr(pos));
71 my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
72 gene_id = tmp.substr(1, tmp.length() - 2);
74 } else if (identifier == "transcript_id") {
75 my_assert(!find_transcript_id, line, "transcript_id appear more than once!");
76 tmp = cleanStr(tmp.substr(pos));
77 my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
78 transcript_id = tmp.substr(1, tmp.length() - 2);
79 find_transcript_id = true;
83 my_assert(find_gene_id, line, "Cannot find gene_id!");
84 my_assert(find_transcript_id, line, "Cannot find transcript_id!");
87 std::string getSeqName() { return seqname; }
88 std::string getSource() { return source; }
89 std::string getFeature() { return feature; }
90 int getStart() { return start; }
91 int getEnd() { return end; }
92 char getStrand() { return strand; }
93 std::string getScore() { return score; } // float, integer or "." ; let downstream programs parse it
94 std::string getFrame() { return frame; } // 0, 1, 2, or "."; let downstream programs parse it
95 std::string getGeneID() { return gene_id; }
96 std::string getTranscriptID() { return transcript_id; }
97 std::string getLeft() { return left; }
99 void setGeneID(const std::string& gene_id) {
100 this->gene_id = gene_id;
103 std::string toString() {
105 std::ostringstream strout;
106 strout<<seqname<<'\t'<<source<<'\t'<<feature<<'\t'<<start<<'\t'<<end<<'\t'<<score<<'\t'<<strand<<'\t'<<frame<<'\t';
107 strout<<"gene_id \""<<gene_id<<"\"; transcript_id \""<<transcript_id<<"\";"<<left;
114 std::string seqname, source, feature;
119 std::string gene_id, transcript_id;