X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=GTFItem.h;h=4e502706b88bb55767ff4f7ddf98797bdf8680c7;hb=da827678b21e94c74fd17c9b8683edb60f73e814;hp=e2403d9e6d05f3f66bf373737748b1b72317a99f;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387;p=rsem.git diff --git a/GTFItem.h b/GTFItem.h index e2403d9..4e50270 100644 --- a/GTFItem.h +++ b/GTFItem.h @@ -54,19 +54,34 @@ class GTFItem { strand = tmp[0]; getline(strin, frame, '\t'); - getline(strin, tmp, ';'); tmp = cleanStr(tmp); - my_assert((tmp.substr(0, 7) == "gene_id"), line, "Identifier should be gene_id!"); - tmp = cleanStr(tmp.substr(7)); - my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); - gene_id = tmp.substr(1, tmp.length() - 2); - - getline(strin, tmp, ';'); tmp = cleanStr(tmp); - my_assert((tmp.substr(0, 13) == "transcript_id"), line, "Identifier should be transcript_id!"); - tmp = cleanStr(tmp.substr(13)); - my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); - transcript_id = tmp.substr(1, tmp.length() - 2); - - getline(strin, left); + getline(strin, left); // assign attributes and possible comments into "left" + + strin.clear(); strin.str(left); + bool find_gene_id = false, find_transcript_id = false; + + while (getline(strin, tmp, ';') && (!find_gene_id || !find_transcript_id)) { + tmp = cleanStr(tmp); + size_t pos = tmp.find(' '); + my_assert((pos != std::string::npos), line, "Cannot separate the identifier from the value for attribute " + tmp + "!"); + std::string identifier = tmp.substr(0, pos); + + if (identifier == "gene_id") { + my_assert(!find_gene_id, line, "gene_id appear more than once!"); + tmp = cleanStr(tmp.substr(pos)); + my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); + gene_id = tmp.substr(1, tmp.length() - 2); + find_gene_id = true; + } else if (identifier == "transcript_id") { + my_assert(!find_transcript_id, line, "transcript_id appear more than once!"); + tmp = cleanStr(tmp.substr(pos)); + my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); + transcript_id = tmp.substr(1, tmp.length() - 2); + find_transcript_id = true; + } + } + + my_assert(find_gene_id, line, "Cannot find gene_id!"); + my_assert(find_transcript_id, line, "Cannot find transcript_id!"); } std::string getSeqName() { return seqname; }