X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lily%2Fsource-file.cc;h=14fdf2beb3b6ef4c10ff835c60deeca2a5811157;hb=97a0169312a260933246ab224e4f8b0969871dd5;hp=90bc5fcabde4a01ecfd8d0e6a55e09d8aa1621f5;hpb=00e3e15364b9d3c94cda1bcab9f889bb95f6832d;p=lilypond.git diff --git a/lily/source-file.cc b/lily/source-file.cc index 90bc5fcabd..14fdf2beb3 100644 --- a/lily/source-file.cc +++ b/lily/source-file.cc @@ -1,22 +1,32 @@ /* - source-file.cc -- implement Source_file + This file is part of LilyPond, the GNU music typesetter. - source file of the GNU LilyPond music typesetter - - (c) 1997--2007 Jan Nieuwenhuizen + Copyright (C) 1997--2015 Jan Nieuwenhuizen Han-Wen Nienhuys + + LilyPond is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + LilyPond is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with LilyPond. If not, see . */ +#if GCC_MAJOR < 4 +#define _GLIBCXX_HAVE_MBSTATE_T +#include +#endif /* GCC_MAJOR < 4 */ + #include "source-file.hh" #include "config.hh" -#if HAVE_UTF8_WCHAR_H -#include /* mbrtowc */ -#else -#include /* mbrtowc */ -#endif - #include #if HAVE_SSTREAM @@ -29,6 +39,7 @@ using namespace std; #include "file-name-map.hh" #include "international.hh" +#include "misc.hh" #include "warn.hh" void @@ -37,14 +48,14 @@ Source_file::load_stdin () characters_.clear (); int c; while ((c = fgetc (stdin)) != EOF) - characters_.push_back (c); + characters_.push_back ((char)c); } /* - return contents of FILENAME. *Not 0-terminated!* + return contents of FILENAME. *Not 0-terminated!* */ vector -gulp_file (string filename, int desired_size) +gulp_file (const string &filename, int desired_size) { /* "b" must ensure to open literally, avoiding text (CR/LF) conversions. */ @@ -63,7 +74,7 @@ gulp_file (string filename, int desired_size) if (desired_size > 0) read_count = min (read_count, desired_size); - + rewind (f); char *str = new char[read_count + 1]; @@ -72,7 +83,7 @@ gulp_file (string filename, int desired_size) int bytes_read = fread (str, sizeof (char), read_count, f); if (bytes_read != read_count) warning (_f ("expected to read %d characters, got %d", bytes_read, - read_count)); + read_count)); fclose (f); int filesize = bytes_read; @@ -80,7 +91,7 @@ gulp_file (string filename, int desired_size) cxx_arr.resize (filesize); copy (str, str + filesize, cxx_arr.begin ()); - + delete[] str; return cxx_arr; } @@ -91,21 +102,20 @@ Source_file::init () istream_ = 0; line_offset_ = 0; str_port_ = SCM_EOL; - self_scm_ = SCM_EOL; smobify_self (); } -Source_file::Source_file (string filename, string data) +Source_file::Source_file (const string &filename, const string &data) { init (); - + name_ = filename; characters_.resize (data.length ()); copy (data.begin (), data.end (), characters_.begin ()); characters_.push_back (0); - + init_port (); for (vsize i = 0; i < characters_.size (); i++) @@ -113,10 +123,10 @@ Source_file::Source_file (string filename, string data) newline_locations_.push_back (&characters_[0] + i); } -Source_file::Source_file (string filename_string) +Source_file::Source_file (const string &filename_string) { init (); - + name_ = filename_string; if (filename_string == "-") @@ -138,25 +148,27 @@ Source_file::Source_file (string filename_string) void Source_file::init_port () { - SCM str = scm_from_locale_string (c_str ()); + // This is somewhat icky: the string will in general be in utf8, but + // we do our own utf8 encoding and verification in the parser, so we + // use the no-conversion equivalent of latin1 + SCM str = scm_from_latin1_string (c_str ()); str_port_ = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_RDNG, __FUNCTION__); scm_set_port_filename_x (str_port_, ly_string2scm (name_)); } - istream * Source_file::get_istream () { if (!istream_) { if (length ()) // can-t this be done without such a hack? - istream_ = new istringstream (c_str ()); + istream_ = new istringstream (c_str ()); else - { - istream_ = new istringstream (""); - istream_->setstate (ios::eofbit); - // istream_->set (ios::eofbit); - } + { + istream_ = new istringstream (""); + istream_->setstate (ios::eofbit); + // istream_->set (ios::eofbit); + } } return istream_; } @@ -168,11 +180,11 @@ Source_file::file_line_column_string (char const *context_str0) const return " (" + _ ("position unknown") + ")"; else { - int l, ch, col; - get_counts (context_str0, &l, &ch, &col); + int l, ch, col, offset; + get_counts (context_str0, &l, &ch, &col, &offset); - return name_string () + ":" + to_string (l) - + ":" + to_string (col); + return name_string () + ":" + ::to_string (l) + + ":" + ::to_string (col + 1); } } @@ -182,13 +194,13 @@ Source_file::quote_input (char const *pos_str0) const if (!contains (pos_str0)) return " (" + _ ("position unknown") + ")"; - int l, ch, col; - get_counts (pos_str0, &l, &ch, &col); + int l, ch, col, offset; + get_counts (pos_str0, &l, &ch, &col, &offset); string line = line_string (pos_str0); - string context = line.substr (0, ch) - + to_string ('\n') - + to_string (' ', col) - + line.substr (ch, line.length ()-ch); + string context = line.substr (0, offset) + + ::to_string ('\n') + + ::to_string (' ', col) + + line.substr (offset, line.length () - offset); return context; } @@ -218,16 +230,16 @@ Source_file::line_slice (char const *pos_str0) const while (begin_str0 > data_str0) if (*--begin_str0 == '\n') { - begin_str0++; - break; + begin_str0++; + break; } char const *end_str0 = pos_str0; while (end_str0 < eof_C_) if (*end_str0++ == '\n') { - end_str0--; - break; + end_str0--; + break; } return Slice (begin_str0 - data_str0, end_str0 - data_str0); @@ -246,14 +258,17 @@ Source_file::line_string (char const *pos_str0) const void Source_file::get_counts (char const *pos_str0, - int *line_number, - int *line_char, - int *column) const + int *line_number, + int *line_char, + int *column, + int *byte_offset) const { + // Initialize arguments to defaults, needed if pos_str0 is not in source *line_number = 0; *line_char = 0; *column = 0; - + *byte_offset = 0; + if (!contains (pos_str0)) return; @@ -264,49 +279,27 @@ Source_file::get_counts (char const *pos_str0, char const *line_start = (char const *)data + line[LEFT]; ssize left = (char const *) pos_str0 - line_start; + *byte_offset = left; + string line_begin (line_start, left); char const *line_chars = line_begin.c_str (); - *column = 0; - *line_char = 0; - - mbstate_t state; - - /* Initialize the state. */ - memset (&state, '\0', sizeof (state)); - - while (left > 0) + for (; left > 0; --left, ++line_chars) { - /* - FIXME, this is apparently locale dependent. - */ -#if HAVE_MBRTOWC - wchar_t multibyte[2]; - size_t thislen = mbrtowc (multibyte, line_chars, left, &state); -#else - size_t thislen = 1; -#endif /* !HAVE_MBRTOWC */ - - /* Stop converting at invalid character; - this can mean we have read just the first part - of a valid character. */ - if (thislen == (size_t) -1) - break; - - /* We want to handle embedded NUL bytes - but the return value is 0. Correct this. */ - if (thislen == 0) - thislen = 1; - - if (thislen == 1 && line_chars[0] == '\t') - (*column) = (*column / 8 + 1) * 8; + // Skip UTF-8 continuation bytes. This is simplistic but + // robust, and we warn against non-UTF-8 input in the lexer + // already. In the case of non-UTF-8 or of this function being + // called in mid-character, the results are somewhat arbitrary, + // but there is no really sane definition anyway. + if ((*line_chars & 0xc0) == 0x80) + continue; + + if (*line_chars == '\t') + (*column) = (*column / 8 + 1) * 8; else - (*column)++; + (*column)++; (*line_char)++; - /* Advance past this character. */ - line_chars += thislen; - left -= thislen; } } @@ -323,12 +316,12 @@ Source_file::get_line (char const *pos_str0) const return 0; if (!newline_locations_.size ()) - return 1; + return 1 + line_offset_; /* this will find the '\n' character at the end of our line */ vsize lo = lower_bound (newline_locations_, - pos_str0, - less ()); + pos_str0, + less ()); /* the return value will be indexed from 1 */ return lo + 1 + line_offset_; @@ -337,10 +330,15 @@ Source_file::get_line (char const *pos_str0) const void Source_file::set_line (char const *pos_str0, int line) { - int current_line = get_line (pos_str0); - line_offset_ += line - current_line; + if (pos_str0) + { + int current_line = get_line (pos_str0); + line_offset_ += line - current_line; - assert (line == get_line (pos_str0)); + assert (line == get_line (pos_str0)); + } + else + line_offset_ = line; } int @@ -363,31 +361,22 @@ Source_file::get_port () const /****************************************************************/ -#include "ly-smobs.icc" -IMPLEMENT_SMOBS (Source_file); -IMPLEMENT_DEFAULT_EQUAL_P (Source_file); -IMPLEMENT_TYPE_P (Source_file, "ly:source-file?"); +const char * const Source_file::type_p_name_ = "ly:source-file?"; SCM -Source_file::mark_smob (SCM smob) +Source_file::mark_smob () const { - Source_file *sc = (Source_file *) SCM_CELL_WORD_1 (smob); - - return sc->str_port_; + return str_port_; } - int -Source_file::print_smob (SCM smob, SCM port, scm_print_state *) +Source_file::print_smob (SCM port, scm_print_state *) const { - Source_file *sc = (Source_file *) SCM_CELL_WORD_1 (smob); - scm_puts ("#name_.c_str (), port); + scm_puts (name_.c_str (), port); /* Do not print properties, that is too much hassle. */ scm_puts (" >", port); return 1; } -