X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lily%2Fsource-file.cc;h=14fdf2beb3b6ef4c10ff835c60deeca2a5811157;hb=90e4d7057f3857da049dfda3d130017d4719bd6b;hp=c90483812486203b91bc1d1a1c6f6857ab3b7b47;hpb=933ea175663dc544f1357dc087a653d8a4e4a7bd;p=lilypond.git diff --git a/lily/source-file.cc b/lily/source-file.cc index c904838124..14fdf2beb3 100644 --- a/lily/source-file.cc +++ b/lily/source-file.cc @@ -1,22 +1,32 @@ /* - source-file.cc -- implement Source_file + This file is part of LilyPond, the GNU music typesetter. - source file of the GNU LilyPond music typesetter + Copyright (C) 1997--2015 Jan Nieuwenhuizen + Han-Wen Nienhuys - (c) 1997--2005 Jan Nieuwenhuizen - Han-Wen Nienhuys + LilyPond is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + LilyPond is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with LilyPond. If not, see . */ +#if GCC_MAJOR < 4 +#define _GLIBCXX_HAVE_MBSTATE_T +#include +#endif /* GCC_MAJOR < 4 */ + #include "source-file.hh" #include "config.hh" -#if HAVE_UTF8_WCHAR_H -#include /* wcrtomb */ -#else -#include /* wcrtomb */ -#endif - #include #if HAVE_SSTREAM @@ -25,129 +35,176 @@ #include #define istringstream(x) istrstream (x, length ()) #endif +using namespace std; -#include "warn.hh" #include "file-name-map.hh" +#include "international.hh" +#include "misc.hh" +#include "warn.hh" void Source_file::load_stdin () { - length_ = 0; - + characters_.clear (); int c; - Array chs; // ugh. while ((c = fgetc (stdin)) != EOF) - chs.push (c); - - chs.push (0); - length_ = chs.size (); - contents_str0_ = chs.remove_array (); + characters_.push_back ((char)c); } -char * -gulp_file (String filename, int *filesize) +/* + return contents of FILENAME. *Not 0-terminated!* + */ +vector +gulp_file (const string &filename, int desired_size) { /* "b" must ensure to open literally, avoiding text (CR/LF) conversions. */ - FILE *f = fopen (filename.to_str0 (), "rb"); + FILE *f = fopen (filename.c_str (), "rb"); if (!f) { - warning (_f ("can't open file: `%s'", filename.to_str0 ())); - return 0; + warning (_f ("cannot open file: `%s'", filename.c_str ())); + + vector cxx_arr; + return cxx_arr; } fseek (f, 0, SEEK_END); - *filesize = ftell (f); + int real_size = ftell (f); + int read_count = real_size; + + if (desired_size > 0) + read_count = min (read_count, desired_size); + rewind (f); - char *str = new char[*filesize + 1]; - str[*filesize] = 0; + char *str = new char[read_count + 1]; + str[read_count] = 0; - int bytes_read = fread (str, sizeof (char), *filesize, f); - if (bytes_read != *filesize) + int bytes_read = fread (str, sizeof (char), read_count, f); + if (bytes_read != read_count) warning (_f ("expected to read %d characters, got %d", bytes_read, - *filesize)); + read_count)); fclose (f); + int filesize = bytes_read; - return str; + vector cxx_arr; + cxx_arr.resize (filesize); + + copy (str, str + filesize, cxx_arr.begin ()); + + delete[] str; + return cxx_arr; } -Source_file::Source_file (String filename, String data) +void +Source_file::init () { - name_ = filename; istream_ = 0; - contents_str0_ = data.get_copy_str0 (); - length_ = data.length (); - pos_str0_ = to_str0 (); + line_offset_ = 0; + str_port_ = SCM_EOL; + smobify_self (); +} + +Source_file::Source_file (const string &filename, const string &data) +{ + init (); + + name_ = filename; + + characters_.resize (data.length ()); + copy (data.begin (), data.end (), characters_.begin ()); + + characters_.push_back (0); + init_port (); - for (int i = 0; i < length_; i++) - if (contents_str0_[i] == '\n') - newline_locations_.push (contents_str0_ + i); + for (vsize i = 0; i < characters_.size (); i++) + if (characters_[i] == '\n') + newline_locations_.push_back (&characters_[0] + i); } -Source_file::Source_file (String filename_string) +Source_file::Source_file (const string &filename_string) { + init (); + name_ = filename_string; - istream_ = 0; - contents_str0_ = 0; if (filename_string == "-") load_stdin (); else - contents_str0_ = gulp_file (filename_string, &length_); + { + characters_ = gulp_file (filename_string, -1); + } - pos_str0_ = to_str0 (); + characters_.push_back (0); init_port (); - for (int i = 0; i < length_; i++) - if (contents_str0_[i] == '\n') - newline_locations_.push (contents_str0_ + i); + for (vsize i = 0; i < characters_.size (); i++) + if (characters_[i] == '\n') + newline_locations_.push_back (&characters_[0] + i); } void Source_file::init_port () { - SCM str = scm_makfrom0str (contents_str0_); + // This is somewhat icky: the string will in general be in utf8, but + // we do our own utf8 encoding and verification in the parser, so we + // use the no-conversion equivalent of latin1 + SCM str = scm_from_latin1_string (c_str ()); str_port_ = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_RDNG, __FUNCTION__); - scm_set_port_filename_x (str_port_, scm_makfrom0str (name_.get_str0 ())); -} - -int -Source_file::tell () const -{ - return pos_str0_ - contents_str0_; + scm_set_port_filename_x (str_port_, ly_string2scm (name_)); } -std::istream* +istream * Source_file::get_istream () { if (!istream_) { if (length ()) // can-t this be done without such a hack? - istream_ = new std::istringstream (to_str0 ()); + istream_ = new istringstream (c_str ()); else - { - istream_ = new std::istringstream (""); - istream_->setstate (std::ios::eofbit); - // istream_->set (ios::eofbit); - } + { + istream_ = new istringstream (""); + istream_->setstate (ios::eofbit); + // istream_->set (ios::eofbit); + } } return istream_; } -String +string Source_file::file_line_column_string (char const *context_str0) const { - if (!to_str0 ()) + if (!c_str ()) return " (" + _ ("position unknown") + ")"; else - return name_string () + ":" + to_string (get_line (context_str0)) - + ":" + to_string (get_column (context_str0)); + { + int l, ch, col, offset; + get_counts (context_str0, &l, &ch, &col, &offset); + + return name_string () + ":" + ::to_string (l) + + ":" + ::to_string (col + 1); + } +} + +string +Source_file::quote_input (char const *pos_str0) const +{ + if (!contains (pos_str0)) + return " (" + _ ("position unknown") + ")"; + + int l, ch, col, offset; + get_counts (pos_str0, &l, &ch, &col, &offset); + string line = line_string (pos_str0); + string context = line.substr (0, offset) + + ::to_string ('\n') + + ::to_string (' ', col) + + line.substr (offset, line.length () - offset); + return context; } -String +string Source_file::name_string () const { return map_file_name (name_); @@ -156,8 +213,6 @@ Source_file::name_string () const Source_file::~Source_file () { delete istream_; - istream_ = 0; - delete[] contents_str0_; } Slice @@ -166,7 +221,7 @@ Source_file::line_slice (char const *pos_str0) const if (!contains (pos_str0)) return Slice (0, 0); - char const *data_str0 = to_str0 (); + char const *data_str0 = c_str (); char const *eof_C_ = data_str0 + length (); if (pos_str0 == eof_C_) @@ -175,191 +230,153 @@ Source_file::line_slice (char const *pos_str0) const while (begin_str0 > data_str0) if (*--begin_str0 == '\n') { - begin_str0++; - break; + begin_str0++; + break; } - char const* end_str0 = pos_str0; + char const *end_str0 = pos_str0; while (end_str0 < eof_C_) if (*end_str0++ == '\n') { - end_str0--; - break; + end_str0--; + break; } return Slice (begin_str0 - data_str0, end_str0 - data_str0); } -String -Source_file::line_string (char const* pos_str0) const +string +Source_file::line_string (char const *pos_str0) const { if (!contains (pos_str0)) return ""; Slice line = line_slice (pos_str0); - char const *data_str0 = to_str0 (); - return String ((Byte const *)data_str0 + line[LEFT], line.length ()); + char const *data_str0 = c_str (); + return string (data_str0 + line[LEFT], line.length ()); } -int -Source_file::get_char_of_line (char const *pos_str0) const +void +Source_file::get_counts (char const *pos_str0, + int *line_number, + int *line_char, + int *column, + int *byte_offset) const { - if (!contains (pos_str0)) - return 0; + // Initialize arguments to defaults, needed if pos_str0 is not in source + *line_number = 0; + *line_char = 0; + *column = 0; + *byte_offset = 0; - char const *data_str0 = to_str0 (); - return pos_str0 - (line_slice (pos_str0)[SMALLER] + data_str0); -} - -int -Source_file::get_column (char const *pos_str0) const -{ if (!contains (pos_str0)) - return 0; + return; + + *line_number = get_line (pos_str0); Slice line = line_slice (pos_str0); - char const *data = to_str0 (); - Byte const *line_start = (Byte const *)data + line[LEFT]; + char const *data = c_str (); + char const *line_start = (char const *)data + line[LEFT]; - int left = (Byte const*) pos_str0 - line_start; - String line_begin (line_start, left); - char const *line_chars = line_begin.to_str0(); - - int column = 0; - mbstate_t state; + ssize left = (char const *) pos_str0 - line_start; + *byte_offset = left; - /* Initialize the state. */ - memset (&state, '\0', sizeof (state)); + string line_begin (line_start, left); + char const *line_chars = line_begin.c_str (); - while (left > 0) + for (; left > 0; --left, ++line_chars) { - wchar_t multibyte[2]; - size_t thislen = mbrtowc (multibyte, line_chars, left, &state); - - /* Stop converting at invalid character; - this can mean we have read just the first part - of a valid character. */ - if (thislen == (size_t) -1) - break; - /* We want to handle embedded NUL bytes - but the return value is 0. Correct this. */ - if (thislen == 0) - thislen = 1; - - if (thislen == 1 && line_chars[0] == '\t') - column = (column / 8 + 1) * 8; + // Skip UTF-8 continuation bytes. This is simplistic but + // robust, and we warn against non-UTF-8 input in the lexer + // already. In the case of non-UTF-8 or of this function being + // called in mid-character, the results are somewhat arbitrary, + // but there is no really sane definition anyway. + if ((*line_chars & 0xc0) == 0x80) + continue; + + if (*line_chars == '\t') + (*column) = (*column / 8 + 1) * 8; else - column ++; - - /* Advance past this character. */ - line_chars += thislen; - left -= thislen; - } - - return column; -} + (*column)++; -String -Source_file::error_string (char const* pos_str0) const -{ - if (!contains (pos_str0)) - return " (" + _ ("position unknown") + ")"; - - int ch_i = get_char_of_line (pos_str0); - String line = line_string (pos_str0); - String context = line.left_string (ch_i) - + to_string ('\n') - + to_string (' ', get_column (pos_str0)) - + line.cut_string (ch_i, INT_MAX); - - return context; + (*line_char)++; + } } bool -Source_file::contains (char const* pos_str0) const +Source_file::contains (char const *pos_str0) const { - return (pos_str0 && (pos_str0 >= to_str0 ()) && (pos_str0 <= to_str0 () + length ())); + return (pos_str0 && (pos_str0 >= c_str ()) && (pos_str0 <= c_str () + length ())); } int -Source_file::get_line (char const* pos_str0) const +Source_file::get_line (char const *pos_str0) const { if (!contains (pos_str0)) return 0; if (!newline_locations_.size ()) - return 1; - - int lo = 0; - int hi = newline_locations_.size (); - - if (newline_locations_[lo] > pos_str0) - return 1; - - if (newline_locations_[hi-1] < pos_str0) - return hi; - - binary_search_bounds (newline_locations_, - pos_str0, - Link_array::default_compare, - &lo, &hi); - - if (*pos_str0 == '\n') - lo--; - return lo + 2; + return 1 + line_offset_; + + /* this will find the '\n' character at the end of our line */ + vsize lo = lower_bound (newline_locations_, + pos_str0, + less ()); + + /* the return value will be indexed from 1 */ + return lo + 1 + line_offset_; +} + +void +Source_file::set_line (char const *pos_str0, int line) +{ + if (pos_str0) + { + int current_line = get_line (pos_str0); + line_offset_ += line - current_line; + + assert (line == get_line (pos_str0)); + } + else + line_offset_ = line; } int Source_file::length () const { - return length_; + return characters_.size (); } char const * -Source_file::to_str0 () const +Source_file::c_str () const { - return contents_str0_; + return &characters_[0]; } -void -Source_file::set_pos (char const * pos_str0) +SCM +Source_file::get_port () const { - if (contains (pos_str0)) - pos_str0_ = pos_str0; - else - error (error_string (pos_str0) + "invalid pos"); + return str_port_; } -char const * -Source_file::seek_str0 (int n) -{ - char const *new_str0 = to_str0 () + n; - if (n < 0) - new_str0 += length (); - if (contains (new_str0)) - pos_str0_ = new_str0; - else - error (error_string (new_str0) + "seek past eof"); +/****************************************************************/ - return pos_str0_; -} -char const * -Source_file::forward_str0 (int n) -{ - char const *old_pos = pos_str0_; - char const *new_str0 = pos_str0_ + n; - if (contains (new_str0)) - pos_str0_ = new_str0; - else - error (error_string (new_str0) + "forward past eof"); +const char * const Source_file::type_p_name_ = "ly:source-file?"; - return old_pos; +SCM +Source_file::mark_smob () const +{ + return str_port_; } -String -Source_file::get_string (int n) +int +Source_file::print_smob (SCM port, scm_print_state *) const { - String str = String ((Byte const *)forward_str0 (n), n); - return str; + scm_puts ("#", port); + return 1; }