/*
- source-file.cc -- implement Source_file
+ This file is part of LilyPond, the GNU music typesetter.
- source file of the GNU LilyPond music typesetter
-
- (c) 1997--2007 Jan Nieuwenhuizen <janneke@gnu.org>
+ Copyright (C) 1997--2015 Jan Nieuwenhuizen <janneke@gnu.org>
Han-Wen Nienhuys <hanwen@xs4all.nl>
+
+ LilyPond is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ LilyPond is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with LilyPond. If not, see <http://www.gnu.org/licenses/>.
*/
+#if GCC_MAJOR < 4
+#define _GLIBCXX_HAVE_MBSTATE_T
+#include <wchar.h>
+#endif /* GCC_MAJOR < 4 */
+
#include "source-file.hh"
#include "config.hh"
-#if HAVE_UTF8_WCHAR_H
-#include <utf8/wchar.h> /* mbrtowc */
-#else
-#include <cwchar> /* mbrtowc */
-#endif
-
#include <cstdio>
#if HAVE_SSTREAM
#include "file-name-map.hh"
#include "international.hh"
+#include "misc.hh"
#include "warn.hh"
void
characters_.clear ();
int c;
while ((c = fgetc (stdin)) != EOF)
- characters_.push_back (c);
+ characters_.push_back ((char)c);
}
/*
- return contents of FILENAME. *Not 0-terminated!*
+ return contents of FILENAME. *Not 0-terminated!*
*/
// Reads up to read_count bytes from the open file f and returns them as a
// vector<char>.  When desired_size is positive it caps the number of bytes
// read.  The result holds exactly the bytes fread delivered.
// NOTE(review): the lines that open f and declare read_count and cxx_arr are
// not visible in this excerpt; presumably read_count starts out as the file
// size -- confirm against the full file.
vector<char>
-gulp_file (string filename, int desired_size)
+gulp_file (const string &filename, int desired_size)
{
/* "b" must ensure to open literally, avoiding text (CR/LF)
conversions. */
// A positive desired_size limits how much is read; zero or negative means
// "read everything".
if (desired_size > 0)
read_count = min (read_count, desired_size);
-
+
rewind (f);
// The buffer is one byte larger than the read request; that extra byte is
// never written or copied out below.
char *str = new char[read_count + 1];
int bytes_read = fread (str, sizeof (char), read_count, f);
// NOTE(review): the format string reads "expected ... got ...", but the
// arguments are passed as (bytes_read, read_count), so the two numbers appear
// swapped in the emitted warning -- confirm and reorder.
if (bytes_read != read_count)
warning (_f ("expected to read %d characters, got %d", bytes_read,
- read_count));
+ read_count));
fclose (f);
// Only the bytes actually read are copied into the result.
int filesize = bytes_read;
cxx_arr.resize (filesize);
copy (str, str + filesize, cxx_arr.begin ());
-
+
delete[] str;
return cxx_arr;
}
istream_ = 0;
line_offset_ = 0;
str_port_ = SCM_EOL;
- self_scm_ = SCM_EOL;
smobify_self ();
}
// Constructs a Source_file named `filename` whose text is the in-memory
// string `data` instead of the contents of a file on disk.
-Source_file::Source_file (string filename, string data)
+Source_file::Source_file (const string &filename, const string &data)
{
init ();
-
+
name_ = filename;
// Copy the text into characters_ and append a terminating 0 byte.
characters_.resize (data.length ());
copy (data.begin (), data.end (), characters_.begin ());
characters_.push_back (0);
-
+
init_port ();
// NOTE(review): as shown here, the address of every element of characters_
// is appended to newline_locations_, not only the positions of '\n'
// characters.  A filtering condition may be missing from this excerpt --
// confirm against the full file.
for (vsize i = 0; i < characters_.size (); i++)
newline_locations_.push_back (&characters_[0] + i);
}
-Source_file::Source_file (string filename_string)
+Source_file::Source_file (const string &filename_string)
{
init ();
-
+
name_ = filename_string;
if (filename_string == "-")
// Creates str_port_: a Guile string port over the text returned by c_str (),
// opened with the flags SCM_OPN | SCM_RDNG, and records name_ as the port's
// file name.
void
Source_file::init_port ()
{
- SCM str = scm_from_locale_string (c_str ());
+ // This is somewhat icky: the string will in general be in utf8, but
+ // we do our own utf8 encoding and verification in the parser, so we
+ // use the no-conversion equivalent of latin1
+ SCM str = scm_from_latin1_string (c_str ());
// __FUNCTION__ is passed as the last argument of scm_mkstrport, presumably
// as the caller name used in Guile error reports -- confirm.
str_port_ = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_RDNG, __FUNCTION__);
scm_set_port_filename_x (str_port_, ly_string2scm (name_));
}
-
// Returns an input stream over this file's text, creating it on first use.
// A non-empty file gets an istringstream built from c_str (); an empty file
// gets an empty istringstream whose eofbit is already set.  The stream is
// cached in istream_, so later calls return the same pointer.
istream *
Source_file::get_istream ()
{
if (!istream_)
{
if (length ()) // can't this be done without such a hack?
- istream_ = new istringstream (c_str ());
+ istream_ = new istringstream (c_str ());
else
- {
- istream_ = new istringstream ("");
- istream_->setstate (ios::eofbit);
- // istream_->set (ios::eofbit);
- }
+ {
+ istream_ = new istringstream ("");
+ istream_->setstate (ios::eofbit);
+ // istream_->set (ios::eofbit);
+ }
}
return istream_;
}
return " (" + _ ("position unknown") + ")";
else
{
- int l, ch, col;
- get_counts (context_str0, &l, &ch, &col);
+ int l, ch, col, offset;
+ get_counts (context_str0, &l, &ch, &col, &offset);
- return name_string () + ":" + to_string (l)
- + ":" + to_string (col);
+ return name_string () + ":" + ::to_string (l)
+ + ":" + ::to_string (col + 1);
}
}
if (!contains (pos_str0))
return " (" + _ ("position unknown") + ")";
- int l, ch, col;
- get_counts (pos_str0, &l, &ch, &col);
+ int l, ch, col, offset;
+ get_counts (pos_str0, &l, &ch, &col, &offset);
string line = line_string (pos_str0);
- string context = line.substr (0, ch)
- + to_string ('\n')
- + to_string (' ', col)
- + line.substr (ch, line.length()-ch);
+ string context = line.substr (0, offset)
+ + ::to_string ('\n')
+ + ::to_string (' ', col)
+ + line.substr (offset, line.length () - offset);
return context;
}
while (begin_str0 > data_str0)
if (*--begin_str0 == '\n')
{
- begin_str0++;
- break;
+ begin_str0++;
+ break;
}
char const *end_str0 = pos_str0;
while (end_str0 < eof_C_)
if (*end_str0++ == '\n')
{
- end_str0--;
- break;
+ end_str0--;
+ break;
}
return Slice (begin_str0 - data_str0, end_str0 - data_str0);
// Computes position information for pos_str0 relative to the start of its
// line and stores it through the output pointers:
//   *line_char   -- number of characters before pos_str0 on the line
//   *column      -- display column, with tabs advancing to the next multiple of 8
//   *byte_offset -- number of bytes before pos_str0 on the line (new version)
// All outputs are zeroed first and stay zero when pos_str0 does not lie
// inside this source.
// NOTE(review): in the lines visible here *line_number is only ever set to 0;
// the code between the contains () check and the definition of line_start is
// not shown and presumably fills it in -- confirm against the full file.
void
Source_file::get_counts (char const *pos_str0,
- int *line_number,
- int *line_char,
- int *column) const
+ int *line_number,
+ int *line_char,
+ int *column,
+ int *byte_offset) const
{
+ // Initialize arguments to defaults, needed if pos_str0 is not in source
*line_number = 0;
*line_char = 0;
*column = 0;
-
+ *byte_offset = 0;
+
if (!contains (pos_str0))
return;
// NOTE(review): `line` and `data` are defined in lines missing from this
// excerpt; line[LEFT] is used as the offset of the line start within data.
char const *line_start = (char const *)data + line[LEFT];
ssize left = (char const *) pos_str0 - line_start;
+ *byte_offset = left;
+
// Work on a private copy of the bytes from the line start up to pos_str0.
string line_begin (line_start, left);
char const *line_chars = line_begin.c_str ();
- *column = 0;
- *line_char = 0;
-
- mbstate_t state;
-
- /* Initialize the state. */
- memset (&state, '\0', sizeof (state));
-
- while (left > 0)
+ for (; left > 0; --left, ++line_chars)
{
- /*
- FIXME, this is apparently locale dependent.
- */
-#if HAVE_MBRTOWC
- wchar_t multibyte[2];
- size_t thislen = mbrtowc (multibyte, line_chars, left, &state);
-#else
- size_t thislen = 1;
-#endif /* !HAVE_MBRTOWC */
-
- /* Stop converting at invalid character;
- this can mean we have read just the first part
- of a valid character. */
- if (thislen == (size_t) -1)
- break;
-
- /* We want to handle embedded NUL bytes
- but the return value is 0. Correct this. */
- if (thislen == 0)
- thislen = 1;
-
- if (thislen == 1 && line_chars[0] == '\t')
- (*column) = (*column / 8 + 1) * 8;
+ // Skip UTF-8 continuation bytes. This is simplistic but
+ // robust, and we warn against non-UTF-8 input in the lexer
+ // already. In the case of non-UTF-8 or of this function being
+ // called in mid-character, the results are somewhat arbitrary,
+ // but there is no really sane definition anyway.
+ if ((*line_chars & 0xc0) == 0x80)
+ continue;
+
+ if (*line_chars == '\t')
+ (*column) = (*column / 8 + 1) * 8;
else
- (*column)++;
+ (*column)++;
(*line_char)++;
- /* Advance past this character. */
- line_chars += thislen;
- left -= thislen;
}
}
return 0;
if (!newline_locations_.size ())
- return 1;
+ return 1 + line_offset_;
/* this will find the '\n' character at the end of our line */
vsize lo = lower_bound (newline_locations_,
- pos_str0,
- less<char const*> ());
+ pos_str0,
+ less<char const *> ());
/* the return value will be indexed from 1 */
return lo + 1 + line_offset_;
// Adjusts line_offset_ so that get_line (pos_str0) reports `line`.
// In the new version a null pos_str0 is accepted: line_offset_ is then set
// directly to `line` and get_line is not consulted.
void
Source_file::set_line (char const *pos_str0, int line)
{
- int current_line = get_line (pos_str0);
- line_offset_ += line - current_line;
+ if (pos_str0)
+ {
+ int current_line = get_line (pos_str0);
+ line_offset_ += line - current_line;
// Check that the adjusted offset yields the requested line number.
- assert (line == get_line (pos_str0));
+ assert (line == get_line (pos_str0));
+ }
+ else
+ line_offset_ = line;
}
int
/****************************************************************/
-#include "ly-smobs.icc"
-IMPLEMENT_SMOBS(Source_file);
-IMPLEMENT_DEFAULT_EQUAL_P(Source_file);
-IMPLEMENT_TYPE_P(Source_file, "ly:source-file?");
+const char * const Source_file::type_p_name_ = "ly:source-file?";
// Returns str_port_, the one SCM value this function reports.
// NOTE(review): presumably this is the garbage-collector mark hook of the
// smob framework, with the returned value being marked by the caller --
// confirm against the smob base class.
SCM
-Source_file::mark_smob (SCM smob)
+Source_file::mark_smob () const
{
- Source_file *sc = (Source_file *) SCM_CELL_WORD_1 (smob);
-
- return sc->str_port_;
+ return str_port_;
}
-
// Writes a short textual form of this object to `port`:
// "#<Source_file ", then name_, then " >".  Always returns 1.
int
-Source_file::print_smob (SCM smob, SCM port, scm_print_state *)
+Source_file::print_smob (SCM port, scm_print_state *) const
{
- Source_file *sc = (Source_file *) SCM_CELL_WORD_1 (smob);
-
scm_puts ("#<Source_file ", port);
- scm_puts (sc->name_.c_str (), port);
+ scm_puts (name_.c_str (), port);
/* Do not print properties, that is too much hassle. */
scm_puts (" >", port);
return 1;
}
-