lily/source-file.cc

   1 /*
   2   This file is part of LilyPond, the GNU music typesetter.
   3
   4   Copyright (C) 1997--2015 Jan Nieuwenhuizen <janneke@gnu.org>
   5   Han-Wen Nienhuys <hanwen@xs4all.nl>
   6
   7   LilyPond is free software: you can redistribute it and/or modify
   8   it under the terms of the GNU General Public License as published by
   9   the Free Software Foundation, either version 3 of the License, or
  10   (at your option) any later version.
  11
  12   LilyPond is distributed in the hope that it will be useful,
  13   but WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15   GNU General Public License for more details.
  16
  17   You should have received a copy of the GNU General Public License
  18   along with LilyPond.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #if GCC_MAJOR < 4
  22 #define _GLIBCXX_HAVE_MBSTATE_T
  23 #include <wchar.h>
  24 #endif /* GCC_MAJOR < 4 */
  25
  26 #include "source-file.hh"
  27
  28 #include "config.hh"
  29
  30 #include <cstdio>
  31
  32 #if HAVE_SSTREAM
  33 #include <sstream>
  34 #else
  35 #include <strstream>
  36 #define istringstream(x) istrstream (x, length ())
  37 #endif
  38 using namespace std;
  39
  40 #include "file-name-map.hh"
  41 #include "international.hh"
  42 #include "misc.hh"
  43 #include "warn.hh"
  44
  45 void
  46 Source_file::load_stdin ()
  47 {
  48   characters_.clear ();
  49   int c;
  50   while ((c = fgetc (stdin)) != EOF)
  51     characters_.push_back ((char)c);
  52 }
  53
  54 /*
  55   return contents of FILENAME. *Not 0-terminated!*
  56  */
  57 vector<char>
  58 gulp_file (const string &filename, int desired_size)
  59 {
  60   /* "b" must ensure to open literally, avoiding text (CR/LF)
  61      conversions.  */
  62   FILE *f = fopen (filename.c_str (), "rb");
  63   if (!f)
  64     {
  65       warning (_f ("cannot open file: `%s'", filename.c_str ()));
  66
  67       vector<char> cxx_arr;
  68       return cxx_arr;
  69     }
  70
  71   fseek (f, 0, SEEK_END);
  72   int real_size = ftell (f);
  73   int read_count = real_size;
  74
  75   if (desired_size > 0)
  76     read_count = min (read_count, desired_size);
  77
  78   rewind (f);
  79
  80   char *str = new char[read_count + 1];
  81   str[read_count] = 0;
  82
  83   int bytes_read = fread (str, sizeof (char), read_count, f);
  84   if (bytes_read != read_count)
  85     warning (_f ("expected to read %d characters, got %d", bytes_read,
  86                  read_count));
  87   fclose (f);
  88   int filesize = bytes_read;
  89
  90   vector<char> cxx_arr;
  91   cxx_arr.resize (filesize);
  92
  93   copy (str, str + filesize, cxx_arr.begin ());
  94
  95   delete[] str;
  96   return cxx_arr;
  97 }
  98
  99 void
 100 Source_file::init ()
 101 {
 102   istream_ = 0;
 103   line_offset_ = 0;
 104   str_port_ = SCM_EOL;
 105   smobify_self ();
 106 }
 107
 108 Source_file::Source_file (const string &filename, const string &data)
 109 {
 110   init ();
 111
 112   name_ = filename;
 113
 114   characters_.resize (data.length ());
 115   copy (data.begin (), data.end (), characters_.begin ());
 116
 117   characters_.push_back (0);
 118
 119   init_port ();
 120
 121   for (vsize i = 0; i < characters_.size (); i++)
 122     if (characters_[i] == '\n')
 123       newline_locations_.push_back (&characters_[0] + i);
 124 }
 125
 126 Source_file::Source_file (const string &filename_string)
 127 {
 128   init ();
 129
 130   name_ = filename_string;
 131
 132   if (filename_string == "-")
 133     load_stdin ();
 134   else
 135     {
 136       characters_ = gulp_file (filename_string, -1);
 137     }
 138
 139   characters_.push_back (0);
 140
 141   init_port ();
 142
 143   for (vsize i = 0; i < characters_.size (); i++)
 144     if (characters_[i] == '\n')
 145       newline_locations_.push_back (&characters_[0] + i);
 146 }
 147
 148 void
 149 Source_file::init_port ()
 150 {
 151   // This is somewhat icky: the string will in general be in utf8, but
 152   // we do our own utf8 encoding and verification in the parser, so we
 153   // use the no-conversion equivalent of latin1
 154   SCM str = scm_from_latin1_string (c_str ());
 155   str_port_ = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_RDNG, __FUNCTION__);
 156   scm_set_port_filename_x (str_port_, ly_string2scm (name_));
 157 }
 158
 159 istream *
 160 Source_file::get_istream ()
 161 {
 162   if (!istream_)
 163     {
 164       if (length ()) // can-t this be done without such a hack?
 165         istream_ = new istringstream (c_str ());
 166       else
 167         {
 168           istream_ = new istringstream ("");
 169           istream_->setstate (ios::eofbit);
 170           //      istream_->set (ios::eofbit);
 171         }
 172     }
 173   return istream_;
 174 }
 175
 176 string
 177 Source_file::file_line_column_string (char const *context_str0) const
 178 {
 179   if (!c_str ())
 180     return " (" + _ ("position unknown") + ")";
 181   else
 182     {
 183       int l, ch, col, offset;
 184       get_counts (context_str0, &l, &ch, &col, &offset);
 185
 186       return name_string () + ":" + ::to_string (l)
 187              + ":" + ::to_string (col + 1);
 188     }
 189 }
 190
 191 string
 192 Source_file::quote_input (char const *pos_str0) const
 193 {
 194   if (!contains (pos_str0))
 195     return " (" + _ ("position unknown") + ")";
 196
 197   int l, ch, col, offset;
 198   get_counts (pos_str0, &l, &ch, &col, &offset);
 199   string line = line_string (pos_str0);
 200   string context = line.substr (0, offset)
 201                    + ::to_string ('\n')
 202                    + ::to_string (' ', col)
 203                    + line.substr (offset, line.length () - offset);
 204   return context;
 205 }
 206
 207 string
 208 Source_file::name_string () const
 209 {
 210   return map_file_name (name_);
 211 }
 212
 213 Source_file::~Source_file ()
 214 {
 215   delete istream_;
 216 }
 217
 218 Slice
 219 Source_file::line_slice (char const *pos_str0) const
 220 {
 221   if (!contains (pos_str0))
 222     return Slice (0, 0);
 223
 224   char const *data_str0 = c_str ();
 225   char const *eof_C_ = data_str0 + length ();
 226
 227   if (pos_str0 == eof_C_)
 228     pos_str0--;
 229   char const *begin_str0 = pos_str0;
 230   while (begin_str0 > data_str0)
 231     if (*--begin_str0 == '\n')
 232       {
 233         begin_str0++;
 234         break;
 235       }
 236
 237   char const *end_str0 = pos_str0;
 238   while (end_str0 < eof_C_)
 239     if (*end_str0++ == '\n')
 240       {
 241         end_str0--;
 242         break;
 243       }
 244
 245   return Slice (begin_str0 - data_str0, end_str0 - data_str0);
 246 }
 247
 248 string
 249 Source_file::line_string (char const *pos_str0) const
 250 {
 251   if (!contains (pos_str0))
 252     return "";
 253
 254   Slice line = line_slice (pos_str0);
 255   char const *data_str0 = c_str ();
 256   return string (data_str0 + line[LEFT], line.length ());
 257 }
 258
 259 void
 260 Source_file::get_counts (char const *pos_str0,
 261                          int *line_number,
 262                          int *line_char,
 263                          int *column,
 264                          int *byte_offset) const
 265 {
 266   // Initialize arguments to defaults, needed if pos_str0 is not in source
 267   *line_number = 0;
 268   *line_char = 0;
 269   *column = 0;
 270   *byte_offset = 0;
 271
 272   if (!contains (pos_str0))
 273     return;
 274
 275   *line_number = get_line (pos_str0);
 276
 277   Slice line = line_slice (pos_str0);
 278   char const *data = c_str ();
 279   char const *line_start = (char const *)data + line[LEFT];
 280
 281   ssize left = (char const *) pos_str0 - line_start;
 282   *byte_offset = left;
 283
 284   string line_begin (line_start, left);
 285   char const *line_chars = line_begin.c_str ();
 286
 287   for (; left > 0; --left, ++line_chars)
 288     {
 289       // Skip UTF-8 continuation bytes.  This is simplistic but
 290       // robust, and we warn against non-UTF-8 input in the lexer
 291       // already.  In the case of non-UTF-8 or of this function being
 292       // called in mid-character, the results are somewhat arbitrary,
 293       // but there is no really sane definition anyway.
 294       if ((*line_chars & 0xc0) == 0x80)
 295         continue;
 296
 297       if (*line_chars == '\t')
 298         (*column) = (*column / 8 + 1) * 8;
 299       else
 300         (*column)++;
 301
 302       (*line_char)++;
 303     }
 304 }
 305
 306 bool
 307 Source_file::contains (char const *pos_str0) const
 308 {
 309   return (pos_str0 && (pos_str0 >= c_str ()) && (pos_str0 <= c_str () + length ()));
 310 }
 311
 312 int
 313 Source_file::get_line (char const *pos_str0) const
 314 {
 315   if (!contains (pos_str0))
 316     return 0;
 317
 318   if (!newline_locations_.size ())
 319     return 1 + line_offset_;
 320
 321   /* this will find the '\n' character at the end of our line */
 322   vsize lo = lower_bound (newline_locations_,
 323                           pos_str0,
 324                           less<char const *> ());
 325
 326   /* the return value will be indexed from 1 */
 327   return lo + 1 + line_offset_;
 328 }
 329
 330 void
 331 Source_file::set_line (char const *pos_str0, int line)
 332 {
 333   if (pos_str0)
 334     {
 335       int current_line = get_line (pos_str0);
 336       line_offset_ += line - current_line;
 337
 338       assert (line == get_line (pos_str0));
 339     }
 340   else
 341     line_offset_ = line;
 342 }
 343
 344 int
 345 Source_file::length () const
 346 {
 347   return characters_.size ();
 348 }
 349
 350 char const *
 351 Source_file::c_str () const
 352 {
 353   return &characters_[0];
 354 }
 355
 356 SCM
 357 Source_file::get_port () const
 358 {
 359   return str_port_;
 360 }
 361
 362 /****************************************************************/
 363
 364
 365 const char * const Source_file::type_p_name_ = "ly:source-file?";
 366
 367 SCM
 368 Source_file::mark_smob () const
 369 {
 370   return str_port_;
 371 }
 372
 373 int
 374 Source_file::print_smob (SCM port, scm_print_state *) const
 375 {
 376   scm_puts ("#<Source_file ", port);
 377   scm_puts (name_.c_str (), port);
 378
 379   /* Do not print properties, that is too much hassle.  */
 380   scm_puts (" >", port);
 381   return 1;
 382 }