lily/source-file.cc

   1 /*
   2   This file is part of LilyPond, the GNU music typesetter.
   3
   4   Copyright (C) 1997--2015 Jan Nieuwenhuizen <janneke@gnu.org>
   5   Han-Wen Nienhuys <hanwen@xs4all.nl>
   6
   7   LilyPond is free software: you can redistribute it and/or modify
   8   it under the terms of the GNU General Public License as published by
   9   the Free Software Foundation, either version 3 of the License, or
  10   (at your option) any later version.
  11
  12   LilyPond is distributed in the hope that it will be useful,
  13   but WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15   GNU General Public License for more details.
  16
  17   You should have received a copy of the GNU General Public License
  18   along with LilyPond.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #if GCC_MAJOR < 4
  22 #define _GLIBCXX_HAVE_MBSTATE_T
  23 #include <wchar.h>
  24 #endif /* GCC_MAJOR < 4 */
  25
  26 #include "source-file.hh"
  27
  28 #include "config.hh"
  29
  30 #include <cstdio>
  31
  32 #if HAVE_SSTREAM
  33 #include <sstream>
  34 #else
  35 #include <strstream>
  36 #define istringstream(x) istrstream (x, length ())
  37 #endif
  38 using namespace std;
  39
  40 #include "file-name-map.hh"
  41 #include "international.hh"
  42 #include "misc.hh"
  43 #include "warn.hh"
  44
// Smob boilerplate for Source_file.  ADD_SMOB_INIT is a project macro
// defined outside this file; presumably it arranges for the smob type
// to be set up at startup -- confirm against its definition.
ADD_SMOB_INIT (Source_file);
  46
  47 void
  48 Source_file::load_stdin ()
  49 {
  50   characters_.clear ();
  51   int c;
  52   while ((c = fgetc (stdin)) != EOF)
  53     characters_.push_back ((char)c);
  54 }
  55
  56 /*
  57   return contents of FILENAME. *Not 0-terminated!*
  58  */
  59 vector<char>
  60 gulp_file (const string &filename, int desired_size)
  61 {
  62   /* "b" must ensure to open literally, avoiding text (CR/LF)
  63      conversions.  */
  64   FILE *f = fopen (filename.c_str (), "rb");
  65   if (!f)
  66     {
  67       warning (_f ("cannot open file: `%s'", filename.c_str ()));
  68
  69       vector<char> cxx_arr;
  70       return cxx_arr;
  71     }
  72
  73   fseek (f, 0, SEEK_END);
  74   int real_size = ftell (f);
  75   int read_count = real_size;
  76
  77   if (desired_size > 0)
  78     read_count = min (read_count, desired_size);
  79
  80   rewind (f);
  81
  82   char *str = new char[read_count + 1];
  83   str[read_count] = 0;
  84
  85   int bytes_read = fread (str, sizeof (char), read_count, f);
  86   if (bytes_read != read_count)
  87     warning (_f ("expected to read %d characters, got %d", bytes_read,
  88                  read_count));
  89   fclose (f);
  90   int filesize = bytes_read;
  91
  92   vector<char> cxx_arr;
  93   cxx_arr.resize (filesize);
  94
  95   copy (str, str + filesize, cxx_arr.begin ());
  96
  97   delete[] str;
  98   return cxx_arr;
  99 }
 100
 101 void
 102 Source_file::init ()
 103 {
 104   istream_ = 0;
 105   line_offset_ = 0;
 106   str_port_ = SCM_EOL;
 107   smobify_self ();
 108 }
 109
 110 Source_file::Source_file (const string &filename, const string &data)
 111 {
 112   init ();
 113
 114   name_ = filename;
 115
 116   characters_.resize (data.length ());
 117   copy (data.begin (), data.end (), characters_.begin ());
 118
 119   characters_.push_back (0);
 120
 121   init_port ();
 122
 123   for (vsize i = 0; i < characters_.size (); i++)
 124     if (characters_[i] == '\n')
 125       newline_locations_.push_back (&characters_[0] + i);
 126 }
 127
 128 Source_file::Source_file (const string &filename_string)
 129 {
 130   init ();
 131
 132   name_ = filename_string;
 133
 134   if (filename_string == "-")
 135     load_stdin ();
 136   else
 137     {
 138       characters_ = gulp_file (filename_string, -1);
 139     }
 140
 141   characters_.push_back (0);
 142
 143   init_port ();
 144
 145   for (vsize i = 0; i < characters_.size (); i++)
 146     if (characters_[i] == '\n')
 147       newline_locations_.push_back (&characters_[0] + i);
 148 }
 149
 150 void
 151 Source_file::init_port ()
 152 {
 153   // This is somewhat icky: the string will in general be in utf8, but
 154   // we do our own utf8 encoding and verification in the parser, so we
 155   // use the no-conversion equivalent of latin1
 156   SCM str = scm_from_latin1_string (c_str ());
 157   str_port_ = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_RDNG, __FUNCTION__);
 158   scm_set_port_filename_x (str_port_, ly_string2scm (name_));
 159 }
 160
 161 istream *
 162 Source_file::get_istream ()
 163 {
 164   if (!istream_)
 165     {
 166       if (length ()) // can-t this be done without such a hack?
 167         istream_ = new istringstream (c_str ());
 168       else
 169         {
 170           istream_ = new istringstream ("");
 171           istream_->setstate (ios::eofbit);
 172           //      istream_->set (ios::eofbit);
 173         }
 174     }
 175   return istream_;
 176 }
 177
 178 string
 179 Source_file::file_line_column_string (char const *context_str0) const
 180 {
 181   if (!c_str ())
 182     return " (" + _ ("position unknown") + ")";
 183   else
 184     {
 185       int l, ch, col, offset;
 186       get_counts (context_str0, &l, &ch, &col, &offset);
 187
 188       return name_string () + ":" + ::to_string (l)
 189              + ":" + ::to_string (col + 1);
 190     }
 191 }
 192
 193 string
 194 Source_file::quote_input (char const *pos_str0) const
 195 {
 196   if (!contains (pos_str0))
 197     return " (" + _ ("position unknown") + ")";
 198
 199   int l, ch, col, offset;
 200   get_counts (pos_str0, &l, &ch, &col, &offset);
 201   string line = line_string (pos_str0);
 202   string context = line.substr (0, offset)
 203                    + ::to_string ('\n')
 204                    + ::to_string (' ', col)
 205                    + line.substr (offset, line.length () - offset);
 206   return context;
 207 }
 208
 209 string
 210 Source_file::name_string () const
 211 {
 212   return map_file_name (name_);
 213 }
 214
/*
  Release the stream that get_istream () may have allocated.
  istream_ is 0 if it never was; deleting a null pointer is a no-op.
 */
Source_file::~Source_file ()
{
  delete istream_;
}
 219
 220 Slice
 221 Source_file::line_slice (char const *pos_str0) const
 222 {
 223   if (!contains (pos_str0))
 224     return Slice (0, 0);
 225
 226   char const *data_str0 = c_str ();
 227   char const *eof_C_ = data_str0 + length ();
 228
 229   if (pos_str0 == eof_C_)
 230     pos_str0--;
 231   char const *begin_str0 = pos_str0;
 232   while (begin_str0 > data_str0)
 233     if (*--begin_str0 == '\n')
 234       {
 235         begin_str0++;
 236         break;
 237       }
 238
 239   char const *end_str0 = pos_str0;
 240   while (end_str0 < eof_C_)
 241     if (*end_str0++ == '\n')
 242       {
 243         end_str0--;
 244         break;
 245       }
 246
 247   return Slice (begin_str0 - data_str0, end_str0 - data_str0);
 248 }
 249
 250 string
 251 Source_file::line_string (char const *pos_str0) const
 252 {
 253   if (!contains (pos_str0))
 254     return "";
 255
 256   Slice line = line_slice (pos_str0);
 257   char const *data_str0 = c_str ();
 258   return string (data_str0 + line[LEFT], line.length ());
 259 }
 260
 261 void
 262 Source_file::get_counts (char const *pos_str0,
 263                          int *line_number,
 264                          int *line_char,
 265                          int *column,
 266                          int *byte_offset) const
 267 {
 268   // Initialize arguments to defaults, needed if pos_str0 is not in source
 269   *line_number = 0;
 270   *line_char = 0;
 271   *column = 0;
 272   *byte_offset = 0;
 273
 274   if (!contains (pos_str0))
 275     return;
 276
 277   *line_number = get_line (pos_str0);
 278
 279   Slice line = line_slice (pos_str0);
 280   char const *data = c_str ();
 281   char const *line_start = (char const *)data + line[LEFT];
 282
 283   ssize left = (char const *) pos_str0 - line_start;
 284   *byte_offset = left;
 285
 286   string line_begin (line_start, left);
 287   char const *line_chars = line_begin.c_str ();
 288
 289   for (; left > 0; --left, ++line_chars)
 290     {
 291       // Skip UTF-8 continuation bytes.  This is simplistic but
 292       // robust, and we warn against non-UTF-8 input in the lexer
 293       // already.  In the case of non-UTF-8 or of this function being
 294       // called in mid-character, the results are somewhat arbitrary,
 295       // but there is no really sane definition anyway.
 296       if ((*line_chars & 0xc0) == 0x80)
 297         continue;
 298
 299       if (*line_chars == '\t')
 300         (*column) = (*column / 8 + 1) * 8;
 301       else
 302         (*column)++;
 303
 304       (*line_char)++;
 305     }
 306 }
 307
 308 bool
 309 Source_file::contains (char const *pos_str0) const
 310 {
 311   return (pos_str0 && (pos_str0 >= c_str ()) && (pos_str0 <= c_str () + length ()));
 312 }
 313
 314 int
 315 Source_file::get_line (char const *pos_str0) const
 316 {
 317   if (!contains (pos_str0))
 318     return 0;
 319
 320   if (!newline_locations_.size ())
 321     return 1 + line_offset_;
 322
 323   /* this will find the '\n' character at the end of our line */
 324   vsize lo = lower_bound (newline_locations_,
 325                           pos_str0,
 326                           less<char const *> ());
 327
 328   /* the return value will be indexed from 1 */
 329   return lo + 1 + line_offset_;
 330 }
 331
 332 void
 333 Source_file::set_line (char const *pos_str0, int line)
 334 {
 335   if (pos_str0)
 336     {
 337       int current_line = get_line (pos_str0);
 338       line_offset_ += line - current_line;
 339
 340       assert (line == get_line (pos_str0));
 341     }
 342   else
 343     line_offset_ = line;
 344 }
 345
 346 int
 347 Source_file::length () const
 348 {
 349   return characters_.size ();
 350 }
 351
/*
  Return a pointer to the first byte of the buffer.  Both constructors
  append a 0 byte, so after construction the buffer is never empty and
  the data is 0-terminated.
 */
char const *
Source_file::c_str () const
{
  return &characters_[0];
}
 357
/*
  Return the string port created by init_port () (SCM_EOL if only
  init () has run so far).
 */
SCM
Source_file::get_port () const
{
  return str_port_;
}
 363
 364 /****************************************************************/
 365
 366
// Name string for the Source_file smob type.  By its spelling this is
// presumably the name of the Scheme type predicate -- confirm against
// the smob machinery that consumes it.
const char Source_file::type_p_name_[] = "ly:source-file?";
 368
/*
  Smob mark hook: returns str_port_, the only SCM value this object
  holds.  NOTE(review): presumably the caller marks the returned
  value on our behalf, as is conventional for Guile mark functions --
  confirm against the smob machinery.
 */
SCM
Source_file::mark_smob ()
{
  return str_port_;
}
 374
/*
  Smob print hook: write "#<Source_file NAME >" to PORT, where NAME is
  the unmapped name_.  The print state argument is unused.  Always
  returns 1.
 */
int
Source_file::print_smob (SCM port, scm_print_state *)
{
  scm_puts ("#<Source_file ", port);
  scm_puts (name_.c_str (), port);

  /* Do not print properties, that is too much hassle.  */
  scm_puts (" >", port);
  return 1;
}