mysql-server/sql/server_component/log_source_backtrace.cc

/* Copyright (c) 2022, 2024, Oracle and/or its affiliates.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.

This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation.  The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License, version 2.0, for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file log_source_backtrace.cc

  This file contains contains code to search for a recent stackdump in
  a file formatted in the "traditional error log" format. If one is
  found, it is prepended to the buffered log-events.
*/

#include <my_time.h>                                 // MYSQL_TIME_STATUS
#include <mysql_time.h>                              // MYSQL_TIME
#include <iostream>                                  // std::cerr
#include "log_sink_buffer.h"                         // struct log_line_buffer
#include "log_sink_trad.h"                           // parse_trad_field()
#include "my_dir.h"                                  // MY_STAT
#include "mysql/components/services/log_builtins.h"  // LogEvent()
#include "mysql/components/services/log_service.h"
#include "mysql/my_loglevel.h"
#include "mysqld_error.h"  // ER_STACKTRACE
#include "sql/log.h"       // iso8601_timestamp_to_microseconds()
#include "sql/sql_list.h"  // List

/// How far back in the trad error-log should we start looking?
static const size_t max_backtrace = 32 * 1024;  // be generous, it's temporary

/// Latest ISO8601 timestamp found outside a stackdump.
static ulonglong iso8601_outside_stack = 0;
/// ISO8601 timestamp in current stackdump.
static ulonglong iso8601_during_stack = 0;

/// Pointer to first character of a textual stacktrace, or nullptr.
static char *backtrace_beg = nullptr;
/// Pointer to last character of a textual stacktrace, or nullptr.
static char *backtrace_end = nullptr;

/**
  If a stack backtrace was found in the traditional error log,
  prepend its lines to buffered logging (so the backtrace may
  be flushed to performance_schema.error_log and all configured
  log-sinks later).

  `backtrace_beg` must be set to the beginning of a char-buffer
  containing a textual stacktrace.

  `backtrace_end` must point to the last valid character in the
  textual stacktrace.

  The "textual representation" must consist of one or several
  lines terminated by '\n'. These '\n' will be replaced with '\0'.

  Each line will become one log-event.
  The list of log-events, if any, will be prepended to the list of log-events
  buffered during start-up.

  After this function returns, the caller is free to free() the buffer
  containing the log file fragment.
*/
static log_service_error log_source_backtrace_add_events() {
  // Head of the list of buffered log messages we're creating
  log_line_buffer *log_line_backtrace_head = nullptr;

  // Where to write the pointer to the newly-created tail-element of the list
  log_line_buffer **log_line_backtrace_tail = &log_line_backtrace_head;

  // List element we creating
  log_line_buffer *lle;

  char *my_line = backtrace_beg;
  log_service_error ret = LOG_SERVICE_SUCCESS;  // return value

  assert((backtrace_beg != nullptr) && (backtrace_end != nullptr));

  // Add backtrace to buffered log, each line becoming one log event.
  while (my_line < backtrace_end) {
    char *my_eol =
        (char *)memchr(my_line, '\n', (size_t)(backtrace_end - my_line + 1));

    // Fail-safe. Failing to parse is bad, but breaking start-up is worse.
    if (my_eol == nullptr) break;

    // Only add lines that aren't empty.
    if (my_eol > my_line) {
      *my_eol = '\0';

      // Allocate an element for our singly-linked list.
      if ((lle = (log_line_buffer *)my_malloc(
               key_memory_log_error_stack, sizeof(log_line_buffer), MYF(0))) ==
          nullptr) {
        ret = LOG_SERVICE_OUT_OF_MEMORY; /* purecov: inspected */
        break;                           /* purecov: inspected */
      }

      /*
        Create a log-event from the backtrace line.
        Note the use of message() instead of verbatim() in order to copy
        the message into the LogEvent's allocation.
        To use verbatim(), we'd have to manage the message's life-cycle
        ourselves instead.
      */
      log_line *ll;
      LogEvent()
          .type(LOG_TYPE_ERROR)
          .errcode(ER_STACK_BACKTRACE)
          .subsys(LOG_SUBSYSTEM_TAG)
          .prio(SYSTEM_LEVEL)      // make it unfilterable
          .message("%s", my_line)  // line from the backtrace
          .steal(&ll);             // obtain pointer to the event-data

      // Add a timestamp to the event.
      // The first event gets the timestamp we parsed from the textual
      // stacktrace (contained in `iso8601_during_stack`); subsequent
      // lines strictly increase this timestamp to guarantee row order.
      log_line_item_set(ll, LOG_ITEM_LOG_BUFFERED)->data_integer =
          iso8601_during_stack++;

      // Put event in our single-linked list list of backtrace-events.
      lle->next = nullptr;
      lle->ll = *ll;  // Shallow-copy the log-event we created above.

      // Release the event we created, but keep its allocations
      // (as they're still used by the copy).
      log_line_exit(ll);

      // Append event to list of backtrace log-events.
      *log_line_backtrace_tail = lle;  // current_tail->next = new_element
      log_line_backtrace_tail = &(lle->next);
    }

    my_line = ++my_eol;  // Next line in buffer starts after '\n'.
  }

  // Prepend our singly-linked list of stacktrace events
  // to the singly-linked list of start-up events.
  // Flushing this later will free it.
  log_sink_buffer_prepend_list(log_line_backtrace_head,
                               log_line_backtrace_tail);

  return ret;
}

/**
  Parse an ISO-8601 compliant timestamp.

  @param line_start  Pointer to the first character of the line.
  @param line_end    Pointer to the '\n' ending the line.

  @retval Number of micro-seconds since the epoch represented by the timestamp
*/
ulonglong log_iso8601_from_line(const char *line_start, const char *line_end) {
  ulonglong iso8601_time = 0;
  const char *end;
  char timestamp[iso8601_size];  // space for timestamp + '\0'
  ssize_t len;

  if ((len = parse_trad_field(line_start, &end, line_end)) > 0) {
    if (len < iso8601_size) {
      memcpy(timestamp, line_start, len);
      timestamp[len] = '\0';  // terminate target buffer

      // The parse-function corrects for timezone.
      iso8601_time = iso8601_timestamp_to_microseconds(timestamp, len);
    }
  }

  return iso8601_time;
}

/**
  Parse a single line in the traditional error-log.

  This function may be called to examine a buffer containing
  part of an error log line by line. If the header of a stack
  backtrace is recognized, certain variables are set up; if
  the end of a backtrace is recognized, those variables are
  cleared again. In other words, if the all the loaded log-lines
  have been examined and the variables are non-zero, the last
  chunk in the error log was a backtrace, and we have saved

  - `iso8601_during_stack` is non-zero and contains the
    time/date (micro-seconds since the epoch, UTC) of the backtrace.

  - `backtrace_beg` is non-null and points to the first character
    of the backtrace.

  - `backtrace_end` is non-null and points to the last character
    in the backtrace.

  `backtrace_beg` and `backtrace_end` if set point to addresses
  in the temporary buffer that contains the error log fragment.

  This function will attempt to determine whether the last item
  in the traditional error log is a backtrace and if so, identify
  the beginning and end of that backtrace in the buffer. It does
  _not_ copy the information, add it to performance_schema.error_log,
  and so forth; this is something the caller can see to if desired.

  @param line_start   Pointer to the first character of the line.
  @param line_length  Length of the line in bytes.

  @retval LOG_SERVICE_MISC_ERROR  log was written to after server start
*/
static log_service_error log_source_backtrace_parse_line(char *line_start,
                                                         size_t line_length) {
  const char *line_end = line_start + line_length;
  bool is_iso8601 = false;

  if (*line_end != '\n')
    return LOG_SERVICE_PARSE_ERROR;  // lines are '\n' separated

  /*
    ISO8601 sanity check ("2022-04-26T23:45:06[Z.]")
    If it's a stacktrace, there will be no micro-seconds ('Z');
    if it's a normal trad-log line, micro-seconds will follow ('.').

    Note that before WL#14955, stacktrace timestamps had no date part.
    These old stacktraces will be disregarded here. This is intentional.
  */
  if ((line_length >= 20) &&  // must start with timestamp
      (line_start[0] == '2') && (line_start[4] == '-') &&
      (line_start[7] == '-') && (line_start[10] == 'T') &&
      (line_start[13] == ':') && (line_start[16] == ':') &&
      ((line_start[19] == '.') || (line_start[19] == 'Z')))
    is_iso8601 = true;

  // First, let's see whether it's the beginning of a stacktrace.
  // e.g. "08:00:18 UTC - mysqld got signal 7 ;"
  // Stacktraces are always in UTC (i.e. 'Z': no timezone offset).
  if (is_iso8601 && (line_length >= 27) &&
      (0 == strncmp(line_start + 19, "Z UTC - ", 8))) {  // It's always UTC.
    // Get timestamp. There will be no micro-second part.
    iso8601_during_stack = log_iso8601_from_line(line_start, line_end);

    // If it's before the last full timestamp we've seen, adjust micro-seconds.
    if (iso8601_during_stack <= iso8601_outside_stack) {
      /*
        This can happen if both timestamps are in the same second
        since the trad-log timestamp has a micro-second part, and
        the backtrace-timestamp doesn't.
        In this case, we just advance the backtrace timestamp to
        the next "free" microsecond, i.e. the micro-second after
        the last full timestamp we've seen.
      */
      iso8601_during_stack = iso8601_outside_stack + 1;
    }

    // We save the beginning of the line as the beginning of the stacktrace.
    backtrace_beg = line_start;
    backtrace_end = line_start + line_length;
  }

  // See whether it's a trad log line. (If so, remember the timestamp.)
  // The trad log's timestamp will have microseconds ('.').
  // Its timezone could be anything.
  else if (is_iso8601 && (line_start[19] == '.')) {
    // ISO8601: 2022-02-21T03:30:34.561771 (timezone could be anything)

    /*
      If we find a trad-line after a backtrace, throw the backtrace away.
      The assumption is that either

      a) the server crashed, wrote the backtrace, and ended --
         in which case, the backtrace is the very last thing in the trad log
         (and we'll process it):

         ...
         2022-03-16T16:17:28.299339Z 0 [System] [MY-010116] [Server] trad stuff
         12:34:56 UTC - mysqld got signal 1 ;   // backtrace header
         // log ends

         or

      b) there is a backtrace, but it's not the last section in the trad log --
         in which case, the server has run between the failure we found and the
         current start-up. In this case, we assume that the stackdump has
         already been processed in that previous run. In this case, there
         should be a processed (correctly formatted) copy of the stackdump
         immediately following the raw version. Thus, we'll skip the raw
         version, since we'll be reading the processed one later, anyway.

         ...
         2022-03-16T06:17:28.299339Z 0 [System] [MY-010116] [Server] trad stuff
         12:34:56 UTC - mysqld got signal 1 ;   // backtrace header
         // log ends

         // server restarted, processed above backtrace, and logged it:
         2022-03-16T12:34:56.000001Z 0 [System] [MY-010116] [trace] 12:34:56 UTC
         2022-03-16T12:35:00.299339Z 0 [System] [MY-010116] [Server] trad stuff
         // Later, we shutdown cleanly, and the log ends, now containing both
         // the raw stackdump followed by the properly formatted one.

         // Server is restarted a second time. The raw backtrace may be found,
         // but is ignored as it is not the last section in the log. The
         // formatted version of the stackdump behind it however is parsed
         // and read into performance_schema.error_log -- if the traditional
         // log is configured to be the main error log. If a different log
         // (e.g. JSON) is configured to be the main error log, the formatted
         // stackdump will be read thence instead (i.e. different log, same
         // principle).
    */

    // Get timestamp from line.
    iso8601_outside_stack = log_iso8601_from_line(line_start, line_end);

    if (iso8601_outside_stack > 0) {  // If we got a timestamp from the line ...
      // If read timestamp is later than server_start, stop reading.
      // This should not happen, but we need a failsafe.
      if (iso8601_outside_stack > log_builtins_started()) {
        return LOG_SERVICE_MISC_ERROR;
      }

      // If we already processed a backtrace, ignore it, it's not current.
      backtrace_beg = backtrace_end = nullptr;
      iso8601_during_stack = 0;

      return LOG_SERVICE_NOTHING_DONE;  // success, of a sort
    }
  }  // if trad-line ...

  // It's not a trad-line, and we've already detected a backtrace header.
  // Expand the backtrace-buffer to include this line.
  if (iso8601_during_stack > 0) {
    backtrace_end = line_start + line_length;
    return LOG_SERVICE_SUCCESS;
  }

  /*
    If we get here, we haven't seen a backtrace-header yet (after which
    we relax the rules and accept all kinds of lines), but we didn't get
    a correct ISO8601 timestamp, either.

    This is slightly odd, but can happen in situations such as:

    a) A 3rd party library writes debug info to stdout/stderr.

    b) The last item in the log was a stacktrace written by an
       older, pre-WL#14955 server that does not include the date
       in stacktrace-timestamps. We intentionally disregard such
       stacktraces.

    c) The server stopped as the result of a failed assert(),
       the output of which we just encountered.

    d) mysql-test-run.pl includes "\nCURRENT_TEST: " line.
  */

  // We ignore empty lines (for mysql-test-run.pl etc.).
  if ((line_end - line_start) == 0) return LOG_SERVICE_NOTHING_DONE;

  // Unrecognized line in traditional error log file.
  return LOG_SERVICE_PARSE_ERROR;
}

/**
  Read tail end of the traditional error-log as a single chunk.
  Look for a recent stacktrace in that data.

  @param  log_file   The file's name
  @param  size       length of the input file (in bytes)

  @retval LOG_SERVICE_SUCCESS            success
  @retval LOG_SERVICE_OPEN_FAILED        failed to open file
  @retval LOG_SERVICE_SEEK_FAILED        seek failed
  @retval LOG_SERVICE_UNABLE_TO_READ     read failed or file empty
  @retval LOG_SERVICE_OUT_OF_MEMORY      out of memory
  @retval LOG_SERVICE_PARSE_ERROR        could not find delimiter ('\n')
  @retval LOG_SERVICE_MISC_ERROR         parsing: sanity test failed
*/
static log_service_error log_error_read_backtrace_loop(const char *log_file,
                                                       size_t size) {
  FILE *fh;                     // file-handle
  off_t pos;                    // where in the file to start reading
  char *chunk;                  // beginning of allocation
  char *line_start, *line_end;  // beginning/end of line
  log_service_error ret = LOG_SERVICE_SUCCESS;  // return value

  // Reset backtrace window.
  backtrace_beg = backtrace_end = nullptr;

  // Is there any data to read?
  if (size <= 0) return LOG_SERVICE_UNABLE_TO_READ;

  if ((fh = my_fopen(log_file, O_RDONLY, MYF(0))) == nullptr)
    return LOG_SERVICE_OPEN_FAILED;

  // Allocate memory to read the tail of the log into.
  if ((chunk = (char *)my_malloc(key_memory_log_sink_pfs, max_backtrace,
                                 MYF(0))) == nullptr) {
    ret = LOG_SERVICE_OUT_OF_MEMORY; /* purecov: inspected */
    goto fail_with_close;
  }

  /*
    If the file would fit into the buffer entirely, we'll read it from
    the beginning. Otherwise, we start reading from a point in the file
    where about the size of the buffer remains as input.
  */
  if (size <= max_backtrace)
    pos = 0;
  else {
    pos = size - max_backtrace;
    size = max_backtrace;  // Correct size to the part we'll actually read.

    // Seek to the approximate position of the row to start reading at.
    if (my_fseek(fh, (long)pos, SEEK_SET)) { /* purecov: begin inspected */
      ret = LOG_SERVICE_SEEK_FAILED;
      goto fail_with_free; /* purecov: end */
    }
  }

  if (my_fread(fh, (uchar *)chunk, size, MYF(0)) < size) {
    ret = LOG_SERVICE_UNABLE_TO_READ; /* purecov: begin inspected */
    goto fail_with_free;              /* purecov: end */
  }

  if (pos > 0) {
    // We're likely in the middle of a row, skip forward to the next.
    if ((line_start = (char *)memchr(chunk, '\n', size)) == nullptr) {
      ret = LOG_SERVICE_PARSE_ERROR;
      goto fail_with_free;
    }
  } else
    line_start = chunk;

  /*
    Process the data line-by-line until
    - we reach the end of the data
    - the line-delimiter '\n' is not found
    - the parse function suggests we stop (LOG_SERVICE_MISC_ERROR)
  */
  do {
    const size_t processed = (size_t)(line_start - chunk);
    const size_t rest = size - processed;

    // Find EOL ('\n'). If last line is partial, skip it.
    if ((line_end = (char *)memchr(line_start, '\n', rest)) == nullptr) break;

    // Parse the current line. We are trying to determine whether the
    // last part of the error log is a backtrace. If so, we'll prepend
    // its lines to buffered logging further below. This data will then
    // be flushed to all configured log-sinks in their respective formats
    // (e.g. in JSON for the JSON-log) as well as to
    // performance_schema.error_log.
    ret = log_source_backtrace_parse_line(line_start, line_end - line_start);

    // Proceed to the next line (i.e. past '\n').
    line_start = line_end + 1;

  } while ((ret != LOG_SERVICE_MISC_ERROR) && (line_start < (chunk + size)));

  // If we found a backtrace, prepend it to buffered logging.
  if (backtrace_beg != nullptr) ret = log_source_backtrace_add_events();

fail_with_free:
  backtrace_beg = backtrace_end = nullptr;  // Any pointers stale at this point.
  my_free(chunk);

fail_with_close:
  my_fclose(fh, MYF(0));

  return ret;
}

/**
  Read stacktrace from previous failure.

  The signal-handler attempts to write a stacktrace to stderr.
  As stderr (and stdout) are redirected to the "traditional"
  error-log, that's where we'll have to look for stacktraces,
  even if we use a different log-sink otherwise (e.g. JSON,
  syslog, etc.).

  Once we have determined whether such a log exists and is readable,
  we call @see log_error_read_backtrace_loop() to do the actual reading
  and parsing.

  It should be noted that at the point this function is normally
  called, buffered error logging will not have been flushed yet.

  @param  log_name  The log file to read (log_error_dest).

  @retval LOG_SERVICE_SUCCESS                 Success (log read and parsed)
  @retval LOG_SERVICE_UNABLE_TO_READ          Could not read/access() file
  @retval LOG_SERVICE_INVALID_ARGUMENT        Invalid log-file name
  @retval LOG_SERVICE_ARGUMENT_TOO_LONG       File-name too long
  @retval otherwise                           Return value from reader
*/
log_service_error log_error_read_backtrace(const char *log_name) {
  MY_STAT stat_log;
  log_service_error ret;

  if ((log_name == nullptr) || (log_name[0] == '\0'))
    return LOG_SERVICE_INVALID_ARGUMENT;
  if (strlen(log_name) >= FN_REFLEN)
    return LOG_SERVICE_ARGUMENT_TOO_LONG; /* purecov: inspected */

  assert(0 != strcmp(log_name, "stderr"));

  /*
    Lock the error-logger while we're restoring the error-log so nobody
    writes to the log-file while we're reading it. That way, we won't
    have to deal with half-written lines or the file-size changing.
  */

  if (my_access(log_name, R_OK) ||
      (my_stat(log_name, &stat_log, MYF(0)) == nullptr))
    ret = LOG_SERVICE_UNABLE_TO_READ; /* purecov: inspected */
  else
    ret = log_error_read_backtrace_loop(log_name, (size_t)stat_log.st_size);

  return ret;
}