# ==============================================================================
# extensions.nm
# Author: Tony Balinski
#
# This file contains the following functions:
#   (Character classes)
#       ctype_charstr
#       ctype_change_charset
#       tochar
#       toascii
#       posinstring
#       charatpos
#       isinstring
#       tolowercase
#       touppercase
#       islowercase islower isuppercase isupper isalpha isasciialpha
#       isdigit isxdigit isalnum isspace ispunct isgraph isprint iscntrl isascii
#       isblank isword
#       togglecase
#   (Short forms for substring() and replace_in_string(..., "copy"))
#       substr
#       replace_in_str
#   (Extensions - use string or document as required)
#       subtext
#       search_text
#   (Octal/Hexadecimal numeric conversion)
#       base_to_int
#       oct_to_int
#       hex_to_int
#       hex_to_char
#       hex2_to_str
#       base64_to_chars
#       chars_to_base64
#       int_to_binbase
#       int_to_hex
#       int_to_HEX
#       int_to_oct
#       int_to_bin
#       char_to_hex
#       char_to_HEX
#       char_to_oct
#   (Line information)
#       start_of_line_pos
#       end_of_line_pos
#       line_of_pos
#   (Wrapping/formatting)
#       break_lines_over
#   (Regexes)
#       quote_literal_as_c
#       quote_literal_as_regex
#       quote_literal_as_subst
#       quote_literal_for_shell_esc
#       regex_to_quoted_string
#       regex_capturing_parens
#       unquote
#   (Positions, lines and columns)
#       line_col_to_pos
#       str_line_col_to_pos
#       pos_to_line
#       pos_to_column
#   (word extraction)
#       get_word_at_pos
#   (Text justification, formatting)
#       longest_line_len
#       longest_line
#       rjust_s rjust
#       ljust_s ljust
#       trim
#       chomp
#       compress
#       repeat
#       reverse_string
#   (String to number conversions)
#       number tonumber
#   (String comparison)
#       nuls nz
#       eqs nes lts les gts ges
#       eqsi nesi ltsi lesi gtsi gesi
#   (Numeric comparison)
#       eq ne lt le gt ge
#       lt_lt lt_le le_le le_lt between
#   (Bit manipulation)
#       compl
#       xor
#       lshift rshift urshift
#   (Pattern matching - short versions of search_string(), returning boolean)
#       match_as
#       matched_prefix matched_suffix matched_text matched_start matched_end
#       matched_groups matched_part matched_[1-9]
#       match_re match_rei match_ren match_reni
#       match_w match_wi
#       match_s match_si
#   (Pattern replacing - returning replace_in_string(..., "copy") result)
#       sub_re
#       sub_s
#   (Colors)
#       background_is_dark
# ==============================================================================

# ------------------------------------------------------------------------------
# Globals:
#       $EXTENSIONS_NM[]: an array holding static data used by functions in this
#       module.
#       For character classes we have the following key:
#           chars   refers to an array of two entries:
#               ascii, iso8859_1    each holding character class keys:
#                   upper, lower, alpha, digit, alnum
#                   punct, space, xdigit, graph, print
#                   word, blank, cntrl
#               each of these contains a string of all characters in the
#               character class
#       For the regular expression matching routines (see match_as(),
#       matched_prefix(), matched_suffix(), matched_text(), matched_start(),
#       matched_end(), matched_groups(), matched_part(), matched_[1-9](),
#       match_re(), match_rei(), match_ren(), match_reni, match_w(), match_wi(),
#       match_s(), match_si()) we have the keys:
#           match_as__start                 starting position of the match
#           match_as__end                   end position of the match
#           match_as__before                text prefixing the found match
#           match_as__matched               text that matched the pattern
#           match_as__after                 text following the found match
#           match_as__pattern               search pattern
#           match_as__type                  search type
#           match_as__paren_groups          number of regex grouping parentheses
#                                           (see matched_groups)
#       For background_is_dark() we have the keys:
#           background_is_dark__value       previous result of function
#           background_is_dark__rgb_text_bg result of a color lookup
# ------------------------------------------------------------------------------

$EXTENSIONS_NM[""] = 0

# ==============================================================================

# Build the character class tables once only: if "chars" is already present in
# $EXTENSIONS_NM (the file has been loaded before) nothing is rebuilt and the
# currently selected charset is left alone.
#
# All characters above 0x7f are written as \x escapes rather than as literal
# 8-bit characters, so that the tables stay correct even if this file is
# stored or transferred in a different encoding (eg UTF-8).
if (!("chars" in $EXTENSIONS_NM))
  {
  # in chars_all, the leading " " is a dummy character for the zero index
  # (character NUL)
  chars_all = \
             " \x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
          "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
          "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
          "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
          "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
          "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
          "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
          "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" \
          "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
          "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
          "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
          "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
          "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
          "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
          "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
          "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"

  # control characters 0x01-0x1f plus DEL (0x7f); NUL cannot be held in a string
  cntrl_chars =     "\x01\x02\x03\x04\x05\x06\x07" \
                "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                "\x10\x11\x12\x13\x14\x15\x16\x17" \
                "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
                "\x7f"

  ascii["upper"]  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  ascii["lower"]  = "abcdefghijklmnopqrstuvwxyz"
  ascii["alpha"]  = ascii["upper"] ascii["lower"]
  ascii["digit"]  = "0123456789"
  ascii["alnum"]  = ascii["alpha"] ascii["digit"]
  ascii["punct"]  = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
  ascii["space"]  = "\t\n\v\f\r "
  ascii["xdigit"] = ascii["digit"] "ABCDEFabcdef"
  ascii["graph"]  = ascii["alnum"] ascii["punct"]
  ascii["print"]  = ascii["graph"] " "
  ascii["word"]   = ascii["alnum"] "_"
  ascii["blank"]  = " \t"
  ascii["cntrl"]  = cntrl_chars
  # 7-bit ascii (position 0 is the dummy standing in for NUL)
  ascii["ascii"]  = substring(chars_all, 0, 128)

  chars["ascii"] = ascii

  # The upper and lower strings must stay aligned position for position:
  # tolowercase()/touppercase() translate by index. Accented capitals are
  # 0xc0-0xd6 and 0xd8-0xde, their lowercase forms 0xe0-0xf6 and 0xf8-0xfe.
  # For sharp s (0xdf) and y-diaeresis (0xff) we cheat: pretend
  # upper(0xdf) == 0xdf (it should be SS) and upper(0xff) == Y
  iso8859_1["upper"]  = ascii["upper"] \
          "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
          "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" "Y"
  iso8859_1["lower"]  = ascii["lower"] \
          "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
          "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xff"
  iso8859_1["alpha"]  = iso8859_1["upper"] iso8859_1["lower"]
  iso8859_1["digit"]  = "0123456789"
  iso8859_1["alnum"]  = iso8859_1["alpha"] iso8859_1["digit"]
  # ascii punctuation, then 0xa1-0xbf, the multiplication sign (0xd7) and the
  # division sign (0xf7)
  iso8859_1["punct"]  = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" \
          "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
          "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
          "\xd7\xf7"
  iso8859_1["space"]  = "\t\n\v\f\r "
  iso8859_1["xdigit"] = iso8859_1["digit"] "ABCDEFabcdef"
  iso8859_1["graph"]  = iso8859_1["alnum"] iso8859_1["punct"]
  iso8859_1["print"]  = iso8859_1["graph"] " "
  iso8859_1["word"]   = iso8859_1["alnum"] "_"
  iso8859_1["blank"]  = " \t"
  iso8859_1["cntrl"]  = cntrl_chars
  iso8859_1["ascii"]  = ascii["ascii"]

  chars["iso8859_1"] = iso8859_1

  $EXTENSIONS_NM["chars"] = chars
  $EXTENSIONS_NM["chars_all"] = chars_all
  $EXTENSIONS_NM["charset"] = "iso8859_1" # which set to use: ascii, iso8859_1
  }

# ==============================================================================
# ctype_charstr(charclass [, charset]): return the string of characters of the
#       named class using the current character set (ascii or iso8859_1), or a
#       named set.
# ==============================================================================

define ctype_charstr
  {
  # start from the module-wide current set; an explicit second argument
  # overrides it for this call only
  set_name = $EXTENSIONS_NM["charset"]
  if ($n_args > 1)
    set_name = $2
  return $EXTENSIONS_NM["chars"][set_name][$1]
  }

# ==============================================================================
# ctype_change_charset(charset): change the global character set used in the
#       character class functions. If charset exists as an index to
#       chars, it is stored in $EXTENSIONS_NM["charset"] for future use. If
#       not, the operation is ignored. Return the current charset name.
# ==============================================================================

define ctype_change_charset
  {
  # a missing argument or an unknown set name leaves the selection unchanged
  if ($n_args > 0)
    {
    wanted = $1
    known = (wanted in $EXTENSIONS_NM["chars"])
    if (known)
      $EXTENSIONS_NM["charset"] = wanted
    }
  # always report the set now in force
  return $EXTENSIONS_NM["charset"]
  }

# ==============================================================================
# tochar(int): returns the ascii char corresponding to the integer int, if in
#       range, as a single character string. Fails with the empty string (as for
#       int = 0).
# ==============================================================================

define tochar
  {
  code = $1
  # NUL and anything outside -128..255 cannot be represented
  if (code == 0 || code < -128 || code > 255)
    return ""
  # negative values are treated as signed bytes: map them onto 128..255
  if (code < 0)
    code = code + 256
  # chars_all holds character n at position n
  return substring($EXTENSIONS_NM["chars_all"], code, code + 1)
  }

# ==============================================================================
# toascii(char): returns the character code of the first character in char as
#       an integer. Returns 0 if char is the empty string, or -1 if the
#       character cannot be found in the character table.
# ==============================================================================

define toascii
  {
  first = substring($1, 0, 1)
  # nothing to look up: report code 0
  if (first == "")
    return 0
  # chars_all holds character n at position n; start at 1 because position 0
  # is only a placeholder for NUL
  return search_string($EXTENSIONS_NM["chars_all"], first, 1, "case")
  }

# ==============================================================================
# posinstring(sub, str): returns the position of the first occurrence of string
#       sub in string str. Fails with -1.
# ==============================================================================

define posinstring
  {
  needle = $1
  haystack = $2
  # case-sensitive literal search from the start of haystack
  return search_string(haystack, needle, 0, "case")
  }

# ==============================================================================
# charatpos(str, pos): returns the character of string str at position pos.
#       Fails with "" if pos is negative or beyond the end of str.
# ==============================================================================

define charatpos
  {
  # Parameters:
  #       $1 - string to pick the character from
  #       $2 - zero-based position of the character
  # Returns the one-character string at that position, or "" when the
  # position is negative or past the end of the string.
  str = $1
  pos = $2      # was never assigned before being tested
  if (pos < 0)
    return ""
  return substring(str, pos, pos + 1)
  }

# ==============================================================================
# isinstring(ch, str): returns true if the first character of ch is in str.
# ==============================================================================

define isinstring
  {
  # only the first character of $1 takes part in the test
  where = search_string($2, substring($1, 0, 1), 0, "case")
  return (where != -1)
  }

# ==============================================================================
# tolowercase(string): converts to lower case, returning the converted string.
# ==============================================================================

define tolowercase
  {
  src = $1
  n = length(src)
  # the two class strings are aligned: the lowercase form of the character at
  # position p of "upper" is at position p of "lower"
  from = ctype_charstr("upper")
  to = ctype_charstr("lower")

  out = ""
  k = 0
  while (k < n)
    {
    c = substring(src, k, k + 1)
    at = search_string(from, c, 0, "case")
    # translate uppercase characters, copy everything else through
    if (at != -1)
      c = substring(to, at, at + 1)
    out = out c
    k++
    }
  return out
  }

# ==============================================================================
# touppercase(string): converts to upper case, returning the converted string.
# ==============================================================================

define touppercase
  {
  src = $1
  n = length(src)
  # the two class strings are aligned: the uppercase form of the character at
  # position p of "lower" is at position p of "upper"
  to = ctype_charstr("upper")
  from = ctype_charstr("lower")

  out = ""
  k = 0
  while (k < n)
    {
    c = substring(src, k, k + 1)
    at = search_string(from, c, 0, "case")
    # translate lowercase characters, copy everything else through
    if (at != -1)
      c = substring(to, at, at + 1)
    out = out c
    k++
    }
  return out
  }

# ==============================================================================
# isctype(ch, classname [, charset]): returns true if the first character of ch
#       is in the character class named classname for the character set charset.
# ==============================================================================

define isctype
  {
  # fetch the members of the class, from the named set if one was given,
  # otherwise from the current set
  if ($n_args >= 3)
    members = ctype_charstr($2, $3)
  else
    members = ctype_charstr($2)
  return isinstring($1, members)
  }

# ==============================================================================
# islowercase(string), islower(string),
# isuppercase(string), isupper(string),
# isalpha(string), isasciialpha(string),
# isdigit(string), isxdigit(string),
# isalnum(string), isspace(string),
# ispunct(string), isgraph(string),
# isprint(string), iscntrl(string),
# isascii(string): return true if the first character of string is of the
#       appropriate class.
# isblank(string): return true if the first character is space or tab
# isword(string): return true if the first character is '_', an ascii letter or
#       a digit (ascii only, whatever the current character set)
# ==============================================================================

# Each predicate below looks only at the first character of its argument and
# tests it for membership of one character class string.

# islowercase(string): lowercase letter in the current set (same as islower())
define islowercase
  {
  return isinstring($1, ctype_charstr("lower"))
  }
# islower(string): lowercase letter in the current set
define islower
  {
  return isinstring($1, ctype_charstr("lower"))
  }
# isuppercase(string): uppercase letter in the current set (same as isupper())
define isuppercase
  {
  return isinstring($1, ctype_charstr("upper"))
  }
# isupper(string): uppercase letter in the current set
define isupper
  {
  return isinstring($1, ctype_charstr("upper"))
  }
# isalpha(string): letter in the current set; see isasciialpha() for the
#       ascii-only test
define isalpha
  {
  return isinstring($1, ctype_charstr("alpha"))
  }
# isasciialpha(string): ascii letter, whatever the current set
define isasciialpha
  {
  return isinstring($1, ctype_charstr("alpha", "ascii"))
  }
# isdigit(string): decimal digit
define isdigit
  {
  return isinstring($1, ctype_charstr("digit"))
  }
# isxdigit(string): hexadecimal digit
define isxdigit
  {
  return isinstring($1, ctype_charstr("xdigit"))
  }
# isalnum(string): letter or digit in the current set
define isalnum
  {
  return isinstring($1, ctype_charstr("alnum"))
  }
# isspace(string): white space character
define isspace
  {
  return isinstring($1, ctype_charstr("space"))
  }
# ispunct(string): punctuation character in the current set
define ispunct
  {
  return isinstring($1, ctype_charstr("punct"))
  }
# isgraph(string): letter, digit or punctuation in the current set
define isgraph
  {
  return isinstring($1, ctype_charstr("graph"))
  }
# isprint(string): isgraph() character or a space, in the current set
define isprint
  {
  return isinstring($1, ctype_charstr("print"))
  }
# iscntrl(string): control character in the current set
define iscntrl
  {
  return isinstring($1, ctype_charstr("cntrl"))
  }
# isascii(string): character code below 128
define isascii
  {
  code = toascii($1)
  return code < 128
  }
# isblank(string): space or tab
define isblank
  {
  return isinstring($1, ctype_charstr("blank"))
  }
# isword(string): '_', an ascii letter or a digit, whatever the current set
define isword
  {
  return isinstring($1, ctype_charstr("word", "ascii"))
  }

# ==============================================================================
# togglecase(string): returns the string with all uppercase characters switched
#       to lowercase and vice-versa.
# ==============================================================================

define togglecase
  {
  src = $1
  out = ""
  n = length(src)
  for (k = 0; k < n; k++)
    {
    c = substring(src, k, k + 1)
    up = touppercase(c)
    # a character unchanged by uppercasing is either already uppercase or
    # has no case at all: lowercasing handles both
    if (up == c)
      out = out tolowercase(c)
    else
      out = out up
    }
  return out
  }

# ==============================================================================
# substr(string, [startpos, [endpos]]): calls substring() for given string,
#       start position, end position. Position parameters are optional. If any
#       are negative, measurement is made from the end of the string.
#
# Parameters:
#       $1 - source string (default "")
#       $2 - start position (default 0)
#       $3 - end position (default length($1))
# ==============================================================================

define substr
  {
  # no string at all: nothing to return
  if ($n_args < 1)
    return ""

  text = $1
  n = length(text)

  # defaults cover the whole string
  from = 0
  to = n
  if ($n_args >= 2)
    from = $2
  if ($n_args >= 3)
    to = $3

  # a negative position counts back from the end of the string, but never
  # reaches back beyond its start
  if (from < 0)
    from = max(from + n, 0)
  if (to < 0)
    to = max(to + n, 0)

  return substring(text, from, to)
  }

# ==============================================================================
# replace_in_str(string, search_for, replace_with, [type]): calls
#       replace_in_string() with its parameters. If replace_in_string() fails,
#       returns the original string unchanged.
#
# Parameters:
#       $1 - original string in which patterns should be matched
#       $2 - patterns to replace
#       $3 - what to replace with (default "")
#       $4 - type of matching to use (default "literal")
# ==============================================================================

define replace_in_str
  {
  # both the string and the pattern are required
  if ($n_args < 2)
    return ""

  # optional arguments fall back to: replace with nothing, literal search
  replacement = ""
  how = "literal"
  if ($n_args >= 4)
    how = $4
  if ($n_args >= 3)
    replacement = $3

  # "copy" makes replace_in_string() hand back the original when nothing
  # matched, instead of an empty string
  return replace_in_string($1, $2, replacement, how, "copy")
  }

# ------------------------------------------------------------------------------
# subtext(str, left [, right] [, "file" | "document" | "string"]): returns a
#       substring of str, if str is not empty; returns a substring of the
#       document otherwise. Acts like the built-in substring() function. If str
#       might be empty, a final parameter can force the document-versus-string
#       input choice.
# ------------------------------------------------------------------------------

define subtext
  {
  # Parameters:
  #       $1 - source string; if empty (and no keyword says otherwise) the
  #            document text is used instead
  #       $2 - left (start) position; negative counts back from the end
  #       $3 - optional right (end) position, only taken as such if it is a
  #            valid number; negative counts back from the end
  #       last - optional keyword: "file" or "document" forces use of the
  #            document, anything else (eg "string") forces use of $1
  # Returns the selected text, or "" if the range is empty.
  str = $1
  left = $2
  right = 0
  have_right = 0
  use_file = 0

  if ($n_args > 2)
    {
    if (valid_number($3))
      {
      right = $3
      have_right = 1
      }
    }
  # use which input? $args[] is indexed from 1, like $1..$9, so the keyword is
  # argument 3 when no right position was given and argument 4 otherwise
  if ($n_args > 2 + have_right)
    {
    arg = $args[3 + have_right]
    use_file = (arg == "file" || arg == "document")
    }
  else if (str == "")
    use_file = 1

  if (use_file)
    len = $text_length                  # use document text
  else
    len = length(str)

  if (!have_right)
    right = len

  # negative positions are measured from the end, clamped at the start
  if (left < 0)
    left = max(0, len + left)
  if (right < 0)
    right = max(0, len + right)

  # neither position may lie beyond the end
  left = min(left, len)
  right = min(right, len)

  if (left >= right)
    return ""

  # honour the input choice made above, not just whether str is empty
  if (use_file)
    return get_range(left, right)       # use document text
  else
    return substring(str, left, right)
  }

# ------------------------------------------------------------------------------
# search_text(str, ...): returns the results of a call to search() or
#       search_string() depending on whether the string str is empty, or whether
#       the keyword "file", "document" or "string" is seen as one of the
#       arguments beyond the third.
# ------------------------------------------------------------------------------

define search_text
  {
  str = $1
  pat = $2
  pos = $3

  # "" (or -1 for use_file) marks an option not yet seen in the arguments
  type = ""
  direction = ""
  wrap = ""
  use_file = -1

  # Sort the keyword arguments, from the fourth onwards, into their options.
  # Giving an option twice, or an unknown keyword, stops the macro by calling
  # a function that does not exist, whose name describes the problem.
  for (k = 4; k <= $n_args; k++)
    {
    word = $args[k]
    if (word == "wrap" || word == "nowrap")
      {
      if (wrap != "") search_text_wrap_value_assigned_twice()
      wrap = word
      }
    else if (word == "forward" || word == "backward")
      {
      if (direction != "") search_text_direction_value_assigned_twice()
      direction = word
      }
    else if (word == "literal" || word == "case" || \
             word == "word"    || word == "caseWord" || \
             word == "regex"   || word == "regexNoCase")
      {
      if (type != "") search_text_type_value_assigned_twice()
      type = word
      }
    else if (word == "file" || word == "document" || word == "string")
      {
      if (use_file != -1) search_text_use_file_value_assigned_twice()
      use_file = (word != "string")
      }
    else
      search_text_unknown_keyword_argument()
    }

  # anything not specified gets its default; with no input keyword the
  # document is searched only if the string is empty
  if (use_file == -1)
    use_file = (str == "")
  if (wrap == "")
    wrap = "nowrap"
  if (direction == "")
    direction = "forward"
  if (type == "")
    type = "literal"

  if (!use_file)
    return search_string(str, pat, pos, type, direction, wrap)
  return search(pat, pos, type, direction, wrap)
  }

# ==============================================================================
# base_to_int(base, str [, "strict"]): returns the value of the string as an
#       integer of base base. All (and only) valid base characters are
#       considered. Scanning stops at first invalid character, returning "" if
#       the argument "strict" is passed. For bases 2 to 36, the characters are
#       0-9, a-z (case insignificant); for 37 to 64 they are the base64
#       characters A-Z, a-z, 0-9, +, / and are case significant. (See RFC 1421,
#       RFC 2045.) Bases outside the range 2 - 64 are not allowed.
# ==============================================================================

define base_to_int
  {
  # Parameters:
  #       $1 - base, from 2 to 64
  #       $2 - string of digits to convert (default: none, giving 0)
  #       $3 - optional "strict": fail if $2 is empty or holds a character
  #            which is not a digit of the base
  # Returns the integer value of the leading valid digits, or "" on failure
  # in strict mode. An invalid base stops the macro by calling a function
  # that does not exist.

  # get base
  base = $1
  if (!(2 <= base && base <= 64))
    return base_to_int_called_with_invalid_base()

  # digit alphabets: the position of a character is its value
  lc = "0123456789abcdefghijklmnopqrstuvwxyz"
  uc = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

  b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

  # get parameter
  if ($n_args < 2)
    return 0

  is_strict = 0
  if ($n_args > 2)
    is_strict = ($3 == "strict")

  if (base > 36)
    {
    uc = b64
    lc = b64
    }

  # pos stays negative if there were no digits or scanning hit a bad one
  pos = -1
  val = 0
  len = length($2)
  for (i = 0; i < len; ++i)
    {
    c = substring($2, i, i+1)
    # The searches must be case sensitive: in the base64 alphabet "A" and
    # "a" are different digits. For bases up to 36 case insensitivity comes
    # from trying the lowercase then the uppercase alphabet.
    pos = search_string(lc, c, 0, "case")
    if (pos < 0)
      pos = search_string(uc, c, 0, "case")
    if (pos < 0 || pos >= base)
      {
      pos = -1
      break
      }

    val = (base * val) + pos
    }
  if (pos < 0 && is_strict)
    return ""

  return val
  }

# ==============================================================================
# oct_to_int(oct_str [, "strict"]): returns the value of the oct string. All
#       (and only) oct characters are considered. Scanning stops at first
#       non-oct character. If a second parameter is supplied and equals "strict"
#       all input characters must be oct digits, otherwise the function fails
#       with the string "".
# ==============================================================================

define oct_to_int
  {
  # nothing to convert
  if ($n_args == 0)
    return 0
  # hand the optional "strict" argument on only if it was given
  if ($n_args > 1)
    return base_to_int(8, $1, $2)
  return base_to_int(8, $1)
  }

# ==============================================================================
# hex_to_int(hex_str [, "strict"]): returns the value of the hex string. All
#       (and only) hex characters are considered. Scanning stops at first
#       non-hex character. If a second parameter is supplied and equals "strict"
#       all input characters must be hex digits, otherwise the function fails
#       with the string "".
# ==============================================================================

define hex_to_int
  {
  # nothing to convert
  if ($n_args == 0)
    return 0
  # hand the optional "strict" argument on only if it was given
  if ($n_args > 1)
    return base_to_int(16, $1, $2)
  return base_to_int(16, $1)
  }

# ==============================================================================
# hex_to_char(hex_str): returns the character whose code is the value of the
#       hex string. All (and only) leading hex characters are considered.
#       Fails with "" if the value is not in the range 1 to 255.
# ==============================================================================

define hex_to_char
  {
  code = hex_to_int($1)
  # only codes 1..255 have a character: chars_all holds character n at
  # position n, and position 0 is just a placeholder for NUL
  if (code >= 1 && code <= 255)
    return substring($EXTENSIONS_NM["chars_all"], code, code + 1)
  return ""
  }

# ==============================================================================
# hex2_to_str(hex_str): returns a string whose value is that of the
#       concatenation of characters whose hex values are listed, two digits at
#       a time. All non-digits in the input are ignored. A nul value is
#       translated to the unlikely string [[[---!NUL!---]]]
# ==============================================================================

define hex2_to_str
  {
  # Parameters:
  #       $1 - string of hex digit pairs, possibly mixed with other characters
  # Returns the string of characters with those codes; a "00" pair gives the
  # marker [[[---!NUL!---]]] since a string cannot hold a NUL character.

  # Remove all non-hex characters. "copy" is needed so that input which is
  # already pure hex comes back unchanged: without it replace_in_string()
  # returns "" when it finds nothing to replace.
  s = replace_in_string($1, "[^0-9A-Fa-f]|\n", "", "regex", "copy")

  res = ""
  i = 0
  # take two digits at a time; an odd digit left at the end is converted on
  # its own
  for (cc = substring(s, i, i+2); cc != ""; i += 2, cc = substring(s, i, i+2))
    {
    if (cc == "00")
      res = res "[[[---!NUL!---]]]"
    else
      res = res hex_to_char(cc)
    }

  return res
  }

# ==============================================================================
# base64_to_chars(str [, b62, b63]): decodes string str in base64 as a sequence
#       of byte valued characters returned. The +/ values for 62 and 63 can be
#       replaced. All (and only) valid base characters are considered, except
#       that whitespace is ignored. Scanning stops at first invalid input
#       character, or the terminator. (See RFC1421, RFC 2045.) It also stops if
#       a zero character is found in the output.
# ==============================================================================

define base64_to_chars
  {
  # Parameters:
  #       $1 - base64 text to decode
  #       $2 - optional character standing for value 62 (default "+")
  #       $3 - optional character standing for value 63 (default "/")
  # Returns the decoded characters. Replacement characters must be single,
  # graphic, ascii, different from each other and not already base64 digits;
  # if not, the macro is stopped by calling a function that does not exist.
  str = $1
  b0_61 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  b62 = "+"
  b63 = "/"
  if ($n_args >= 2)
    b62 = $2
  if ($n_args >= 3)
    b63 = $3
  if (length(b62) != 1|| !isgraph(b62)|| !isascii(b62)|| isinstring(b62, b0_61))
    return base64_to_chars_called_with_invalid_b62()
  if (length(b63) != 1|| !isgraph(b63)|| !isascii(b63)|| isinstring(b63, b0_61)\
      || b62 == b63)
    return base64_to_chars_called_with_invalid_b63()

  # the position of a character in b64 is its 6-bit value
  b64 = b0_61 b62 b63

  # drop all white space, then everything from the first character which is
  # not a base64 digit (this includes the "=" terminator)
  str = replace_in_string(str, "(?n\\s+)", "", "regex", "copy")
  inp = replace_in_string(str, "[^"b0_61"\\"b62"\\"b63"].*", "", "regex","copy")

  # multipliers placing a 6-bit value 6, 12 and 18 bits up
  s06 = 64
  s12 = 64 * s06
  s18 = 64 * s12

  len = length(inp)
  res = ""

  for (i = 0; i < len; i += 4)
    {
    # eat off 4 chars at a time, to make 3 on output; lookups must be case
    # sensitive since "A" and "a" are different base64 digits
    A = search_string(b64, substring(inp,   i, i+1), 0, "case")
    B = search_string(b64, substring(inp, i+1, i+2), 0, "case")
    C = search_string(b64, substring(inp, i+2, i+3), 0, "case")
    D = search_string(b64, substring(inp, i+3, i+4), 0, "case")
    # digits missing from a short final group count as zero
    if (A == -1) A = 0
    if (B == -1) B = 0
    if (C == -1) C = 0
    if (D == -1) D = 0

    # assemble the 24 bits in their own variable: the loop index i must not
    # be disturbed
    bits = A * s18 + B * s12 + C * s06 + D

    # split into three bytes, lowest first
    z = bits & 255
    bits /= 256
    y = bits & 255
    bits /= 256
    x = bits & 255

    r = tochar(x) tochar(y) tochar(z)
    res = res r

    # tochar() gives "" for a zero byte: stop there
    if (length(r) < 3)
      break
    }

  return res
  }

# ==============================================================================
# chars_to_base64(str [, b62, b63]): codes string str to the returned base64
#       sequence. The +/ values for 62 and 63 can be replaced. The sequence is
#       broken with a new line every 64 coding characters. (See  RFC1421,
#       RFC 2045.)
# ==============================================================================

define chars_to_base64
  {
  # Parameters:
  #       $1 - string of characters to encode
  #       $2 - optional character standing for value 62 (default "+")
  #       $3 - optional character standing for value 63 (default "/")
  # Returns the base64 text, with a new line after every 64 coding
  # characters. A final group of one or two input characters gives two or
  # three coding characters; no "=" padding is added. Replacement characters
  # must be single, graphic, ascii, different from each other and not already
  # base64 digits; if not, the macro is stopped by calling a function that
  # does not exist.
  str = $1
  b0_61 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  b62 = "+"
  b63 = "/"
  if ($n_args >= 2)
    b62 = $2
  if ($n_args >= 3)
    b63 = $3
  if (length(b62) != 1|| !isgraph(b62)|| !isascii(b62)|| isinstring(b62, b0_61))
    return chars_to_base64_called_with_invalid_b62()
  if (length(b63) != 1|| !isgraph(b63)|| !isascii(b63)|| isinstring(b63, b0_61)\
      || b62 == b63)
    return chars_to_base64_called_with_invalid_b63()

  # the character at position n of b64 is the digit for 6-bit value n
  b64 = b0_61 b62 b63

  # The input is arbitrary text, not base64: every character of it, white
  # space included, is encoded.

  # multipliers placing a byte 8 and 16 bits up
  s08 = 256
  s16 = 256 * s08

  len = length(str)
  res = ""
  line_len = 0

  for (i = 0; i < len; i += 3)
    {
    # eat off 3 chars at a time, to make 4 on output; toascii() gives 0 for
    # the characters missing from a short final group
    A = toascii(substring(str,   i, i+1))
    B = toascii(substring(str, i+1, i+2))
    C = toascii(substring(str, i+2, i+3))
    if (A == -1) A = 0
    if (B == -1) B = 0
    if (C == -1) C = 0

    # assemble the 24 bits in their own variable: the loop index i must not
    # be disturbed
    bits = A * s16 + B * s08 + C

    # split into four 6-bit values, lowest first
    z = bits & 63
    bits /= 64
    y = bits & 63
    bits /= 64
    x = bits & 63
    bits /= 64
    w = bits & 63

    r = substring(b64, w, w+1) substring(b64, x, x+1) \
        substring(b64, y, y+1) substring(b64, z, z+1)

    # a short final group carries fewer significant digits; decide from the
    # number of characters left, not from their values
    remaining = len - i
    if (remaining == 1)
      r = substring(r, 0, 2)
    else if (remaining == 2)
      r = substring(r, 0, 3)

    # break the output into lines of 64 coding characters
    if (line_len >= 64)
      {
      res = res "\n"
      line_len = 0
      }
    res = res r
    line_len += length(r)
    }

  return res
  }

# ==============================================================================
# int_to_binbase(bits, digits, i, mindigits): returns the binary representation
#       of the integer i in the base of 2^bits. Values are treated as unsigned;
#       this will never have a minus sign. digits is a string containing the
#       digits to use, in order, starting with the zero digit. Used by
#       int_to_hex() and int_to_oct().
# ==============================================================================

define int_to_binbase
  {
  # Parameters:
  #       $1 - number of bits per output digit (eg 4 for hex, 3 for octal)
  #       $2 - string of digit characters, the digit for value n at position n
  #       $3 - integer to convert
  #       $4 - minimum number of digits to produce
  # Returns the digit string, most significant digit first.
  bits = $1
  digits = $2
  i = $3
  mindigits = $4

  # carry is 1 only for a negative input; it is used below to adjust odd
  # values before they are halved
  carry = (i < 0)
  # max_len is doubled on every pass and the loop ends if it becomes zero
  # NOTE(review): presumably this relies on integer overflow reaching zero
  # after as many doublings as there are bits in an integer, to bound the
  # loop for negative input - confirm
  max_len = 1

  res = ""
  acc = 0       # value of the digit being assembled
  accbit = 1    # weight of the next bit within that digit
  accbits = 0   # number of bits gathered for that digit so far
  while ((mindigits > 0 || i != 0) && max_len != 0)
    {
    # p is the lowest bit of i (1 if i is odd, whatever its sign)
    p = (i % 2) != 0
    # for a negative odd i, take one off first so that the halving below
    # moves away from zero rather than toward it
    i -= p & carry
    acc += (p * accbit)
    accbit *= 2
    ++accbits
    if (accbits == bits)
      {
      # a whole digit has been gathered: put it in front of the result
      res = substring(digits, acc, acc + 1) res
      accbits = 0
      acc = 0
      accbit = 1
      --mindigits
      }
    max_len *= 2 # shift up
    i = i / 2
    }
  # bits left over form a final, most significant digit if they are not
  # all zero
  if (acc > 0)
    {
    res = substring(digits, acc, acc + 1) res
    --mindigits
    }
  # pad on the left with the zero digit up to the minimum width
  while (mindigits-- > 0)
    {
    res = substring(digits, 0, 1) res
    }

  return res
  }

# ==============================================================================
# int_to_hex(int, mindigits): returns the hex representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
#       Hex letter "digits" will be lowercase: use int_to_HEX() for uppercase.
# ==============================================================================

define int_to_hex
  {
  value = $1
  # at least one digit is always produced
  width = 1
  if ($n_args > 1)
    width = $2
  if (width < 1)
    width = 1
  # 4 bits per digit, lowercase letters
  digit_chars = "0123456789abcdef"
  return int_to_binbase(4, digit_chars, value, width)
  }

# ==============================================================================
# int_to_HEX(int, mindigits): returns the hex representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
#       Hex letter "digits" will be uppercase: use int_to_hex() for lowercase.
# ==============================================================================

define int_to_HEX
  {
  value = $1
  # at least one digit is always produced
  width = 1
  if ($n_args > 1)
    width = $2
  if (width < 1)
    width = 1
  # 4 bits per digit, uppercase letters
  digit_chars = "0123456789ABCDEF"
  return int_to_binbase(4, digit_chars, value, width)
  }

# ==============================================================================
# int_to_oct(int, mindigits): returns the octal representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
# ==============================================================================

define int_to_oct
  {
  # $1 is the integer to convert; $2 (optional) is the minimum digit count,
  # with anything below one treated as one
  value = $1
  width = 1
  if ($n_args > 1 && $2 >= 1)
    width = $2

  # three bits per digit
  return int_to_binbase(3, "01234567", value, width)
  }

# ==============================================================================
# int_to_bin(int, mindigits): returns the binary representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
# ==============================================================================

define int_to_bin
  {
  # $1 is the integer to convert; $2 (optional) is the minimum digit count,
  # with anything below one treated as one
  value = $1
  width = 1
  if ($n_args > 1)
    width = $2
  if (width < 1)
    width = 1

  negative = (value < 0)
  # doubled once per bit; the loop stops when it becomes 0 (presumably by
  # overflow after a whole machine word has been visited - TODO confirm)
  guard = -1

  out = ""
  while (guard != 0 && (width > 0 || value != 0))
    {
    bit = (value % 2) != 0
    out = bit out
    width--
    guard *= 2
    # for an odd negative value, step down by one before halving
    value = (value - (bit & negative)) / 2
    }

  # pad with zeros up to the requested minimum
  while (width-- > 0)
    out = "0" out

  return out
  }

# ==============================================================================
# char_to_hex(chars [, prefix]): returns the hex values of each of the
#       characters passed. Each character is transformed into 2 hex digits.
#       A prefix can be added in front of each character, eg "\\x".
#       Hex letter "digits" will be lowercase: use char_to_HEX() for uppercase.
# ==============================================================================

define char_to_hex
  {
  # $1: the characters to convert; $2 (optional): prefix put before each value
  chars = $1
  prefix = ""
  if ($n_args > 1)
    prefix = $2

  # each character is looked up (from index 1) in this table and its index is
  # what gets converted - presumably the index equals the character's code;
  # verify against the definition of $EXTENSIONS_NM["chars_all"]
  table = $EXTENSIONS_NM["chars_all"]

  out = ""
  n = length(chars)
  for (k = 0; k < n; k++)
    {
    code = search_string(table, substring(chars, k, k + 1), 1, "case")
    # characters not found in the table are silently dropped
    if (code >= 1)
      out = out prefix int_to_hex(code, 2)
    }
  return out
  }

# ==============================================================================
# char_to_HEX(chars [, prefix]): returns the hex values of each of the
#       characters passed. Each character is transformed into 2 hex digits.
#       A prefix can be added in front of each character, eg "\\x".
#       Hex letter "digits" will be uppercase: use char_to_hex() for lowercase.
# ==============================================================================

define char_to_HEX
  {
  # $1: the characters to convert; $2 (optional): prefix put before each value
  chars = $1
  prefix = ""
  if ($n_args > 1)
    prefix = $2

  # each character is looked up (from index 1) in this table and its index is
  # what gets converted - presumably the index equals the character's code;
  # verify against the definition of $EXTENSIONS_NM["chars_all"]
  table = $EXTENSIONS_NM["chars_all"]

  out = ""
  n = length(chars)
  for (k = 0; k < n; k++)
    {
    code = search_string(table, substring(chars, k, k + 1), 1, "case")
    # characters not found in the table are silently dropped
    if (code >= 1)
      out = out prefix int_to_HEX(code, 2)
    }
  return out
  }

# ==============================================================================
# char_to_oct(chars [, prefix]): returns the octal values of each of the
#       characters passed. Each character is transformed into 3 octal digits.
#       A prefix can be added in front of each character, eg "\\".
# ==============================================================================

define char_to_oct
  {
  # $1: the characters to convert; $2 (optional): prefix put before each value
  chars = $1
  prefix = ""
  if ($n_args > 1)
    prefix = $2

  # each character is looked up (from index 1) in this table and its index is
  # what gets converted - presumably the index equals the character's code;
  # verify against the definition of $EXTENSIONS_NM["chars_all"]
  table = $EXTENSIONS_NM["chars_all"]

  out = ""
  n = length(chars)
  for (k = 0; k < n; k++)
    {
    code = search_string(table, substring(chars, k, k + 1), 1, "case")
    # characters not found in the table are silently dropped
    if (code >= 1)
      out = out prefix int_to_oct(code, 3)
    }
  return out
  }

# ==============================================================================
# start_of_line_pos([pos [, string]]): returns the position of the start of the
#       line containing pos (default $cursor) - ie the position following the
#       previous newline. If string is supplied, it is used instead of the
#       current document.
# ==============================================================================

define start_of_line_pos
  {
  # $1 (optional): position of interest, defaulting to the cursor
  # $2 (optional): string to search instead of the current document
  pos = $cursor
  if ($n_args >= 1)
    pos = $1

  # look backward for a newline lying strictly before pos
  if ($n_args >= 2)
    nl = search_string($2, "\n", pos - 1, "case", "backward")
  else
    nl = search("\n", pos - 1, "case", "backward")

  # the line starts just after that newline, or at 0 if there is none
  if (nl >= 0)
    return $search_end
  return 0
  }

# ==============================================================================
# end_of_line_pos([pos [, string]]): returns the position of the end of the line
#       containing pos (default $cursor) - ie the position of the next newline.
#       If string is supplied, it is used instead of the current document.
# ==============================================================================

define end_of_line_pos
  {
  # $1 (optional): position of interest, defaulting to the cursor
  # $2 (optional): string to search instead of the current document
  pos = $cursor
  if ($n_args >= 1)
    pos = $1

  # find the next newline at or after pos, and note where the text ends
  if ($n_args >= 2)
    {
    nl = search_string($2, "\n", pos, "case")
    limit = length($2)
    }
  else
    {
    nl = search("\n", pos, "case")
    limit = $text_length
    }

  # with no newline ahead, the line ends where the text does
  if (nl < 0)
    return limit
  return nl
  }

# ==============================================================================
# line_of_pos([pos [, string]]): returns the line containing the position pos
#       (default $cursor) as a string without a trailing newline. If string is
#       supplied, it is used instead of the current document.
# ==============================================================================

define line_of_pos
  {
  # $1 (optional): position of interest, defaulting to the cursor
  # $2 (optional): string to use instead of the current document
  # Returns the text of the line containing the position, without its newline.
  pos = $cursor
  if ($n_args >= 1)
    pos = $1
  useWin = ($n_args < 2)

  if (useWin)
    {
    e = end_of_line_pos(pos)                        # find end of THIS line
    pe = search("\n", e - 1, "case", "backward")    # now find the previous one
    }
  else
    {
    e = end_of_line_pos(pos, $2)
    pe = search_string($2, "\n", e - 1, "case", "backward")
    }

  # the line begins just after the previous newline, or at 0 if there is none
  if (pe >= 0)
    b = $search_end
  else
    b = 0

  # take the text from wherever the positions were measured: the document
  # when no string was given, otherwise the supplied string
  if (useWin)
    return get_range(b, e)
  return substring($2, b, e)
  }

# ==============================================================================
# break_lines_over(maxchars, string [, pref]): tries to break string at word
#       boundaries adding newlines followed by the string pref (if present).
#       Returns the modified string.
# ==============================================================================

define break_lines_over
  {
  # $1: maximum number of characters wanted on a line
  # $2: the string to break
  # $3 (optional): string emitted after each newline this function inserts;
  #     it is not counted against maxchars, and newlines already present in
  #     the string are copied without it
  maxchars = $1
  string = $2
  pref = ""
  if ($n_args > 2)
    pref = $3

  # a limit below 1 can never be met and would stop the loop below from
  # consuming anything, so hand the string back untouched
  if (maxchars < 1)
    return string

  res = ""
  while (length(string) > maxchars)
    {
    nlpos = search_string(string, "\n", 0)
    if (0 <= nlpos && nlpos <= maxchars)
      {
      # an existing newline already ends this line early enough: keep it
      res = res substring(string, 0, nlpos + 1)
      string = substring(string, nlpos + 1)
      }
    else
      {
      # break at the last end-of-word at or before maxchars; if there is
      # none, break in the middle of the word at maxchars
      nlpos = search_string(string, ">", maxchars, "regex", "backward")
      if (nlpos < 1 || nlpos > maxchars)
        nlpos = maxchars
      res = res substring(string, 0, nlpos) "\n" pref
      string = substring(string, nlpos)
      }
    }

  # whatever is left fits within maxchars
  return res string
  }

# ==============================================================================
# quote_literal_as_c(string): returns a string representing a valid C
#       string for the parameter passed.
# ==============================================================================

define quote_literal_as_c
  {
  text = $1
  # control characters with a C escape, and the letter naming each of them
  ctl_chars = "\a\b\f\n\r\t\v"
  ctl_names = "abfnrtv"
  out = ""
  n = length(text)

  for (k = 0; k < n; k++)
    {
    ch = substring(text, k, k + 1)
    ctl = search_string(ctl_chars, ch, 0, "case")
    if (search_string("\\'\"", ch, 0, "case") >= 0)
      out = out "\\" ch                 # back-slash and quotes: escape them
    else if (ctl >= 0)
      out = out "\\" substring(ctl_names, ctl, ctl + 1)
    else if (isprint(ch) && isascii(ch))
      out = out ch                      # printable ASCII is copied unchanged
    else
      out = out char_to_hex(ch, "\\x")  # anything else becomes a hex escape
    }

  return out
  }

# ==============================================================================
# quote_literal_as_regex(string): returns a string representing a valid regex
#       search string for the parameter passed.
# ==============================================================================

define quote_literal_as_regex
  {
  text = $1
  # characters that get a back-slash put in front of them
  meta = "\\|()[]{}<>.*+?^$&-"
  # control characters with an escape, and the letter naming each of them
  ctl_chars = "\a\b\e\f\n\r\t\v"
  ctl_names = "abefnrtv"
  out = ""
  n = length(text)

  for (k = 0; k < n; k++)
    {
    ch = substring(text, k, k + 1)
    if (search_string(meta, ch, 0, "case") >= 0)
      out = out "\\" ch
    else
      {
      ctl = search_string(ctl_chars, ch, 0, "case")
      if (ctl >= 0)
        out = out "\\" substring(ctl_names, ctl, ctl + 1)
      else
        out = out ch                    # anything else is copied unchanged
      }
    }

  return out
  }

# ==============================================================================
# quote_literal_as_subst(string): returns a string representing a valid
#       replace substitution string for the parameter passed.
# ==============================================================================

define quote_literal_as_subst
  {
  text = $1
  # control characters with an escape, and the letter naming each of them
  ctl_chars = "\a\b\e\f\n\r\t\v"
  ctl_names = "abefnrtv"
  out = ""
  n = length(text)

  for (k = 0; k < n; k++)
    {
    ch = substring(text, k, k + 1)
    if (search_string("\\&", ch, 0, "case") >= 0)
      out = out "\\" ch                 # back-slash and ampersand: escape them
    else
      {
      ctl = search_string(ctl_chars, ch, 0, "case")
      if (ctl >= 0)
        out = out "\\" substring(ctl_names, ctl, ctl + 1)
      else
        out = out ch                    # anything else is copied unchanged
      }
    }

  return out
  }

# ==============================================================================
# quote_literal_for_shell_esc(string): returns a string representing the input
#       string with backslash escapes and/or octal escapes. It quotes the
#       following with a backslash:
#           space ! " # $ & ' ( ) * ; < > ? [ \ ] ^ ` { | } ~
#       It "C-style" backslash-escapes the following control characters:
#           bell (\a), back-space (\b), form-feed (\f), new-line/line-feed (\n),
#           carriage-return (\r), tab (\t), vertical-tab (\v)
#       It "C-style" octal-escapes the other characters that fall into the
#       iscntrl(ch) group, as defined by the iscntrl() macro function.
# ==============================================================================

define quote_literal_for_shell_esc
  {
  text = $1
  # characters that get a back-slash put in front of them
  specials = " !\"#$&'()*;<>?[\\]^`{|}~"
  # control characters with a C-style escape, and the letter naming each
  ctl_chars = "\a\b\f\n\r\t\v"
  ctl_names = "abfnrtv"
  out = ""
  n = length(text)

  for (k = 0; k < n; k++)
    {
    ch = substring(text, k, k + 1)
    ctl = search_string(ctl_chars, ch, 0, "case")
    if (search_string(specials, ch, 0, "case") >= 0)
      out = out "\\" ch
    else if (ctl >= 0)
      out = out "\\" substring(ctl_names, ctl, ctl + 1)
    else if (iscntrl(ch))
      out = out char_to_oct(ch, "\\")   # other control chars: octal escape
    else
      out = out ch                      # anything else is copied unchanged
    }

  return out
  }

# ==============================================================================
# regex_to_quoted_string(regex): adds quotes and backslashes to convert a
#       string containing a valid regex into one usable in NEdit Macro code.
# ==============================================================================

define regex_to_quoted_string
  {
  # $1: a string holding a regex
  # Returns the text of a double-quoted macro string literal for that regex.
  string = $1
  res = ""
  len = length(string)
  # control characters with an escape, and the letter naming each of them
  ctl_chars = "\a\b\e\f\n\r\t\v"
  ctl_names = "abefnrtv"

  for (i = 0; i < len; i++)
    {
    c = substring(string, i, i + 1)
    if (c == "\\")
      {
      # back-slashes are handled in pairs, so that an escaped back-slash
      # followed by a letter (eg \\n) is not mistaken for a control escape
      next = substring(string, i + 1, i + 2)
      if (next == "\\")
        {
        res = res "\\\\\\\\"            # escaped back-slash: quote both halves
        i++                             # skip the next character
        }
      else if (next != "" && search_string(ctl_names, next, 0, "case") >= 0)
        {
        res = res c next  # already a back-slash escaped control char sequence
        i++               # skip the next character
        }
      else
        res = res "\\\\"                # quote that lone back-slash
      }
    else if (c == "\"")
      res = res "\\\""                  # quote that quote
    else
      {
      ctl = search_string(ctl_chars, c, 0, "case")
      if (ctl >= 0)                     # convert control char to escape seq
        res = res "\\" substring(ctl_names, ctl, ctl + 1)
      else
        res = res c                     # leave anything else alone
      }
    }

  return "\"" res "\""
  }

# ==============================================================================
# regex_capturing_parens(re): counts the number of regular expression capturing
#       parentheses in a string. These are non-escaped open-parentheses not
#       followed by a question mark. It does not check for valid regexes.
# ==============================================================================

define regex_capturing_parens
  {
  pattern = $1 ""
  # an open-paren not preceded by a back-slash and not followed by "?"
  open_group = "(?<!\\\\)\\((?!\\?)"

  count = 0
  hit = search_string(pattern, open_group, 0, "regex")
  while (hit >= 0)
    {
    count++
    # carry on looking from just after the parenthesis that was found
    hit = search_string(pattern, open_group, $search_end, "regex")
    }
  return count
  }

# ==============================================================================
# unquote(str): returns the result of removing quoting backslashes from str. It
#       only understands the special control character backslash sequences and
#       backslash itself. Otherwise, backslashes are removed.
# ==============================================================================

define unquote
  {
  # $1: the string to process
  # Returns a copy in which each back-slash sequence naming a control
  # character (or back-slash itself) is replaced by that character; any other
  # back-slash is dropped and the character after it kept.
  str = $1 ""
  end = 0
  res = ""
  for (pos = search_string(str, "\\", end); \
       pos >= 0; \
       pos = search_string(str, "\\", end))
    {
    # copy the text up to the back-slash, then step over the back-slash
    res = res substring(str, end, pos)
    end = $search_end
    next = substring(str, end, end + 1)

    # look the escaped character up among the letters that have a meaning;
    # a back-slash ending the string has nothing after it to look up
    ind = -1
    if (next != "")
      ind = search_string("abefnrtv\\", next, 0, "case")
    if (ind >= 0)
      {
      end++
      res = res substring("\a\b\e\f\n\r\t\v\\", ind, ind + 1)
      }
    }
  res = res substring(str, end)
  return res
  }

# ==============================================================================
# line_col_to_pos(lineNum [, colNum [, tabSize [, string]]]): returns the
#       position of the character position indicated by lineNum and colNum,
#       assuming a particular tabSize. By default, colNum is zero and tabSize
#       equals $tab_dist. Lines are numbered from 1, columns from zero. If there
#       are not enough lines, -1 is returned; if there are not enough columns in
#       the addressed line, the position of the last character is returned; if
#       the column is "inside" a tab, return the tab's position. If string is
#       present, measurements take place within it; otherwise the current
#       document is used.
# ==============================================================================

define line_col_to_pos
  {
  # $1: line number, counted from 1
  # $2 (optional): column number, counted from 0; ignored if negative
  # $3 (optional): tab size; anything below 1 is ignored and $tab_dist is
  #     used instead, since the tab arithmetic below divides by it
  # $4 (optional): string to measure in instead of the current document
  lineNum = $1
  colNum = 0
  if ($n_args >= 2 && $2 >= 0)
    colNum = $2
  tabSize = $tab_dist
  if ($n_args >= 3 && $3 > 0)
    tabSize = $3

  useWin = ($n_args <= 3)
  if (useWin)
    string = ""
  else
    string = $4

  bufferPos = 0

  # step over the lineNum - 1 whole lines that precede the wanted one
  if (lineNum > 1)
    {
    skipRE = "(?:^.*\n){" (lineNum - 1) "}"
    if (useWin)
      res = search(skipRE, 0, "regex")
    else
      res = search_string(string, skipRE, 0, "regex")

    if (res >= 0)
      bufferPos = $search_end
    else
      bufferPos = -1            # not enough lines
    }

  if (bufferPos >= 0 && colNum > 0)
    {
    # find where the addressed line ends
    if (useWin)
      res = search("^.*$", bufferPos, "regex")
    else
      res = search_string(string, "^.*$", bufferPos, "regex")

    if (res >= 0)
      {
      last = $search_end
      pos = bufferPos
      col = 0
      while (pos < last)
        {
        if (useWin)
          nexttab = search("\t", pos)
        else
          nexttab = search_string(string, "\t", pos)

        # no tab left on this line: treat the end of line as the next stop
        if (nexttab < pos || nexttab > last)
          nexttab = last
        if (nexttab == pos)
          {
          # pos is at a tab: if the tab extends beyond colNum, return this pos
          nextCol = col + tabSize - (col % tabSize)
          if (nextCol > colNum)
            return pos
          pos++             # skip the tab
          }
        else
          {
          # pos is at a non-tab: if the non-tab sequence extends beyond colNum,
          # we have an overrun of col + nexttab - pos - colNum;
          # return end-of-non-tab-sequence - overrun
          nextCol = col + nexttab - pos
          if (nextCol >= colNum)
            return nexttab - (nextCol - colNum)
          pos = nexttab     # skip to next tab
          }
        # move column count forward
        col = nextCol
        }
      # not found during the loop
      return last
      }
    }
  return bufferPos
  }

# ==============================================================================
# str_line_col_to_pos(string, lineNum [, colNum [, tabSize]]):  returns the
#       position of the character position indicated by lineNum and colNum,
#       assuming a particular tabSize. By default, colNum is zero and tabSize
#       equals $tab_dist. Lines are numbered from 1, columns from zero. If there
#       are not enough lines, -1 is returned; if there are not enough columns in
#       the addressed line, the position of the last character is returned; if
#       the column is "inside" a tab, return the tab's position. All
#       measurements take place within string. This function calls
#       line_col_to_pos(lineNum, colNum, tabSize, string) with appropriate
#       defaults.
# ==============================================================================

define str_line_col_to_pos
  {
  # $1: string to measure in
  # $2: line number, counted from 1
  # $3 (optional): column number, counted from 0; ignored if negative
  # $4 (optional): tab size; anything below 1 is ignored and $tab_dist is
  #     used instead, since a tab size of zero cannot be used in column
  #     arithmetic
  string = $1
  lineNum = $2
  colNum = 0
  if ($n_args >= 3 && $3 >= 0)
    colNum = $3
  tabSize = $tab_dist
  if ($n_args >= 4 && $4 > 0)
    tabSize = $4

  # same work as line_col_to_pos(), with the string moved to the last argument
  return line_col_to_pos(lineNum, colNum, tabSize, string)
  }

# ==============================================================================
# pos_to_line_forwardNlines(pos, nLines [, string]): returns the position of
#       the start of the line nLines away; fails with -1. If string is supplied
#       the search is performed in the string, otherwise in the current window.
# ==============================================================================

define pos_to_line_forwardNlines
  {
  # $1: position to start from
  # $2: number of lines to move forward
  # $3 (optional): string to search instead of the current window
  start = $1
  count = $2 + 0
  inString = ($n_args > 2)

  # moving backward is not supported; moving zero lines goes nowhere
  if (count < 0)
    return -1
  if (count == 0)
    return start

  # match the rest of this line plus the following count - 1 whole lines
  pattern = "(?:.*\n){" count "}"
  if (inString)
    hit = search_string($3, pattern, start, "regex")
  else
    hit = search(pattern, start, "regex")

  # the end of the match is the start of the line that was asked for
  if (hit >= 0)
    return $search_end
  return -1
  }

# ==============================================================================
# pos_to_line(pos [, string]): returns the line number (counted from 1) of the
#       position pos. If pos is too large, this returns the last line number
#       for the document (so the minimum value is 1). If string is present,
#       measurement takes place in the string; otherwise the current document
#       is used.
# ==============================================================================

define pos_to_line
  {
  # $1: position whose line number is wanted
  # $2 (optional): string to measure in instead of the current document
  target = $1
  inString = ($n_args >= 2)
  if (inString)
    {
    text = $2
    limit = length(text)
    }
  else
    {
    text = ""
    limit = $text_length
    }

  # a position beyond the end counts as the end
  if (target >= limit)
    target = limit

  # move the target back to the start of the line that contains it
  if (inString)
    nl = search_string(text, "\n", target - 1, "backward")
  else
    nl = search("\n", target - 1, "backward")

  if (nl >= 0)
    target = $search_end
  else
    target = 0                          # on the first line, no earlier "\n"

  # walk forward from the top in steps of whole lines, doubling the step
  # after each success and halving it after each overshoot
  lineNo = 1
  step = 1
  at = 0
  while (at < target)
    {
    if (inString)
      ahead = pos_to_line_forwardNlines(at, step, text)
    else
      ahead = pos_to_line_forwardNlines(at, step)

    if (ahead == -1 || ahead > target)
      step /= 2
    else
      {
      lineNo += step
      at = ahead
      # avoid backtracking too far in pos_to_line_forwardNlines()
      if (step < 64)
        step *= 2
      }
    }

  return lineNo
  }

# old in-window only version of pos_to_line:
#   define pos_to_line__
#     {
#     line = 0
#     pos = 0
#     posNum = $1
#
#     if (posNum >= $text_length)
#       posNum = $text_length
#
#     while (pos <= posNum)
#       {
#       line++
#       if (search("\n", pos) < 0)
#         break
#       else
#         pos = $search_end
#       }
#
#     return line
#     }
# alternate version using string replacement to count newlines:
#   define pos_to_line___
#     {
#     pos = $1
#     if ($n_args > 1)
#       str = substring($2, 0, pos)
#     else
#       str = get_range(0, pos)
#
#     s = replace_in_string(str, ".*", "", "regex")
#     return 1 + length(s)
#     }

# ==============================================================================
# pos_to_column(pos [, tabSize [, string]]): returns the column number (counted
#       from 0) of the position pos in its line. If pos is too large, this
#       returns the last column of the last line for the document (so the
#       minimum value is 0). If string is present, measurements are taken
#       within string; otherwise within the current document.
# ==============================================================================

define pos_to_column
  {
  # $1: position whose column is wanted
  # $2 (optional): tab size; anything below 1 is ignored and $tab_dist is
  #     used instead, since the tab arithmetic below divides by it
  # $3 (optional): string to measure in instead of the current document
  posNum = $1
  tabSize = $tab_dist
  if ($n_args >= 2 && $2 > 0)
    tabSize = $2
  useWin = ($n_args <= 2)

  if (useWin)
    {
    string = ""
    text_length = $text_length
    # find previous start of line
    pos = search("^", posNum, "regex", "backward")
    }
  else
    {
    string = $3
    text_length = length(string)
    # find previous start of line
    pos = search_string(string, "^", posNum, "regex", "backward")
    }

  if (posNum > text_length)
    posNum = text_length

  col = 0

  end = pos
  while (end < posNum)
    {
    # include "$" to search to stop searching too far beyond area of interest
    # note that the end-of-line will always be at or beyond posNum
    if (useWin)
      pos = search("\t+|$", end, "regex")
    else
      pos = search_string(string, "\t+|$", end, "regex")

    # search will never fail: it will always find $ (since posNum <= length)
    # If the match starts at or beyond posNum, only normal chars lie between
    # end and posNum, so we're done. This must include the case pos == posNum:
    # otherwise the tab-boundary adjustment below would be applied although no
    # tab has been passed, pulling the column back to the previous tab stop.
    if (pos >= posNum)
      return col + posNum - end
    col += pos - end                    # add normal chars from last end to tab
    end = $search_end                   # end of tab sequence
    if (end > posNum)                   # is posNum in the sequence?
      end = posNum                      # yes - so consider only to posNum
    col += tabSize * (end - pos)        # account for tab sequence
    col -= col % tabSize                # and adjust end column to tab boundary
    }

  return col
  }

# ==============================================================================
# colwidth(s [, colpos [, tab_dist]]): returns the width in columns of a string
#       s positioned at column colpos in a line (defaults to zero), and uses a
#       tab width of tab_dist (defaults to $tab_dist) - it assumes no control
#       characters in the string other than tab, and no newlines.
# ==============================================================================

define colwidth
  {
  # $1: the string to measure
  # $2 (optional): column at which the string starts, defaulting to 0
  # $3 (optional): tab width; anything below 1 is ignored and $tab_dist is
  #     used instead, since the tab arithmetic below divides by it
  s = $1

  colpos = 0
  if ($n_args > 1)
    colpos = $2

  tab_dist = $tab_dist
  if ($n_args > 2 && $3 > 0)
    tab_dist = $3

  etab = 0          # end of the previous tab sequence (0 before the first)
  col = colpos
  for (tab = search_string(s, "\t+", 0, "regex"); \
       tab >= 0; \
       tab = search_string(s, "\t+", etab, "regex"))
    {
    # add non-tab char widths: only those since the previous tab sequence
    col += tab - etab
    etab = $search_end
    # add variable width of first tab in tab sequence
    col += tab_dist - (col % tab_dist)
    # add width of following tabs in tab sequence
    col += (etab - tab - 1) * tab_dist
    }
  # add width of chars following last tab
  col += length(s) - etab
  # finally, remove our start column position
  return col - colpos
  }

# ==============================================================================
# get_word_at_pos([pos [, string [, rebeg, reend]]]) - return the word at
#       position pos in the string string. If string is not given, use the
#       current document's text; if pos is not given, use the current cursor
#       position in the document. If the position is not at the start, inside,
#       or at the end of a word, an empty string is returned.
#         Words are found using the regular expressions rebeg, reend, if
#       supplied; by default "<" and ">" are used. If rebeg, reend are given,
#       and string is empty, use the current document's text. For example,
#       rebeg = "(?<=\\s|^)(?=\\S)", reend = "(?<=\\S)(?=\\s|$)" will pick out
#       a non-space sequence, and rebeg = "^", reend = "$" will pick out a line.
# ==============================================================================

define get_word_at_pos
  {
  # $1 (optional): position of interest, defaulting to the cursor
  # $2 (optional): string to look in instead of the current document
  # $3, $4 (optional, both needed): regexes marking the beginning and the end
  #     of a word; when they are given, an empty $2 means the document
  # Returns the word touching the position, or "" if there is none.
  pos = $cursor
  string = ""
  use_string = 0
  rebeg = "<"
  reend = ">"
  if ($n_args >= 1)
    pos = $1 + 0
  if ($n_args >= 2)
    {
    string = $2 ""
    use_string = 1
    }
  if ($n_args >= 4)
    {
    use_string = (string != "")
    rebeg = $3 ""
    reend = $4 ""
    }

  # look back for the start of a word at or before pos; without one there is
  # no word to report (and -1 must not be used as a position below)
  if (use_string)
    beg = search_string(string, rebeg, pos, "regex", "backward")
  else
    beg = search(rebeg, pos, "regex", "backward")
  if (beg < 0)
    return "" # failed

  # look forward for the end of that word; it must reach pos to overlap it
  if (use_string)
    end = search_string(string, reend, beg, "regex")
  else
    end = search(reend, beg, "regex")
  if (end < pos)
    return "" # failed

  if (use_string)
    return substring(string, beg, end)
  return get_range(beg, end)
  }

# ==============================================================================
# longest_line_len(strs): returns the length of the longest line in the string
#       strs.
# ==============================================================================

define longest_line_len
  {
  lines = split($1, "\n")

  # remember the greatest length seen among the lines
  longest = 0
  for (key in lines)
    {
    n = length(lines[key])
    if (n > longest)
      longest = n
    }

  return longest
  }

# ==============================================================================
# longest_line(strs): returns the longest line in the string strs.
# ==============================================================================

define longest_line
  {
  # $1: a string of newline-separated lines
  # Returns the longest line; among lines of equal length, the first one.
  strs = split($1, "\n")
  nstr = strs[]

  maxlen = 0
  str = ""
  # visit the lines by number, in their original order: iterating with
  # "for (i in strs)" would visit the keys in string order ("10" before "2"),
  # making the choice between equally long lines depend on the line count
  for (i = 0; i < nstr; i++)
    {
    s = strs[i]
    len = length(s)
    if (len > maxlen)
      {
      str = s
      maxlen = len
      }
    }

  return str
  }

# ==============================================================================
# longest_line_base_re(strs): returns information about the longest line in the
#       string strs. The line is found using regular expressions. The result is
#       an array with elements
#           ["start"]   position of the first character of the line
#           ["end"]     position of the line's newline (or end-of-string)
#           ["line"]    the line's content
#           ["length"]  the line's length
# ==============================================================================

define longest_line_base_re
  {
  text = $1

  known = 0         # a line with this many characters is known to exist
  probe = 1         # how many more characters to try for next
  at = 0            # where the most recent successful match started
  searching = 1
  while (searching)
    {
    hit = search_string(text, "^.{" (known + probe) "}", at, "regex")
    if (hit >= 0)
      {
      # last one worked; try again with more, from the line just found
      probe *= 2
      at = hit
      }
    else if (probe > 1)
      {
      # no line is that long, but the previous (half-sized) probe worked:
      # bank it and start again with a low probe
      known += probe / 2
      probe = 1
      }
    else
      searching = 0 # not even one more character fits: known is the answer
    }

  info["start"]  = at
  info["end"]    = at + known
  info["line"]   = substring(text, at, at + known)
  info["length"] = known

  return info
  }

# ==============================================================================
# longest_line_len_re(strs): returns the length of the longest line in the
#       string strs. The line is found using regular expressions. Uses
#       longest_line_base_re().
# ==============================================================================

define longest_line_len_re
  {
  # pick the length out of the information gathered about the longest line
  info = longest_line_base_re($1)
  return info["length"]
  }

# ==============================================================================
# longest_line_re(strs): returns the longest line in the string strs. The line
#       is found using regular expressions. Uses longest_line_base_re().
# ==============================================================================

define longest_line_re
  {
  # pick the content out of the information gathered about the longest line
  info = longest_line_base_re($1)
  return info["line"]
  }

# ==============================================================================
# rjust_s(strs [, maxstr]): returns a string holding each line of str padded
#       with spaces to the left so that it is as long as maxstr, or as long as
#       the longest line in strs.
# ==============================================================================

define rjust_s
  {
  # $1: newline-separated lines to justify
  # $2 (optional): string whose length gives the minimum width
  lines = split($1, "\n")
  count = lines[]

  # the padding is a run of spaces as long as the widest thing seen so far
  pad = ""
  if ($n_args > 1)
    pad = replace_in_string($2, "(?n.)", " ", "regex")
  width = length(pad)

  # widen the padding to the longest line, if that is longer
  for (k = 0; k < count; k++)
    {
    line = lines[k]
    n = length(line)
    if (n > width)
      {
      width = n
      pad = replace_in_string(line, ".", " ", "regex")
      }
    }

  # put the padding in front of each line and keep the last width characters
  out = ""
  sep = ""
  for (k = 0; k < count; k++)
    {
    line = lines[k]
    n = length(line)
    out = out sep substring(pad line, n, width + n)
    sep = "\n"
    }

  return out
  }

# ==============================================================================
# rjust(strs [, minlen]): returns a string holding each line of str padded with
#       spaces to the left so that it is minlen long, or as long as the longest
#       line in strs. Calls rjust_s().
# ==============================================================================

define rjust
  {
  # $1: newline-separated lines to justify
  # $2 (optional): the minimum width, either as a number or as a string whose
  #     length is the width; without it only the longest line sets the width
  pads = ""
  if ($n_args > 1)
    {
    if (valid_number($2))
      {
      minlen = $2
      if (minlen > 0)
        {
        # grow pads till big enough/too big
        pads = "1234567890"
        while (length(pads) < minlen)
          pads = pads pads
        # crop it down to the right size
        pads = substring(pads, 0, minlen)
        }
      }
    else
      pads = $2
    }

  # rjust_s() only uses the length of pads, as the minimum width
  return rjust_s($1, pads)
  }

# ==============================================================================
# ljust_s(strs [, maxstr]): returns a string holding each line of str padded
#       with spaces to the right so that it is as long as maxstr, or as long as
#       the longest line in strs.
# ==============================================================================

define ljust_s
  {
  # $1: newline-separated lines to justify
  # $2 (optional): string whose length gives the minimum width
  lines = split($1, "\n")
  count = lines[]

  # the padding is a run of spaces as long as the widest thing seen so far
  pad = ""
  if ($n_args > 1)
    pad = replace_in_string($2, "(?n.)", " ", "regex")
  width = length(pad)

  # widen the padding to the longest line, if that is longer
  for (k = 0; k < count; k++)
    {
    line = lines[k]
    n = length(line)
    if (n > width)
      {
      width = n
      pad = replace_in_string(line, ".", " ", "regex")
      }
    }

  # put the padding after each line and keep the first width characters
  out = ""
  sep = ""
  for (k = 0; k < count; k++)
    {
    line = lines[k]
    out = out sep substring(line pad, 0, width)
    sep = "\n"
    }

  return out
  }

# ==============================================================================
# ljust(strs [, minlen]): returns a string holding each line of strs padded with
#       spaces to the right so that it is minlen long, or as long as the longest
#       line in strs when that is longer or when minlen is omitted.
#       A numeric minlen (negative values count as 0) is turned into a template
#       string of that length; a non-numeric second argument is handed to
#       ljust_s() unchanged, so its length acts as the minimum width.
#       Calls ljust_s().
# ==============================================================================

define ljust
  {
  # without a minimum width the template is empty, so that only the lines of
  # strs themselves determine the final width
  pads = ""

  if ($n_args > 1)
    {
    if (valid_number($2))
      {
      minlen = $2
      if (minlen < 0)
        minlen = 0
      # grow a seed string by doubling until it is big enough/too big
      pads = "12345678901234567890"
      while (length(pads) < minlen)
        pads = pads pads
      # crop it down to the right size
      pads = substring(pads, 0, minlen)
      }
    else
      {
      # not a number: pass the string itself on as the template
      pads = $2
      }
    }

  return ljust_s($1, pads)
  }

# ==============================================================================
# trim(strs[, re[, nlSense]]): returns a copy of strs in which everything
#       matching "(?"nlSense"^(?:"re"))" and then everything matching
#       "(?"nlSense"(?:"re")$)" has been removed.
#       re defaults to "\\s+" and nlSense to "N".
# ==============================================================================

define trim
  {
  pat = "\\s+"
  sense = "N"
  if ($n_args >= 2) pat = $2
  if ($n_args >= 3) sense = $3

  headRe = "(?" sense "^(?:" pat "))"
  tailRe = "(?" sense "(?:" pat ")$)"

  # leading matches are removed first, trailing ones afterwards
  stripped = replace_in_string($1, headRe, "", "regex", "copy")
  return replace_in_string(stripped, tailRe, "", "regex", "copy")
  }

# ==============================================================================
# chomp(str[, re]): returns a string made from the content of str with the tail
#       corresponding to re removed. The match is done within a
#       "(?n(?:...)(?!.))" group: the negative look-ahead only succeeds when no
#       character at all (not even a newline) follows, so only the end of the
#       whole string is affected. The default value of re is "\n".
# ==============================================================================

define chomp
  {
  str = $1
  re = "\n"

  if ($n_args >= 2) re = $2

  # strip the end; "$" is not used as the anchor because it also matches in
  # front of every newline inside the string, which would remove matches at
  # interior line ends as well
  str = replace_in_string(str, "(?n(?:"re")(?!.))", "", "regex", "copy")

  return str
  }

# ==============================================================================
# compress(strs[, re[, repl[, nlSense]]]): returns a copy of strs in which every
#       sequence matching "(?"nlSense"(?:"re")+)" has been replaced with repl.
#       Defaults: re is "\\s", repl is " " and nlSense is "N".
# ==============================================================================

define compress
  {
  pat = "\\s"
  with = " "
  sense = "N"

  if ($n_args >= 2) pat = $2
  if ($n_args >= 3) with = $3
  if ($n_args >= 4) sense = $4

  # one or more consecutive matches of pat collapse into a single replacement
  runRe = "(?" sense "(?:" pat ")+)"
  return replace_in_string($1, runRe, with, "regex", "copy")
  }

# ==============================================================================
# repeat(str, n): returns a string built by repeating str n times; returns ""
#       when n is less than 1. The string is doubled at each step so that the
#       number of concatenations stays small.
# ==============================================================================

define repeat
  {
  unit = $1 ""
  count = $2
  if (count < 1)
    return ""
  if (count == 1)
    return unit

  # walk through the binary digits of count, lowest first: unit holds the
  # original string repeated 1, 2, 4, ... times and is appended to the result
  # whenever the current digit is set
  out = ""
  while (count)
    {
    if (count % 2 == 1)
      out = out unit
    count /= 2
    if (count)
      unit = unit unit
    }
  return out
  }

# ==============================================================================
# reverse_string(str): returns a string holding the characters of str in
#       reverse order.
# ==============================================================================

define reverse_string
  {
  src = $1 ""
  out = ""
  # pick the characters up one at a time, starting with the last one
  for (pos = length(src) - 1; pos >= 0; pos--)
    out = out substr(src, pos, pos + 1)
  return out
  }

# ==============================================================================
# number(string [, "strict"|defval]): returns the numeric value of the optionally
#       signed integer read from the front of the string argument (leading
#       white space is skipped). If no such number is found, the result is
#       defval, itself converted with number() (default zero), or, if "strict"
#       was given, the macro fails with a call to an undefined function.
#       [IDENTICAL TO THE to_number() FUNCTION]
# ==============================================================================

define number
  {
  strict = 0
  fallback = 0
  if ($n_args > 1)
    {
    if ($2 == "strict")
      strict = 1
    else
      fallback = number($2)
    }

  # keep only the leading number; the result is empty if there is none
  digits = replace_in_string($1, "(?n^\\s*([-+]?\\d+).*)", "\\1", "regex")
  if (valid_number(digits))
    return digits + 0

  if (strict)
    return number_NoNumericPrefixFound() + 0
  return fallback + 0
  }

# ==============================================================================
# to_number(string [, "strict"|defval]): returns the numeric value of the
#       optionally signed integer read from the front of the string argument
#       (leading white space is skipped). If no such number is found, the
#       result is defval, itself converted with to_number() (default zero), or,
#       if "strict" was given, the macro fails with a call to an undefined
#       function. [IDENTICAL TO THE number() FUNCTION]
# ==============================================================================

define to_number
  {
  fallback = 0
  if ($n_args > 1 && $2 != "strict")
    fallback = to_number($2)

  # keep only the leading number; the result is empty if there is none
  head = replace_in_string($1, "(?n^\\s*([-+]?\\d+).*)", "\\1", "regex")
  if (!valid_number(head))
    {
    head = fallback
    if ($n_args > 1 && $2 == "strict")
      head = number_NoNumericPrefixFound()
    }
  return head + 0
  }

# ==============================================================================
# String comparison functions
# ==============================================================================

# nuls(a): returns true (1) if string a is empty, false (0) otherwise
define nuls
  {
  return length($1) == 0
  }
# nz(a): returns the length of string a, which is true exactly when a != ""
define nz
  {
  len = length($1)
  return len
  }

# ------------------------------------------------------------------------------

# eqs(a, b): returns true if string_compare() finds a and b to be equal
define eqs
  {
  order = string_compare($1, $2)
  return order == 0
  }
# nes(a, b): returns true if string_compare() finds a and b to differ
define nes
  {
  order = string_compare($1, $2)
  return order != 0
  }
# lts(a, b): returns true if string_compare() orders a before b (a < b)
define lts
  {
  order = string_compare($1, $2)
  return order < 0
  }
# les(a, b): returns true if string_compare() does not order a after b (a <= b)
define les
  {
  order = string_compare($1, $2)
  return order <= 0
  }
# gts(a, b): returns true if string_compare() orders a after b (a > b)
define gts
  {
  order = string_compare($1, $2)
  return order > 0
  }
# ges(a, b): returns true if string_compare() does not order a before b (a >= b)
define ges
  {
  order = string_compare($1, $2)
  return order >= 0
  }

# ------------------------------------------------------------------------------

# eqsi(a, b): true if strings a and b are equal when compared with "nocase"
define eqsi
  {
  order = string_compare($1, $2, "nocase")
  return order == 0
  }
# nesi(a, b): true if strings a and b differ when compared with "nocase"
define nesi
  {
  order = string_compare($1, $2, "nocase")
  return order != 0
  }
# ltsi(a, b): true if string a orders before b (a < b) when compared with
#       "nocase"
define ltsi
  {
  order = string_compare($1, $2, "nocase")
  return order < 0
  }
# lesi(a, b): true if string a does not order after b (a <= b) when compared
#       with "nocase"
define lesi
  {
  order = string_compare($1, $2, "nocase")
  return order <= 0
  }
# gtsi(a, b): true if string a orders after b (a > b) when compared with
#       "nocase"
define gtsi
  {
  order = string_compare($1, $2, "nocase")
  return order > 0
  }
# gesi(a, b): true if string a does not order before b (a >= b) when compared
#       with "nocase"
define gesi
  {
  order = string_compare($1, $2, "nocase")
  return order >= 0
  }

# ==============================================================================
# Numeric string comparison functions
# ==============================================================================

# eq(a, b): true if a == b after both have been converted with to_number()
define eq
  {
  lhs = to_number($1)
  rhs = to_number($2)
  return lhs == rhs
  }
# ne(a, b): true if a != b after both have been converted with to_number()
define ne
  {
  lhs = to_number($1)
  rhs = to_number($2)
  return lhs != rhs
  }
# lt(a, b): true if a < b after both have been converted with to_number()
define lt
  {
  lhs = to_number($1)
  rhs = to_number($2)
  return lhs < rhs
  }
# le(a, b): true if a <= b after both have been converted with to_number()
define le
  {
  lhs = to_number($1)
  rhs = to_number($2)
  return lhs <= rhs
  }
# gt(a, b): true if a > b after both have been converted with to_number()
define gt
  {
  lhs = to_number($1)
  rhs = to_number($2)
  return lhs > rhs
  }
# ge(a, b): true if a >= b after both have been converted with to_number()
define ge
  {
  lhs = to_number($1)
  rhs = to_number($2)
  return lhs >= rhs
  }

# ------------------------------------------------------------------------------
# versions of "between"
# ------------------------------------------------------------------------------

# lt_lt(a, b, c): true if a < b < c, compared with lt()
define lt_lt
  {
  if (!lt($1, $2))
    return 0
  return lt($2, $3)
  }
# lt_le(a, b, c): true if a < b <= c, compared with lt() and le()
define lt_le
  {
  if (!lt($1, $2))
    return 0
  return le($2, $3)
  }
# le_le(a, b, c): true if a <= b <= c, compared with le()
define le_le
  {
  if (!le($1, $2))
    return 0
  return le($2, $3)
  }
# le_lt(a, b, c): true if a <= b < c, compared with le() and lt()
define le_lt
  {
  if (!le($1, $2))
    return 0
  return lt($2, $3)
  }
# between(a, b, c): true if b lies in the closed range bounded by a and c,
#       whichever way round they are given (a <= b <= c or c <= b <= a)
define between
  {
  if (le_le($1, $2, $3))
    return 1
  return le_le($3, $2, $1)
  }

# ------------------------------------------------------------------------------
# bit manipulation
# ------------------------------------------------------------------------------

# compl(a): returns the bitwise complement of a, computed arithmetically as
#       -a - 1
define compl
  {
  negated = -$1
  return negated - 1
  }

# xor(a, b): returns the bitwise exclusive-or of a and b, i.e. the bits set in
#       either operand with those set in both removed
define xor
  {
  either = $1 | $2
  both = $1 & $2
  return either & compl(both)
  }

# lshift(val, bits): returns val shifted left by bits positions, done by
#       doubling val once per position; if bits < 0, rshift(val, -bits) is
#       returned instead
define lshift
  {
  val = $1
  bits = $2
  if (bits < 0)
    return rshift(val, -bits)
  for (k = 0; k < bits; k++)
    val *= 2
  return val
  }

# rshift(val, bits): return (val >> bits) with sign extension; if bits < 0,
#       lshift() is used. Each step halves val rounding towards minus infinity,
#       so that negative values behave like a sign-extending shift
#       (e.g. rshift(-1, 1) is -1 and rshift(-3, 1) is -2).
define rshift
  {
  val = $1
  bits = $2
  if (bits < 0)
    return lshift(val, -bits)
  while (bits-- > 0)
    {
    if (val < 0)
      {
      # plain division rounds towards zero, which is wrong for negative odd
      # values: halve the (non-negative) complement of val instead and
      # complement the result again
      val = -((-(val + 1)) / 2) - 1
      }
    else
      val /= 2
    }
  return val
  }

# urshift(val, bits): return (val >> bits) with zero extension; if bits < 0,
#       lshift() is used. For a negative val the first step is a sign-extending
#       shift followed by clearing the top bit; the result is then non-negative
#       and the remaining steps are plain halvings.
define urshift
  {
  val = $1
  bits = $2
  if (bits < 0)
    return lshift(val, -bits)
  if (bits > 0 && val < 0)
    {
    # calculate top bit: keep doubling until the value turns negative
    topbit = 1
    while (topbit > 0)
      topbit *= 2
    # halve rounding towards minus infinity (plain division rounds towards
    # zero and would give the wrong low bits for odd values) ...
    val = -((-(val + 1)) / 2) - 1
    # ... then remove the top bit by masking with the complement of topbit
    val &= -topbit - 1
    bits--
    }
  while (bits-- > 0)
    val /= 2
  return val
  }

# ==============================================================================
# Matching functions
# ==============================================================================

# ==============================================================================
# match_as(text, pattern, type): searches text for pattern from position 0 with
#       search_string(), passing type as the search type. Returns true (1) if
#       the pattern is found, false (0) otherwise.
#       Every call first removes all entries of $EXTENSIONS_NM whose key starts
#       with "match_as__"; on success the start, end, preceding text, matched
#       text, following text, pattern and type are then stored there.
#       Retrieve the stored values using matched_prefix(), matched_suffix()
#       matched_text(), matched_start() and matched_end().
#
#       The following call match_as():
#           match_re()/match_rei() - match regex
#           match_ren()/match_reni() - match regex with \n as normal character
#           match_w()/match_wi() - match word
#           match_s()/match_si() - match substring
# ==============================================================================

define match_as
  {
  subject = $1
  pat = $2
  kind = $3

  # search first and pick up the end of the match straight away
  from = search_string(subject, pat, 0, kind)
  to = $search_end

  # forget whatever was remembered from an earlier call
  prefix = "match_as__"
  plen = length(prefix)
  for (key in $EXTENSIONS_NM)
    {
    if (substring(key, 0, plen) == prefix)
      delete $EXTENSIONS_NM[key]
    }

  if (from < 0)
    return 0

  $EXTENSIONS_NM["match_as__start"] = from
  $EXTENSIONS_NM["match_as__end"] = to
  $EXTENSIONS_NM["match_as__before"] = substring(subject, 0, from)
  $EXTENSIONS_NM["match_as__matched"] = substring(subject, from, to)
  $EXTENSIONS_NM["match_as__after"] = substring(subject, to)
  $EXTENSIONS_NM["match_as__pattern"] = pat
  $EXTENSIONS_NM["match_as__type"] = kind
  return 1
  }

# matched_prefix(): returns the text that preceded the match in the string
#       scanned by the last match_as() call. Fails if that call returned false,
#       as no value is stored then. See match_as().
define matched_prefix
  {
  key = "match_as__before"
  return $EXTENSIONS_NM[key]
  }
# matched_suffix(): returns the text that followed the match in the string
#       scanned by the last match_as() call. Fails if that call returned false,
#       as no value is stored then. See match_as().
define matched_suffix
  {
  key = "match_as__after"
  return $EXTENSIONS_NM[key]
  }
# matched_text(): returns the text matched by the last match_as() call.
#       Fails if that call returned false, as no value is stored then.
#       See match_as().
define matched_text
  {
  key = "match_as__matched"
  return $EXTENSIONS_NM[key]
  }
# matched_start(): returns the position at which the match found by the last
#       match_as() call starts. Fails if that call returned false, as no value
#       is stored then. See match_as().
define matched_start
  {
  key = "match_as__start"
  return $EXTENSIONS_NM[key]
  }
# matched_end(): returns the position at which the match found by the last
#       match_as() call ends. Fails if that call returned false, as no value
#       is stored then. See match_as().
define matched_end
  {
  key = "match_as__end"
  return $EXTENSIONS_NM[key]
  }
# matched_groups(): returns the number of capturing parentheses in the pattern
#       of the last regular expression match, as counted by
#       regex_capturing_parens(). The number is cached in $EXTENSIONS_NM if not
#       already there. Both regex search types stored by match_as() ("regex"
#       and "regexNoCase") are accepted; for any other type the macro fails
#       with a call to an undefined function.
define matched_groups
  {
  re = $EXTENSIONS_NM["match_as__pattern"]
  type = $EXTENSIONS_NM["match_as__type"]
  if (type != "regex" && type != "regexNoCase")
    return matched_groups_called_for_non_regex()
  if ("match_as__paren_groups" in $EXTENSIONS_NM)
    n = $EXTENSIONS_NM["match_as__paren_groups"]
  else
    {
    n = regex_capturing_parens(re)
    $EXTENSIONS_NM["match_as__paren_groups"] = n
    }
  return n
  }
# matched_part(i): returns the match of parenthesised group i (1 to 9) from the
#       last regular expression match, or the whole matched text if i is 0.
#       The group is extracted by applying the stored pattern, with the stored
#       search type, to the matched text on its own. Fails with a call to an
#       undefined function if the last match was not a regex match, or if i is
#       negative, greater than 9 or greater than matched_groups().
define matched_part
  {
  i = $1 + 0
  type = $EXTENSIONS_NM["match_as__type"]
  if (type != "regex" && type != "regexNoCase")
    return matched_part_called_for_non_regex()
  if (i == 0)
    return $EXTENSIONS_NM["match_as__matched"]
  if (i < 0)
    return matched_part_called_for_negative_group_index()
  if (i > 9)
    return matched_part_called_for_group_index_greater_than_9()
  if (i > matched_groups())
    return matched_part_called_for_group_index_greater_than_groups_found()
  # re-match with the same case sensitivity as the original search, so that a
  # case-insensitive match is found again
  return replace_in_string($EXTENSIONS_NM["match_as__matched"], \
                           $EXTENSIONS_NM["match_as__pattern"], "\\"i, type)
  }
# matched_1(): shorthand for matched_part(1), the text of parenthesis group 1
#       of the last match
define matched_1
  {
  group = 1
  return matched_part(group)
  }
# matched_2(): shorthand for matched_part(2), the text of parenthesis group 2
#       of the last match
define matched_2
  {
  group = 2
  return matched_part(group)
  }
# matched_3(): shorthand for matched_part(3), the text of parenthesis group 3
#       of the last match
define matched_3
  {
  group = 3
  return matched_part(group)
  }
# matched_4(): shorthand for matched_part(4), the text of parenthesis group 4
#       of the last match
define matched_4
  {
  group = 4
  return matched_part(group)
  }
# matched_5(): shorthand for matched_part(5), the text of parenthesis group 5
#       of the last match
define matched_5
  {
  group = 5
  return matched_part(group)
  }
# matched_6(): shorthand for matched_part(6), the text of parenthesis group 6
#       of the last match
define matched_6
  {
  group = 6
  return matched_part(group)
  }
# matched_7(): shorthand for matched_part(7), the text of parenthesis group 7
#       of the last match
define matched_7
  {
  group = 7
  return matched_part(group)
  }
# matched_8(): shorthand for matched_part(8), the text of parenthesis group 8
#       of the last match
define matched_8
  {
  group = 8
  return matched_part(group)
  }
# matched_9(): shorthand for matched_part(9), the text of parenthesis group 9
#       of the last match
define matched_9
  {
  group = 9
  return matched_part(group)
  }

# ------------------------------------------------------------------------------

# match_re(str, pat): returns the result of match_as() for regular expression
#       pat in str, using search type "regex".
define match_re
  {
  found = match_as($1, $2, "regex")
  return found
  }
# match_rei(str, pat): returns the result of match_as() for regular expression
#       pat in str, using search type "regexNoCase" (case insignificant).
define match_rei
  {
  found = match_as($1, $2, "regexNoCase")
  return found
  }
# match_ren(str, pat): return true if regular expression pat is found in str.
#       The pattern is wrapped in "(?n...)" so that newline is treated as a
#       normal character; the search is case sensitive (search type "regex").
#       See match_reni() for the case insignificant form.
define match_ren
  {
  return match_as($1, "(?n" $2 ")", "regex")
  }
# match_reni(str, pat): returns the result of match_as() for regular expression
#       pat, wrapped in "(?n...)" (newline insignificant), in str, using search
#       type "regexNoCase" (case insignificant).
define match_reni
  {
  wrapped = "(?n" $2 ")"
  return match_as($1, wrapped, "regexNoCase")
  }

# match_w(str, pat): returns the result of match_as() for word(s) pat in str,
#       using search type "caseWord".
define match_w
  {
  found = match_as($1, $2, "caseWord")
  return found
  }
# match_wi(str, pat): returns the result of match_as() for word(s) pat in str,
#       using search type "word" (case insignificant).
define match_wi
  {
  found = match_as($1, $2, "word")
  return found
  }

# match_s(str, pat): returns the result of match_as() for string pat in str,
#       using search type "case".
define match_s
  {
  found = match_as($1, $2, "case")
  return found
  }
# match_si(str, pat): returns the result of match_as() for string pat in str,
#       using search type "literal" (case insignificant).
define match_si
  {
  found = match_as($1, $2, "literal")
  return found
  }

# ------------------------------------------------------------------------------

# sub_re(str, pat, replace): short form for a regular expression substitution;
#       returns replace_in_string(str, pat, replace, "regex", "copy")
define sub_re
  {
  changed = replace_in_string($1, $2, $3, "regex", "copy")
  return changed
  }

# sub_s(str, pat, replace): short form for a case sensitive literal
#       substitution; returns
#       replace_in_string(str, pat, replace, "case", "copy")
define sub_s
  {
  changed = replace_in_string($1, $2, $3, "case", "copy")
  return changed
  }

# ------------------------------------------------------------------------------

# background_is_dark(override): attempts to determine whether the standard NEdit
#       background is dark, to help select good values for rangeset colors.
#       If a parameter is given, it should be one of the following values:
#       - "ask": ask the user whether background is dark with a dialog box
#       - "reset": clear all values and process from scratch
#       - "yes" or a non-zero number: set the return value to true
#       - "no" or 0: set the return value to false
#       Any other value is ignored.
#       The answer is cached in $EXTENSIONS_NM and returned directly by later
#       calls. Without a cached answer, the background color is looked up with
#       get_colors() or, failing that, taken from highlighting style
#       information; if neither gives a color, the user is asked.
#       Returns 1 (true) if the background is dark, 0 (false) otherwise.
define background_is_dark
  {
  # keys used in $EXTENSIONS_NM: the cached answer and the background color
  value = "background_is_dark__value"
  rgb_text_bg = "background_is_dark__rgb_text_bg"
  res = 0
  isDark = 0
  rgb = ""

  if ($n_args > 0)
    {
    isDark = $1
    # a numeric override is mapped onto "yes"/"no"
    if (valid_number(isDark))
      {
      if (isDark != 0)
        isDark = "yes"
      else
        isDark = "no"
      }

    if (isDark == "yes")
      $EXTENSIONS_NM[value] = 1
    else if (isDark == "no")
      $EXTENSIONS_NM[value] = 0
    else if (isDark == "ask")
      {
      # if we have a "is dark" state, propose dark first (default button)
      # NOTE(review): if dialog() returns something other than 1 or 2 (e.g.
      # presumably 0 when the dialog is dismissed), the first branch stores
      # "light" while the second stores "dark" - confirm this is intended
      if ((value in $EXTENSIONS_NM) && \
          $EXTENSIONS_NM[value])
        {
        res = dialog("Is the window background dark or light?", "Dark", "Light")
        $EXTENSIONS_NM[value] = (res == 1)
        }
      else
        {
        res = dialog("Is the window background light or dark?", "Light", "Dark")
        $EXTENSIONS_NM[value] = (res != 1)
        }
      }
    else if (isDark == "reset")
      {
      # drop both the cached answer and the cached color
      if (value in $EXTENSIONS_NM)
        delete $EXTENSIONS_NM[value]
      if (rgb_text_bg in $EXTENSIONS_NM)
        delete $EXTENSIONS_NM[rgb_text_bg]
      }
    }

  # a cached (or just set) answer wins over any further detection
  if (value in $EXTENSIONS_NM)
    {
    return $EXTENSIONS_NM[value]
    }

  # we attempt to call the non-standard get_colors(): if this does not exist,
  # the macro will crash - so set things up so this only happens once.

  if (!(rgb_text_bg in $EXTENSIONS_NM)) # have we tried yet?
    {
    # store an empty color first: if the next line crashes, the key exists
    # already and get_colors() is not tried again on later calls
    $EXTENSIONS_NM[rgb_text_bg] = ""
    $EXTENSIONS_NM[rgb_text_bg] = get_colors()["rgb_text_bg"]
    }
  # res is non-zero only if a color string is available
  res = length($EXTENSIONS_NM[rgb_text_bg])

  # if we get here and don't have a result (get_colors() crashed), try to
  # use highlighting information (if any) - this might not provide a good value
  # because a highlighting background color may supersede the normal text
  # background (try at the start and/or end of the buffer)
  if (!res)
    {
    style = get_style_at_pos(0)
    if ("background" in style && \
        get_range($text_length - 1, $text_length) == "\n")
      {
      # the first character has highlights with a background; try the last
      # (if it's a "\n")
      style = get_style_at_pos($text_length - 1)
      }
    if ("back_rgb" in style)
      {
      $EXTENSIONS_NM[rgb_text_bg] = style["back_rgb"]
      res = length($EXTENSIONS_NM[rgb_text_bg])
      }
    }

  if (res)
    {
    # split the color into its red, green and blue parts
    # (assumes the form "#rrggbb" - TODO confirm)
    rgb = $EXTENSIONS_NM[rgb_text_bg]
    r = hex_to_int(substring(rgb, 1, 3))
    g = hex_to_int(substring(rgb, 3, 5))
    b = hex_to_int(substring(rgb, 5, 7))
    # dark means: no component is bright, and the three together sum to less
    # than 3 * 128
    isDark = (r < 192 && g < 192 && b < 220 && (r + g + b) < (3 * 128))
    $EXTENSIONS_NM[value] = isDark
    return isDark
    }

  # if we get here, give up: ask the user
  return background_is_dark("ask")
  }

# ------------------------------------------------------------------------------

# set_window_title([string [, type]]): changes the window's title by calling
#       set_window_title_format(string, type); string defaults to "" and type
#       to "text". A flag in $EXTENSIONS_NM records whether the call worked:
#       if the version of NEdit does not support the function, the macro fails
#       once only, and subsequent calls simply do nothing.
define set_window_title
  {
  flag = "set_window_title works"

  fmt = ""
  kind = "text"
  if ($n_args > 0)
    fmt = $1
  if ($n_args > 1)
    kind = $2

  if (flag in $EXTENSIONS_NM)
    {
    # failed before - don't try again
    if ($EXTENSIONS_NM[flag] == "no")
      return
    }
  else
    {
    # first attempt: record failure up front, in case the call below crashes
    $EXTENSIONS_NM[flag] = "no"
    }

  set_window_title_format(fmt, kind)
  $EXTENSIONS_NM[flag] = "yes"              # OK for next time
  }