# ==============================================================================
# extensions.nm
# Author: Tony Balinski
#
# This file contains the following functions:
#       tochar
#       toascii
#       isinstring
#       tolowercase
#       touppercase
#       islowercase islower isuppercase isupper isalpha isasciialpha
#       isdigit isxdigit isalnum isspace ispunct isgraph isprint iscntrl isascii
#       isblank isword
#       togglecase
#       substr
#       replace_in_str
#       hex_to_int
#       hex_to_char
#       hex2_to_str
#       int_to_hex
#       char_to_hex
#       start_of_line_pos
#       end_of_line_pos
#       line_of_pos
#       break_lines_over
#       quote_literal_as_regex
#       regex_to_quoted_string
#       line_col_to_pos
#       str_line_col_to_pos
#       pos_to_line
#       str_pos_to_line
#       pos_to_column
#       str_pos_to_column
#       rjust
#       ljust
#       trim
#       compress
#       number
# ==============================================================================

# $CHARS["all"]: the 8-bit character codes in ascending order, one character
# per code, so that the character with code N sits at string index N.
# The leading " " is a dummy character for the zero index (character NUL).
$CHARS["all"] = \
           " \x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
        "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
        "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
        "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
        "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
        "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
        "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" \
        "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
        "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
        "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
        "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
        "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
        "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
        "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
        "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"

# ascii: character-class table restricted to 7-bit ASCII. Each entry is a
# string listing the members of the class named by its key; composite classes
# are built by concatenating the simpler ones.
ascii["upper"]  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
ascii["lower"]  = "abcdefghijklmnopqrstuvwxyz"
ascii["alpha"]  = ascii["upper"] ascii["lower"]
ascii["digit"]  = "0123456789"
ascii["alnum"]  = ascii["alpha"] ascii["digit"]
ascii["punct"]  = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
ascii["space"]  = "\t\n\v\f\r "
ascii["xdigit"] = ascii["digit"] "ABCDEFabcdef"
ascii["graph"]  = ascii["alnum"] ascii["punct"]
ascii["print"]  = ascii["graph"] " "
ascii["word"]   = ascii["alnum"] "_"
ascii["blank"]  = " \t"
# codes \x01 to \x1f only: NUL and \x7f are not listed
ascii["cntrl"]  =     "\x01\x02\x03\x04\x05\x06\x07" \
                  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                  "\x10\x11\x12\x13\x14\x15\x16\x17" \
                  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"

# publish the table under the "ascii" key of the global $CHARS array
$CHARS["ascii"] = ascii

# iso8859_1: character-class table for ISO 8859-1 (Latin-1). Each entry is a
# string listing the members of the class named by its key.
# Every character above \x7f is written as a \x escape (as in $CHARS["all"]),
# so the table keeps exactly one character per code whatever encoding this
# file is stored or edited in. "upper" and "lower" are index-aligned: the
# lower-case form of upper[i] is lower[i]. \xd7 (multiplication sign) and \xf7
# (division sign) are punctuation, not letters; \xdf and \xff are letters
# without a partner in the other case.
iso8859_1["upper"]  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
        "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
        "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd8\xd9\xda\xdb\xdc\xdd\xde"
iso8859_1["lower"]  = "abcdefghijklmnopqrstuvwxyz" \
        "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
        "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
iso8859_1["alpha"]  = iso8859_1["upper"] iso8859_1["lower"] "\xdf\xff"
iso8859_1["digit"]  = "0123456789"
iso8859_1["alnum"]  = iso8859_1["alpha"] iso8859_1["digit"]
iso8859_1["punct"]  = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" \
        "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
        "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
        "\xd7\xf7"
iso8859_1["space"]  = "\t\n\v\f\r "
iso8859_1["xdigit"] = iso8859_1["digit"] "ABCDEFabcdef"
iso8859_1["graph"]  = iso8859_1["alnum"] iso8859_1["punct"]
iso8859_1["print"]  = iso8859_1["graph"] " "
iso8859_1["word"]   = iso8859_1["alnum"] "_"
iso8859_1["blank"]  = " \t"
iso8859_1["cntrl"]  =     "\x01\x02\x03\x04\x05\x06\x07" \
                      "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                      "\x10\x11\x12\x13\x14\x15\x16\x17" \
                      "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"

# publish the table under the "iso8859_1" key of the global $CHARS array
$CHARS["iso8859_1"] = iso8859_1

# $ASC256_CHARS: lookup table in which the character with code N sits at
# string index N (index 0 holds a dummy " " standing in for NUL).
# It is taken from $CHARS["all"], which spells out every code as a \x escape,
# rather than from a second, literal copy of the table: raw control and 8-bit
# characters in a literal do not survive editing or re-encoding of this file,
# and a single lost character shifts every index that follows it.
$ASC256_CHARS = $CHARS["all"]

# ==============================================================================
# tochar(int): converts a character code to a one-character string taken from
#       $ASC256_CHARS. Codes 1..255 are looked up directly; codes -128..-1 are
#       first shifted up by 256. Any other value, including 0, yields the
#       empty string.
# ==============================================================================

define tochar
  {
  code = $1

  # zero and out-of-range codes have no character
  if (code == 0 || code < -128 || code > 255)
    return ""

  # negative codes address the upper half of the table
  if (code < 0)
    code = code + 256

  return substring($ASC256_CHARS, code, code + 1)
  }

# ==============================================================================
# toascii(char): returns, as an integer, the index at which the first character
#       of char is found in $ASC256_CHARS (its character code). An empty string
#       gives 0; a character that is not in the table gives -1, the "not found"
#       result of search_string().
# ==============================================================================

define toascii
  {
  if ($1 != "")
    {
    first = substring($1, 0, 1)
    # start at index 1: index 0 of $ASC256_CHARS is only a dummy entry
    return search_string($ASC256_CHARS, first, 1, "case")
    }
  return 0
  }

# ==============================================================================
# Character sets (iso8859-1) used by the case-conversion and is*() functions.
# Every character above \x7f is written as a \x escape so that each set keeps
# exactly one character per code whatever encoding this file is stored in.
# $THE_LOWERCASE_CHARS and $THE_UPPERCASE_CHARS must stay index-aligned:
# tolowercase() and touppercase() translate a character by looking up its
# position in one string and taking the character at that position in the
# other.
$THE_LOWERCASE_CHARS = "abcdefghijklmnopqrstuvwxyz" \
        "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
        "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
$THE_UPPERCASE_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
        "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
        "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd8\xd9\xda\xdb\xdc\xdd\xde"

# \xdf (sharp s) and \xff (y diaeresis) are letters with no partner in the
# other case
$THE_LETTER_CHARS = $THE_LOWERCASE_CHARS $THE_UPPERCASE_CHARS "\xdf\xff"

$THE_ASCII_LETTER_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

$THE_DIGITS = "0123456789"
$THE_XDIGITS = "0123456789ABCDEFabcdef"
# the last member is the no-break space, spelled out because it cannot be told
# apart from an ordinary space when written literally
$THE_SPACE_CHARS = " \t\v\f\n\r\xa0"
# ASCII punctuation, then \xa1..\xbf, the multiplication and division signs
$THE_PUNCT_CHARS = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" \
        "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
        "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
        "\xd7\xf7"

# ==============================================================================
# isinstring(ch, str): returns 1 if the first character of ch occurs anywhere
#       in str (case-sensitive), 0 otherwise.
# ==============================================================================

define isinstring
  {
  first = substring($1, 0, 1)

  if (search_string($2, first, 0, "case") == -1)
    return 0
  return 1
  }

# ==============================================================================
# tolowercase(string): returns a copy of string in which every character found
#       in $THE_UPPERCASE_CHARS is replaced by the character at the same
#       position in $THE_LOWERCASE_CHARS. Other characters are kept as they are.
# ==============================================================================

define tolowercase
  {
  src = $1
  out = ""
  n = length(src)
  at = 0

  while (at < n)
    {
    c = substring(src, at, at + 1)
    # position in the upper-case set is also the position of its partner
    idx = search_string($THE_UPPERCASE_CHARS, c, 0, "case")
    if (idx >= 0)
      c = substring($THE_LOWERCASE_CHARS, idx, idx + 1)
    out = out c
    at++
    }
  return out
  }

# ==============================================================================
# touppercase(string): returns a copy of string in which every character found
#       in $THE_LOWERCASE_CHARS is replaced by the character at the same
#       position in $THE_UPPERCASE_CHARS. Other characters are kept as they are.
# ==============================================================================

define touppercase
  {
  src = $1
  out = ""
  n = length(src)
  at = 0

  while (at < n)
    {
    c = substring(src, at, at + 1)
    # position in the lower-case set is also the position of its partner
    idx = search_string($THE_LOWERCASE_CHARS, c, 0, "case")
    if (idx >= 0)
      c = substring($THE_UPPERCASE_CHARS, idx, idx + 1)
    out = out c
    at++
    }
  return out
  }

# ==============================================================================
# islowercase(string), islower(string),
# isuppercase(string), isupper(string),
# isalpha(string), isasciialpha(string),
# isdigit(string), isxdigit(string),
# isalnum(string), isspace(string),
# ispunct(string), isgraph(string),
# isprint(string), iscntrl(string),
# isascii(string): return true if the first character of string is of the
#       appropriate class.
# isblank(string): return true if the first character is space or tab
# isword(string): return true if the first character is '_', an ascii letter or
#       a digit
# ==============================================================================

# islowercase(string): true when the first character of string is one of
#       $THE_LOWERCASE_CHARS (same test as islower())
define islowercase
  {
  found = isinstring($1, $THE_LOWERCASE_CHARS)
  return found
  }
# islower(string): true when the first character of string is one of
#       $THE_LOWERCASE_CHARS (same test as islowercase())
define islower
  {
  found = isinstring($1, $THE_LOWERCASE_CHARS)
  return found
  }
# isuppercase(string): true when the first character of string is one of
#       $THE_UPPERCASE_CHARS (same test as isupper())
define isuppercase
  {
  found = isinstring($1, $THE_UPPERCASE_CHARS)
  return found
  }
# isupper(string): true when the first character of string is one of
#       $THE_UPPERCASE_CHARS (same test as isuppercase())
define isupper
  {
  found = isinstring($1, $THE_UPPERCASE_CHARS)
  return found
  }
# isalpha(string): true when the first character of string is a letter of the
#       iso8859-1 set $THE_LETTER_CHARS; see isasciialpha() for the ascii-only
#       test
define isalpha
  {
  found = isinstring($1, $THE_LETTER_CHARS)
  return found
  }
# isasciialpha(string): true when the first character of string is one of the
#       52 ascii letters in $THE_ASCII_LETTER_CHARS
define isasciialpha
  {
  found = isinstring($1, $THE_ASCII_LETTER_CHARS)
  return found
  }
# isdigit(string): true when the first character of string is a decimal digit
define isdigit
  {
  found = isinstring($1, $THE_DIGITS)
  return found
  }
# isxdigit(string): true when the first character of string is a hexadecimal
#       digit (0-9, A-F, a-f)
define isxdigit
  {
  found = isinstring($1, $THE_XDIGITS)
  return found
  }
# isalnum(string): true when the first character of string is an iso8859-1
#       letter or a decimal digit
define isalnum
  {
  # one lookup in the union of both sets
  return isinstring($1, $THE_LETTER_CHARS $THE_DIGITS)
  }
# isspace(string): true when the first character of string is one of
#       $THE_SPACE_CHARS
define isspace
  {
  found = isinstring($1, $THE_SPACE_CHARS)
  return found
  }
# ispunct(string): true when the first character of string is one of the
#       iso8859-1 punctuation characters in $THE_PUNCT_CHARS
define ispunct
  {
  found = isinstring($1, $THE_PUNCT_CHARS)
  return found
  }
# isgraph(string): true when the first character of string is punctuation
#       (iso8859-1) or passes isalnum()
define isgraph
  {
  if (isinstring($1, $THE_PUNCT_CHARS))
    return 1
  return isalnum($1)
  }
# isprint(string): true when the first character of string passes isgraph() or
#       is a plain space
define isprint
  {
  if (isgraph($1))
    return 1
  return isinstring($1, " ")
  }
# iscntrl(string): true when the first character of string passes neither
#       isprint() nor isspace()
define iscntrl
  {
  return !(isprint($1) || isspace($1))
  }
# isascii(string): true when toascii() of string gives a value below 128
define isascii
  {
  code = toascii($1)
  return code < 128
  }
# isblank(string): true when the first character of string is a space or a tab
define isblank
  {
  found = isinstring($1, " \t")
  return found
  }
# isword(string): true when the first character of string is an ascii letter,
#       a decimal digit or '_'
define isword
  {
  # one lookup in the union of the three sets
  return isinstring($1, $THE_ASCII_LETTER_CHARS $THE_DIGITS "_")
  }

# ==============================================================================
# togglecase(string): returns string with the case of every letter inverted:
#       a character that touppercase() changes is replaced by that upper-case
#       form, every other character is passed through tolowercase().
# ==============================================================================

define togglecase
  {
  src = $1
  out = ""
  at = 0
  c = substring(src, 0, 1)

  # substring() gives "" once the end of the string has been passed
  while (c != "")
    {
    upper = touppercase(c)
    if (upper != c)
      out = out upper
    else
      out = out tolowercase(c)
    at++
    c = substring(src, at, at + 1)
    }
  return out
  }

# ==============================================================================
# substr(string, [startpos, [endpos]]): wrapper around substring() with
#       optional positions. A negative position is counted back from the end
#       of the string; if it is still negative after that, it is taken as 0.
#
# Parameters:
#       $1 - source string (without it, "" is returned)
#       $2 - start position (default 0)
#       $3 - end position (default length($1))
# ==============================================================================

define substr
  {
  if ($n_args < 1)
    return ""

  text = $1
  size = length(text)

  # defaults cover the whole string
  from = 0
  to = size
  if ($n_args >= 2)
    from = $2
  if ($n_args >= 3)
    to = $3

  # negative start: measure from the end, but never before the beginning
  if (from < 0)
    {
    from = from + size
    if (from < 0)
      from = 0
    }

  # negative end: same treatment
  if (to < 0)
    {
    to = to + size
    if (to < 0)
      to = 0
    }

  return substring(text, from, to)
  }

# ==============================================================================
# replace_in_str(string, search_for, replace_with, [type]): calls
#       replace_in_string() with its parameters. If nothing matches, the
#       original string is returned unchanged. A replacement whose result is
#       the empty string (eg deleting every character) is returned as "".
#
# Parameters:
#       $1 - original string in which patterns should be matched
#       $2 - patterns to replace
#       $3 - what to replace with (default "")
#       $4 - type of matching to use (default "literal")
#
# With fewer than two arguments, "" is returned.
# ==============================================================================

define replace_in_str
  {
  # get parameters
  if ($n_args < 2)
    return ""

  string = $1
  search_for = $2

  replace_with = ""
  if ($n_args >= 3)
    replace_with = $3

  type = "literal"
  if ($n_args >= 4)
    type = $4

  # "copy" makes replace_in_string() hand back the unchanged input when there
  # is no match. Testing the result against "" instead cannot tell "no match"
  # from a replacement that really produced an empty string.
  return replace_in_string(string, search_for, replace_with, type, "copy")
  }

# ==============================================================================
# hex_to_int(hex_str): returns the integer value of the leading run of hex
#       digits (0-9, A-F in either case) in hex_str. Reading stops at the first
#       character that is not a hex digit; with no leading digits, or no
#       argument, the result is 0.
# ==============================================================================

define hex_to_int
  {
  if ($n_args < 1)
    return 0

  total = 0
  at = 0
  digit = substring($1, 0, 1)

  while (digit != "")
    {
    # the position in the digit string is the digit's value
    v = search_string("0123456789ABCDEF", toupper(digit), 0)
    if (v < 0)
      break

    total = (16 * total) + v
    at++
    digit = substring($1, at, at + 1)
    }

  return total
  }

# ==============================================================================
# hex_to_char(hex_str): returns the character whose code is hex_to_int(hex_str),
#       looked up in $ASC256_CHARS. A value of 0, or of 256 and above, gives
#       the empty string.
# ==============================================================================

define hex_to_char
  {
  code = hex_to_int($1)
  if (code != 0 && code < 256)
    return substring($ASC256_CHARS, code, code + 1)
  return ""
  }

# ==============================================================================
# hex2_to_str(hex_str): returns a string whose value is that of the
#       concatenation of characters whose hex values are listed, two digits at
#       a time. All characters of the input that are not hex digits are
#       ignored. A pair that hex_to_char() rejects (value 0) adds nothing; a
#       final unpaired digit is converted on its own.
# ==============================================================================

define hex2_to_str
  {
  # remove everything that is not a hex digit; "copy" returns the input
  # unchanged when there was nothing to remove
  s = replace_in_string($1, "[^0-9A-Fa-f]|\n", "", "regex", "copy")

  res = ""
  i = 0
  # walk the CLEANED string (not $1) so that separators between the pairs
  # neither end up inside a pair nor cut one short
  cc = substring(s, i, i + 2)
  while (cc != "")
    {
    res = res hex_to_char(cc)
    i += 2
    cc = substring(s, i, i + 2)
    }

  return res
  }

# ==============================================================================
# int_to_hex(int, mindigits): returns the upper-case hex representation of the
#       absolute value of int, padded on the left with zeros to at least
#       mindigits digits. mindigits defaults to one; values below one are
#       treated as one.
# ==============================================================================

define int_to_hex
  {
  value = $1
  if (value < 0)
    value = -value

  digits = 1
  if ($n_args > 1)
    {
    if ($2 >= 1)
      digits = $2
    }

  hex = ""
  # emit the least significant digit first, prepending each new one, until
  # both the value and the minimum width are used up
  while (digits > 0 || value > 0)
    {
    d = value % 16
    hex = substring("0123456789ABCDEF", d, d + 1) hex
    value = value / 16
    digits--
    }

  return hex
  }

# ==============================================================================
# char_to_hex(chars): returns the concatenated hex codes of the characters in
#       chars, two hex digits per character. A character that is not found in
#       $ASC256_CHARS contributes nothing.
# ==============================================================================

define char_to_hex
  {
  hex = ""
  at = 0
  c = substring($1, 0, 1)

  while (c != "")
    {
    # search from index 1: index 0 of the table is a dummy entry
    code = search_string($ASC256_CHARS, c, 1, "case")
    if (code >= 1)
      hex = hex int_to_hex(code, 2)
    at++
    c = substring($1, at, at + 1)
    }
  return hex
  }

# ==============================================================================
# start_of_line_pos([pos]): returns the position at which the line containing
#       pos (default $cursor) starts: the position just after the previous
#       newline, or 0 when there is no previous newline.
# ==============================================================================

define start_of_line_pos
  {
  pos = $cursor
  if ($n_args >= 1)
    pos = $1

  # the newline that ends THIS line ...
  lineEnd = end_of_line_pos(pos)

  # ... and, searching backwards from just before it, the one that ends the
  # previous line; the line starts where that match ends
  if (search("\n", lineEnd - 1, "case", "backward") < 0)
    return 0
  return $search_end
  }

# ==============================================================================
# end_of_line_pos([pos]): returns the position of the first newline at or after
#       pos (default $cursor), ie the end of the line containing pos. When no
#       newline follows, $text_length is returned.
# ==============================================================================

define end_of_line_pos
  {
  pos = $cursor
  if ($n_args >= 1)
    pos = $1

  nl = search("\n", pos, "case")
  if (nl != -1)
    return nl
  return $text_length
  }

# ==============================================================================
# line_of_pos([pos]): returns the line containing the position pos (default
#       $cursor) as a string without a trailing newline. This includes the
#       first line of the document, which has no newline before it.
# ==============================================================================

define line_of_pos
  {
  if ($n_args < 1)
    pos = $cursor
  else
    pos = $1

  # find end of THIS line
  e = end_of_line_pos(pos)

  # the line starts just after the previous newline, or at position 0 when
  # there is none (first line of the document)
  b = 0
  if (e > 0)
    {
    if (search("\n", e - 1, "case", "backward") >= 0)
      b = $search_end
    }

  return get_range(b, e)
  }

# ==============================================================================
# break_lines_over(maxchars, string [, pref]): breaks string into lines of at
#       most maxchars characters, preferably at the end of a word. Every
#       newline that is ADDED is followed by the string pref (default "");
#       newlines already present in string are kept as they are and get no
#       pref. pref itself is not counted against maxchars. Returns the modified
#       string. If maxchars is less than 1, string is returned unchanged.
# ==============================================================================

define break_lines_over
  {
  maxchars = $1
  string = $2
  pref = ""
  if ($n_args > 2)
    pref = $3

  # a width below 1 can never be met and would keep the loop below from
  # consuming any text
  if (maxchars < 1)
    return string

  res = ""
  while (length(string) > maxchars)
    {
    nlpos = search_string(string, "\n", 0)
    if (0 <= nlpos && nlpos <= maxchars)
      {
      # the text already breaks early enough: copy that line, newline included
      res = res substring(string, 0, nlpos + 1)
      string = substring(string, nlpos + 1, length(string))
      }
    else
      {
      # look backwards from the limit for the end of a word (regex ">")
      nlpos = search_string(string, ">", maxchars, "regex", "backward")
      # no usable word end: break hard at the limit. A result of 0 counts as
      # unusable too, because breaking there would consume nothing.
      if (nlpos < 1 || nlpos > maxchars)
        nlpos = maxchars
      res = res substring(string, 0, nlpos) "\n" pref
      string = substring(string, nlpos, length(string))
      }
    }

  # whatever is left fits on one line
  return res string
  }

# ==============================================================================
# quote_literal_as_regex(string): returns a regex search string that matches
#       string literally: regex metacharacters are preceded by a backslash and
#       the control characters \a \b \e \f \n \r \t \v are written as their
#       two-character escape sequences.
# ==============================================================================

define quote_literal_as_regex
  {
  src = $1
  out = ""
  n = length(src)

# Handling of "\e" depends on whether the macro language reads it as the
# escape character. This version assumes that it does:
#   - the check below raises a dialog if "\e" is NOT an escape (ie if it
#     equals the two characters backslash, e);
#   - the "\e" branch in the loop is active.
# For an interpreter without "\e", invert the check (complain when
# "\e" != "\\e") and comment out the "\e" branch.
  if ("\e" == "\\e")
    {
    dialog("Change extensions.nm: quote_literal_as_regex() - \\e == \\\\e")
    }

  at = 0
  while (at < n)
    {
    c = substring(src, at, at + 1)
    # work out what to emit for this character, then append it
    if (search_string("\\|()[]{}<>.*+?^$&-", c, 0, "case") >= 0)
      c = "\\" c                        # regex metacharacter
    else if (c == "\a") c = "\\a"
    else if (c == "\b") c = "\\b"
    else if (c == "\e") c = "\\e"       # only valid while "\e" is escape
    else if (c == "\f") c = "\\f"
    else if (c == "\n") c = "\\n"
    else if (c == "\r") c = "\\r"
    else if (c == "\t") c = "\\t"
    else if (c == "\v") c = "\\v"
    out = out c
    at++
    }

  return out
  }

# ==============================================================================
# regex_to_quoted_string(regex): returns regex written as a string literal for
#       NEdit macro code: enclosed in double quotes, with quotes and
#       backslashes backslash-escaped and the control characters
#       \a \b \e \f \n \r \t \v turned into escape sequences. A backslash that
#       already introduces one of those escape sequences is copied unchanged
#       together with its letter.
# ==============================================================================

define regex_to_quoted_string
  {
  src = $1
  out = ""
  n = length(src)

# Handling of "\e" depends on whether the macro language reads it as the
# escape character. This version assumes that it does:
#   - the check below raises a dialog if "\e" is NOT an escape (ie if it
#     equals the two characters backslash, e);
#   - the "\e" branch in the loop is active.
# For an interpreter without "\e", invert the check (complain when
# "\e" != "\\e") and comment out the "\e" branch.
  if ("\e" == "\\e")
    {
    dialog("Change extensions.nm: regex_to_quoted_string() - \\e == \\\\e")
    }

  at = 0
  while (at < n)
    {
    c    = substring(src, at, at + 1)
    pair = substring(src, at, at + 2)
    step = 1
    if (search_string(pair, "^\\\\[abefnrtv]", 0, "regex") == 0)
      {
      piece = pair      # already a back-slash escaped control char sequence
      step = 2          # its letter has been consumed as well
      }
    else if (search_string("\"\\", c, 0, "case") >= 0)
      piece = "\\" c                    # quote that quote or back-slash
    else if (c == "\a") piece = "\\a"   # convert control char to escape seq
    else if (c == "\b") piece = "\\b"
    else if (c == "\e") piece = "\\e"   # only valid while "\e" is escape
    else if (c == "\f") piece = "\\f"
    else if (c == "\n") piece = "\\n"
    else if (c == "\r") piece = "\\r"
    else if (c == "\t") piece = "\\t"
    else if (c == "\v") piece = "\\v"
    else                piece = c       # leave anything else alone
    out = out piece
    at += step
    }

  return "\"" out "\""
  }

# ==============================================================================
# line_col_to_pos(lineNum [, colNum [, tabSize]]): returns the position of the
#       character position indicated by lineNum and colNum, assuming a
#       particular tabSize. By default, colNum is zero and tabSize equals
#       $tab_dist; a negative colNum or a tabSize below 1 is ignored in favour
#       of the default. Lines are numbered from 1, columns from zero. If there
#       are not enough lines, -1 is returned; if there are not enough columns
#       in the addressed line (an empty line included), the position of the
#       end of that line is returned; if the column is "inside" a tab, return
#       the tab's position.
# ==============================================================================

define line_col_to_pos
  {
  lineNum = $1
  colNum = 0
  if ($n_args >= 2 && $2 >= 0)
    {
    colNum = $2
    }
  tabSize = $tab_dist
  # a tab size of zero would make the modulo below divide by zero
  if ($n_args >= 3 && $3 > 0)
    {
    tabSize = $3
    }

  bufferPos = 0

  if (lineNum > 1)
    {
    # skip lineNum - 1 complete lines from the top of the buffer
    if (search("(^.*\n){" lineNum - 1 "}", 0, "regex") == -1)
      return -1
    bufferPos = $search_end
    }

  if (colNum <= 0)
    return bufferPos

  # end of the addressed line: its newline, or the end of the buffer. Looking
  # for the newline (rather than for a non-empty line) keeps an empty line
  # from being mistaken for the next line that has text.
  last = search("\n", bufferPos)
  if (last < 0)
    last = $text_length

  pos = bufferPos
  col = 0
  while (pos < last)
    {
    nexttab = search("\t", pos)
    if (nexttab < pos || nexttab > last)
      nexttab = last
    if (nexttab == pos)
      {
      # pos is at a tab: if the tab extends beyond colNum, return this pos
      nextCol = col + tabSize - (col % tabSize)
      if (nextCol > colNum)
        return pos
      pos++             # skip the tab
      }
    else
      {
      # pos is at a non-tab: if the non-tab sequence extends beyond colNum,
      # we have an overrun of col + nexttab - pos - colNum;
      # return end-of-non-tab-sequence - overrun
      nextCol = col + nexttab - pos
      if (nextCol >= colNum)
        return nexttab - (nextCol - colNum)
      pos = nexttab     # skip to next tab
      }
    # move column count forward
    col = nextCol
    }

  # the line is shorter than colNum columns
  return last
  }

# ==============================================================================
# str_line_col_to_pos(string, lineNum [, colNum [, tabSize]]): returns the
#       position in string of the character position indicated by lineNum and
#       colNum, assuming a particular tabSize. By default, colNum is zero and
#       tabSize equals $tab_dist; a negative colNum or a tabSize below 1 is
#       ignored in favour of the default. Lines are numbered from 1, columns
#       from zero. If there are not enough lines, -1 is returned; if there are
#       not enough columns in the addressed line (an empty line included), the
#       position of the end of that line is returned; if the column is
#       "inside" a tab, return the tab's position.
# ==============================================================================

define str_line_col_to_pos
  {
  string = $1
  lineNum = $2
  colNum = 0
  if ($n_args >= 3 && $3 >= 0)
    {
    colNum = $3
    }
  tabSize = $tab_dist
  # a tab size of zero would make the modulo below divide by zero
  if ($n_args >= 4 && $4 > 0)
    {
    tabSize = $4
    }

  bufferPos = 0

  if (lineNum > 1)
    {
    # skip lineNum - 1 complete lines from the start of the string
    if (search_string(string, "(^.*\n){" lineNum - 1 "}", 0, "regex") == -1)
      return -1
    bufferPos = $search_end
    }

  if (colNum <= 0)
    return bufferPos

  # end of the addressed line: its newline, or the end of the string. Looking
  # for the newline (rather than for a non-empty line) keeps an empty line
  # from being mistaken for the next line that has text.
  last = search_string(string, "\n", bufferPos)
  if (last < 0)
    last = length(string)

  pos = bufferPos
  col = 0
  while (pos < last)
    {
    nexttab = search_string(string, "\t", pos)
    if (nexttab < pos || nexttab > last)
      nexttab = last
    if (nexttab == pos)
      {
      # pos is at a tab: if the tab extends beyond colNum, return this pos
      nextCol = col + tabSize - (col % tabSize)
      if (nextCol > colNum)
        return pos
      pos++             # skip the tab
      }
    else
      {
      # pos is at a non-tab: if the non-tab sequence extends beyond colNum,
      # we have an overrun of col + nexttab - pos - colNum;
      # return end-of-non-tab-sequence - overrun
      nextCol = col + nexttab - pos
      if (nextCol >= colNum)
        return nexttab - (nextCol - colNum)
      pos = nexttab     # skip to next tab
      }
    # move column count forward
    col = nextCol
    }

  # the line is shorter than colNum columns
  return last
  }

# ==============================================================================
# pos_to_line(pos): returns the line number (counted from 1) of the position
#       pos in the document. A pos beyond the end of the document is treated
#       as $text_length, so the result is then the last line number.
# ==============================================================================

define pos_to_line
  {
  target = $1
  if (target > $text_length)
    target = $text_length

  count = 0
  at = 0
  # hop from line start to line start until the target has been passed
  while (at <= target)
    {
    count++
    if (search("\n", at) < 0)
      return count
    at = $search_end
    }

  return count
  }

# ==============================================================================
# str_pos_to_line(string, pos): returns the line number (counted from 1) of the
#       position pos within string. A pos beyond the end of string is treated
#       as length(string), so the result is then the last line number.
# ==============================================================================

define str_pos_to_line
  {
  text = $1
  target = $2
  limit = length(text)
  if (target > limit)
    target = limit

  count = 0
  at = 0
  # hop from line start to line start until the target has been passed
  while (at <= target)
    {
    count++
    if (search_string(text, "\n", at) < 0)
      return count
    at = $search_end
    }

  return count
  }

# ==============================================================================
# pos_to_column(pos [, tabSize]): returns the column number (counted from 0) of
#       the position pos in its line, with each tab advancing to the next
#       multiple of tabSize (default $tab_dist). A pos beyond the end of the
#       document is treated as $text_length. The minimum value is 0.
# ==============================================================================

define pos_to_column
  {
  target = $1
  tabSize = $tab_dist
  if ($n_args >= 2)
    tabSize = $2

  if (target > $text_length)
    target = $text_length

  # boundaries of the line that holds the target
  pos = search("^", target, "regex", "backward")
  end = search("$", target, "regex", "forward")

  col = 0
  while (pos < target)
    {
    tab = search("\t", pos)
    if (tab < pos || tab > end)
      tab = end                         # no further tab on this line

    if (tab >= target)
      {
      # no tabs between pos and target: plain characters only
      col = col + target - pos
      pos = tab
      }
    else if (tab > pos)
      {
      # run of non-tabs up to the next tab
      col = col + tab - pos
      pos = tab
      }
    else
      {
      # standing on a tab: jump to the next tab stop and step over it
      col = col + tabSize - (col % tabSize)
      pos = tab + 1
      }
    }

  return col
  }

# ==============================================================================
# str_pos_to_column(string, pos [, tabSize]): returns the column number (counted
#       from 0) of the position pos in its line inside string, with each tab
#       advancing to the next multiple of tabSize (default $tab_dist). A pos
#       beyond the end of string is treated as length(string). The minimum
#       value is 0.
# ==============================================================================

define str_pos_to_column
  {
  # arguments are (string, pos [, tabSize]): the string comes first
  string = $1
  posNum = $2
  tabSize = $tab_dist
  if ($n_args >= 3)
    tabSize = $3

  len = length(string)
  if (posNum > len)
    posNum = len

  # find the boundaries of the line holding posNum; these searches run on the
  # string argument, so search_string() is needed, not search()
  pos = search_string(string, "^", posNum, "regex", "backward")
  end = search_string(string, "$", posNum, "regex", "forward")

  col = 0
  nextCol = 0

  while (pos < posNum)
    {
    nexttab = search_string(string, "\t", pos)
    if (nexttab < pos || nexttab > end)
      nexttab = end
    if (nexttab >= posNum)
      nextCol = col + posNum - pos      # no tabs between pos and posNum
    else if (nexttab > pos)
      nextCol = col + nexttab - pos     # skip contiguous non-tabs
    else # nexttab == pos
      {
      # standing on a tab: advance to the next tab stop and step over it
      nextCol = col + tabSize - (col % tabSize)
      nexttab++
      }
    pos = nexttab
    col = nextCol
    }

  return col
  }

# ==============================================================================
# rjust(strs [, maxstr]): returns strs with each of its lines padded with
#       spaces on the left to a common width: the length of maxstr, or of the
#       longest line in strs if that is greater. An empty last line (text
#       ending in a newline) is left empty.
# ==============================================================================

define rjust
  {
  lines = split($1, "\n")

  # padding string: as many spaces as maxstr has characters
  pad = ""
  if ($n_args > 1)
    pad = replace_in_string($2, "(?n.)", " ", "regex")
  width = length(pad)

  # widen the padding string to the longest line, if that is longer
  count = lines[]
  k = 0
  while (k < count)
    {
    n = length(lines[k])
    if (n > width)
      {
      width = n
      pad = replace_in_string(lines[k], ".", " ", "regex")
      }
    k++
    }

  # assemble result
  out = ""
  sep = ""
  k = 0
  while (k < count)
    {
    line = lines[k]
    n = length(line)
    if (line == "" && k == count - 1)   # add nothing to an empty last line
      {
      pad = ""
      width = 0
      }
    # padding first, then keep only the last width characters
    out = out sep substring(pad line, n, n + width)
    sep = "\n"
    k++
    }

  return out
  }

# ==============================================================================
# ljust(strs [, maxstr]): returns strs with each of its lines padded with
#       spaces on the right to a common width: the length of maxstr, or of the
#       longest line in strs if that is greater. An empty last line (text
#       ending in a newline) is left empty.
# ==============================================================================

define ljust
  {
  lines = split($1, "\n")

  # padding string: as many spaces as maxstr has characters
  pad = ""
  if ($n_args > 1)
    pad = replace_in_string($2, "(?n.)", " ", "regex")
  width = length(pad)

  # widen the padding string to the longest line, if that is longer
  count = lines[]
  k = 0
  while (k < count)
    {
    n = length(lines[k])
    if (n > width)
      {
      width = n
      pad = replace_in_string(lines[k], ".", " ", "regex")
      }
    k++
    }

  # assemble result
  out = ""
  sep = ""
  k = 0
  while (k < count)
    {
    line = lines[k]
    if (line == "" && k == count - 1)   # add nothing to an empty last line
      {
      pad = ""
      width = 0
      }
    # line first, then padding, cut off at the common width
    out = out sep substring(line pad, 0, width)
    sep = "\n"
    k++
    }

  return out
  }

# ==============================================================================
# trim(strs[, re[, nlSense]]): returns strs with the text matching
#       "(?"nlSense"^(?:"re"))" and then the text matching
#       "(?"nlSense"(?:"re")$)" removed, ie with re stripped from the start and
#       from the end of each line. The default value of re is "\\s+", and of
#       nlSense is "N".
# ==============================================================================

define trim
  {
  text = $1

  pattern = "\\s+"
  if ($n_args >= 2)
    pattern = $2

  sense = "N"
  if ($n_args >= 3)
    sense = $3

  # the pattern anchored to the start and to the end of a line
  atStart = "(?" sense "^(?:" pattern "))"
  atEnd   = "(?" sense "(?:" pattern ")$)"

  # "copy" returns the text unchanged when nothing matches
  text = replace_in_string(text, atStart, "", "regex", "copy")
  return replace_in_string(text, atEnd, "", "regex", "copy")
  }

# ==============================================================================
# compress(strs[, re[, repl[, nlSense]]]): returns strs with each sequence
#       identified as "(?"nlSense"(?:"re")+)" replaced with repl. The default
#       value of re is "\\s", of repl is " ", and of nlSense is "N".
#
# Parameters:
#       $1 - text to work on
#       $2 - regex for one element of a sequence (default "\\s")
#       $3 - replacement for each whole sequence (default " ")
#       $4 - newline sensitivity letter put after "(?" (default "N")
# ==============================================================================

define compress
  {
  strs = $1
  re = "\\s"
  repl = " "
  nlSense = "N"

  # positions follow the documented signature: repl is the third argument,
  # nlSense the fourth
  if ($n_args >= 2) re = $2
  if ($n_args >= 3) repl = $3
  if ($n_args >= 4) nlSense = $4

  # do replacement; "copy" returns strs unchanged when nothing matches
  strs = replace_in_string(strs, "(?"nlSense"(?:"re")+)", repl, "regex", "copy")

  return strs
  }

# ==============================================================================
# number(string [, "strict"]): returns the numeric value read from the front of
#       string: the whole string if valid_number() accepts it, otherwise an
#       optionally signed run of digits after any leading white space. When
#       there is no such number the result is zero, unless the second argument
#       is "strict": then the undefined function number_NoNumericPrefixFound()
#       is called to provoke a macro error.
# ==============================================================================

define number
  {
  text = $1
  if (valid_number(text))
    return text + 0

  # keep only the leading number; "" comes back when the pattern fails
  digits = replace_in_string(text, "(?n^\\s*([-+]?\\d+).*)", "\\1", "regex")
  if (digits != "")
    return digits + 0

  if ($n_args > 1 && $2 == "strict")
    digits = number_NoNumericPrefixFound()
  else
    digits = 0

  return digits + 0
  }