# ==============================================================================
# extensions.nm
# Author: Tony Balinski
#
# This file contains the following functions:
#   (Character classes)
#       ctype_charstr
#       ctype_change_charset
#       tochar
#       toascii
#       posinstring
#       charatpos
#       isinstring
#       tolowercase
#       touppercase
#       isctype
#       islowercase islower isuppercase isupper isalpha isasciialpha
#       isdigit isxdigit isalnum isspace ispunct isgraph isprint iscntrl isascii
#       isblank isword
#       togglecase
#   (Short forms for substring() and replace_in_string(..., "copy"))
#       substr
#       replace_in_str
#   (Extensions - use string or document as required)
#       subtext
#       search_text
#   (Octal/Hexadecimal numeric conversion)
#       base_to_int
#       oct_to_int
#       hex_to_int
#       hex_to_char
#       hex2_to_str
#       base64_to_chars
#       chars_to_base64
#       int_to_binbase
#       int_to_hex
#       int_to_HEX
#       int_to_oct
#       int_to_bin
#       char_to_hex
#       char_to_HEX
#       char_to_oct
#   (Line information)
#       start_of_line_pos
#       end_of_line_pos
#       line_of_pos
#   (Wrapping/formatting)
#       break_lines_over
#   (Regexes)
#       quote_literal_as_c
#       quote_literal_as_regex
#       quote_literal_as_subst
#       quote_literal_for_shell_esc
#       regex_to_quoted_string
#       regex_capturing_parens
#       unquote
#   (Positions, lines and columns)
#       line_col_to_pos
#       str_line_col_to_pos
#       pos_to_line
#       pos_to_column
#   (word extraction)
#       get_word_at_pos
#   (Text justification, formatting)
#       longest_line_len
#       longest_line
#       rjust_s rjust
#       ljust_s ljust
#       trim
#       chomp
#       compress
#       repeat
#       reverse_string
#   (String to number conversions)
#       number tonumber
#   (String comparison)
#       nuls nz
#       eqs nes lts les gts ges
#       eqsi nesi ltsi lesi gtsi gesi
#   (Numeric comparison)
#       eq ne lt le gt ge
#       lt_lt lt_le le_le le_lt between
#   (Bit manipulation)
#       compl
#       xor
#       lshift rshift urshift
#   (Pattern matching - short versions of search_string(), returning boolean)
#       match_as
#       matched_prefix matched_suffix matched_text matched_start matched_end
#       matched_groups matched_part matched_[1-9]
#       match_re match_rei match_ren match_reni
#       match_w match_wi
#       match_s match_si
#   (Pattern replacing - returning replace_in_string(..., "copy") result)
#       sub_re
#       sub_s
#   (Colors)
#       background_is_dark
# ==============================================================================

# ------------------------------------------------------------------------------
# Globals:
#   $EXTENSIONS_NM[]: an array holding static data used by functions in this
#           module.
#   For character classes we have the following keys:
#       chars       refers to an array of two entries:
#                       ascii, iso8859_1
#                   each holding character class keys:
#                       upper, lower, alpha, digit, alnum
#                       punct, space, xdigit, graph, print
#                       word, blank, cntrl, ascii
#                   each of these contains a string of all characters in the
#                   character class
#       chars_all   string in which the character with code N sits at index N
#                   (index 0 holds a dummy space standing in for NUL)
#       charset     name of the character set currently in use
#   For the regular expression matching routines (see match_as(),
#   matched_prefix(), matched_suffix(), matched_text(), matched_start(),
#   matched_end(), matched_groups(), matched_part(), matched_[1-9](),
#   match_re(), match_rei(), match_ren(), match_reni, match_w(), match_wi(),
#   match_s(), match_si()) we have the keys:
#       match_as__start         starting position of the match
#       match_as__end           end position of the match
#       match_as__before        text prefixing the found match
#       match_as__matched       text that matched the pattern
#       match_as__after         text following the found match
#       match_as__pattern       search pattern
#       match_as__type          search type
#       match_as__paren_groups  number of regex grouping parentheses
#                               (see matched_groups)
#   For background_is_dark() we have the keys:
#       background_is_dark__value       previous result of function
#       background_is_dark__rgb_text_bg result of a color lookup
# ------------------------------------------------------------------------------

# NOTE(review): presumably this assignment only exists to make sure
# $EXTENSIONS_NM is an array before the "in" test below - confirm.
$EXTENSIONS_NM[""] = 0

# ==============================================================================
# One-time construction of the character class tables (skipped when the
# "chars" key is already present in $EXTENSIONS_NM).
if (!("chars" in $EXTENSIONS_NM)) {
    # in chars_all, the leading " " is a dummy character for the zero index
    # (character NUL)
    chars_all = \
        " \x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
        "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
        "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
        "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
        "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
        "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
        "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" \
        "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
        "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
        "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
        "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
        "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
        "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
        "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
        "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"

    # --- 7-bit ASCII classes; composite classes are built by concatenation ---
    ascii["upper"] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ascii["lower"] = "abcdefghijklmnopqrstuvwxyz"
    ascii["alpha"] = ascii["upper"] ascii["lower"]
    ascii["digit"] = "0123456789"
    ascii["alnum"] = ascii["alpha"] ascii["digit"]
    ascii["punct"] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
    ascii["space"] = "\t\n\v\f\r "
    ascii["xdigit"] = ascii["digit"] "ABCDEFabcdef"
    ascii["graph"] = ascii["alnum"] ascii["punct"]
    ascii["print"] = ascii["graph"] " "
    ascii["word"] = ascii["alnum"] "_"
    ascii["blank"] = " \t"
    ascii["cntrl"] = "\x01\x02\x03\x04\x05\x06\x07" \
                     "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                     "\x10\x11\x12\x13\x14\x15\x16\x17" \
                     "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
    # 7-bit ascii
    ascii["ascii"] = substring(chars_all, 0, 128)
    chars["ascii"] = ascii

    # --- ISO 8859-1 classes ---
    # upper and lower must stay position-aligned: tolowercase()/touppercase()
    # map a character through the same index in the other string.
    # for ßÿ we cheat: pretend upper(ß) == ß (it should be SS); upper(ÿ) == Y
    iso8859_1["upper"] = ascii["upper"] "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßY"
    iso8859_1["lower"] = ascii["lower"] "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþßÿ"
    iso8859_1["alpha"] = iso8859_1["upper"] iso8859_1["lower"]
    iso8859_1["digit"] = "0123456789"
    iso8859_1["alnum"] = iso8859_1["alpha"] iso8859_1["digit"]
    iso8859_1["punct"] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" \
                         "¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿×÷"
    iso8859_1["space"] = "\t\n\v\f\r "
    iso8859_1["xdigit"] = iso8859_1["digit"] "ABCDEFabcdef"
    iso8859_1["graph"] = iso8859_1["alnum"] iso8859_1["punct"]
    iso8859_1["print"] = iso8859_1["graph"] " "
    iso8859_1["word"] = iso8859_1["alnum"] "_"
    iso8859_1["blank"] = " \t"
    iso8859_1["cntrl"] = "\x01\x02\x03\x04\x05\x06\x07" \
                         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                         "\x10\x11\x12\x13\x14\x15\x16\x17" \
                         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
    iso8859_1["ascii"] = ascii["ascii"]
    chars["iso8859_1"] = iso8859_1

    # publish the tables in the module-wide global
    $EXTENSIONS_NM["chars"] = chars
    $EXTENSIONS_NM["chars_all"] = chars_all
    $EXTENSIONS_NM["charset"] = "iso8859_1"  # which set to use: ascii, iso8859_1
}

# ==============================================================================
# ctype_charstr(charclass [, charset]): return the string of characters of the
#       named class using the current character set (ascii or iso8859_1), or a
#       named set.
#
# Parameters:
#       $1 - character class key (e.g. "upper", "digit", "punct")
#       $2 - optional character set name; defaults to $EXTENSIONS_NM["charset"]
# ==============================================================================
define ctype_charstr {
    charclass = $1
    if ($n_args > 1)
        charset = $2
    else
        charset = $EXTENSIONS_NM["charset"]
    return $EXTENSIONS_NM["chars"][charset][charclass]
}

# ==============================================================================
# ctype_change_charset(charset): change the global character set used in the
#       character class functions. If charset exists as an index to
#       chars, it is stored in $EXTENSIONS_NM["charset"] for future use. If
#       not, the operation is ignored. Return the current charset name.
# ==============================================================================
define ctype_change_charset {
    if ($n_args > 0) {
        wanted = $1
        # only accept names that have a table in $EXTENSIONS_NM["chars"]
        if (wanted in $EXTENSIONS_NM["chars"])
            $EXTENSIONS_NM["charset"] = wanted
    }
    return $EXTENSIONS_NM["charset"]
}

# ==============================================================================
# tochar(int): returns the single-character string whose code is int.
#       Accepted codes are -128..-1 (treated as signed bytes, i.e. code + 256)
#       and 1..255. Any other value, including 0, yields the empty string.
# ==============================================================================
define tochar {
    code = $1
    # NUL and out-of-range codes have no string representation
    if (code == 0 || code < -128 || code >= 256)
        return ""
    # negative codes are signed bytes: shift them into 128..255
    if (code < 0)
        code += 256
    table = $EXTENSIONS_NM["chars_all"]
    return substring(table, code, code + 1)
}

# ==============================================================================
# toascii(char): returns the character code of the first character of char as
#       an integer. Returns 0 for the empty string, and -1 if the character
#       cannot be found in the code table.
# ==============================================================================
define toascii {
    if ($1 == "")
        return 0
    first = substring($1, 0, 1)
    table = $EXTENSIONS_NM["chars_all"]
    # start at index 1: index 0 of the table is only a dummy for NUL
    return search_string(table, first, 1, "case")
}

# ==============================================================================
# posinstring(sub, str): returns the position of the first (case-sensitive)
#       occurrence of string sub in string str. Fails with -1.
# ==============================================================================
define posinstring {
    needle = $1
    haystack = $2
    return search_string(haystack, needle, 0, "case")
}

# ==============================================================================
# charatpos(str, pos): returns the character of string str at position pos.
#       Fails with "".
# ==============================================================================
define charatpos {
    # FIX: the guard used to test an undefined variable "pos" instead of the
    # second argument, so the function could never run successfully.
    pos = $2
    if (pos < 0)
        return ""
    return substring($1, pos, pos + 1)
}

# ==============================================================================
# isinstring(ch, str): returns true if the first character of ch is in str.
#       The comparison is case-sensitive.
# ==============================================================================
define isinstring {
    ch = substring($1, 0, 1)
    return (search_string($2, ch, 0, "case") != -1)
}

# ==============================================================================
# tolowercase(string): converts to lower case, returning the converted string.
#       Uses the "upper" and "lower" tables of the current character set: a
#       character found at index N of "upper" is replaced by the character at
#       index N of "lower"; everything else is copied unchanged.
# ==============================================================================
define tolowercase {
    str = $1
    res = ""
    len = length(str)
    uppers = ctype_charstr("upper")
    lowers = ctype_charstr("lower")
    for (i = 0; i < len; i++) {
        ch = substring(str, i, i + 1)
        pos = search_string(uppers, ch, 0, "case")
        if (pos == -1)
            res = res ch
        else
            res = res substring(lowers, pos, pos + 1)
    }
    return res
}

# ==============================================================================
# touppercase(string): converts to upper case, returning the converted string.
#       Mirror image of tolowercase(): characters are looked up in the "lower"
#       table and replaced by the same-index character of "upper".
# ==============================================================================
define touppercase {
    str = $1
    res = ""
    len = length(str)
    uppers = ctype_charstr("upper")
    lowers = ctype_charstr("lower")
    for (i = 0; i < len; i++) {
        ch = substring(str, i, i + 1)
        pos = search_string(lowers, ch, 0, "case")
        if (pos == -1)
            res = res ch
        else
            res = res substring(uppers, pos, pos + 1)
    }
    return res
}

# ==============================================================================
# isctype(ch, classname [, charset]): returns true if the first character of ch
#       is in the character class named classname for the character set charset.
# ==============================================================================
define isctype {
    # fetch the class members from the named set, or from the current one
    if ($n_args >= 3)
        members = ctype_charstr($2, $3)
    else
        members = ctype_charstr($2)
    return isinstring($1, members)
}

# ==============================================================================
# Character class predicates. Each one looks only at the FIRST character of
# its string argument and returns true if that character belongs to the class:
#       islowercase(string), islower(string)    - lower case letter
#       isuppercase(string), isupper(string)    - upper case letter
#       isalpha(string)                         - letter (current charset)
#       isasciialpha(string)                    - letter (ascii set only)
#       isdigit(string), isxdigit(string)       - decimal / hexadecimal digit
#       isalnum(string)                         - letter or digit
#       isspace(string)                         - white space
#       ispunct(string), isgraph(string), isprint(string), iscntrl(string)
#       isascii(string)                         - character code below 128
#       isblank(string)                         - space or tab
#       isword(string)                          - '_', ascii letter or digit
# ==============================================================================

# islowercase(string): long name for islower()
define islowercase {
    return isctype($1, "lower")
}

# islower(string): first character is a lower case letter
define islower {
    return isctype($1, "lower")
}

# isuppercase(string): long name for isupper()
define isuppercase {
    return isctype($1, "upper")
}

# isupper(string): first character is an upper case letter
define isupper {
    return isctype($1, "upper")
}

# isalpha(string): first character is a letter of the current character set
# (see isasciialpha() for the ascii-only test)
define isalpha {
    return isctype($1, "alpha")
}

# isasciialpha(string): first character is a letter of the ascii set
define isasciialpha {
    return isctype($1, "alpha", "ascii")
}

# isdigit(string): first character is a decimal digit
define isdigit {
    return isctype($1, "digit")
}

# isxdigit(string): first character is a hexadecimal digit
define isxdigit {
    return isctype($1, "xdigit")
}

# isalnum(string): first character is a letter or digit (current charset)
define isalnum {
    return isctype($1, "alnum")
}

# isspace(string): first character is white space
define isspace {
    return isctype($1, "space")
}

# ispunct(string): first character is punctuation (current charset)
define ispunct {
    return isctype($1, "punct")
}

# isgraph(string): first character is a letter, digit or punctuation
# (current charset)
define isgraph {
    return isctype($1, "graph")
}

# isprint(string): first character is a graph character or a space
# (current charset)
define isprint {
    return isctype($1, "print")
}

# iscntrl(string): first character is a control character (codes 1 to 31)
define iscntrl {
    return isctype($1, "cntrl")
}

# isascii(string): the code reported by toascii() is below 128
define isascii {
    return toascii($1) < 128
}

# isblank(string): first character is a space or a tab
define isblank {
    return isctype($1, "blank")
}

# isword(string): first character is '_', an ascii letter or a digit
define isword {
    return isctype($1, "word", "ascii")
}

# ==============================================================================
# togglecase(string): returns the string with every upper case character
#       turned into lower case and every lower case character into upper case.
# ==============================================================================
define togglecase {
    src = $1
    out = ""
    at = 0
    ch = substring(src, at, at + 1)
    # substring() past the end yields "", which ends the scan
    while (ch != "") {
        # a character that changes under touppercase() was lower case
        if (touppercase(ch) != ch)
            out = out touppercase(ch)
        else
            out = out tolowercase(ch)
        at++
        ch = substring(src, at, at + 1)
    }
    return out
}

# ==============================================================================
# substr(string, [startpos, [endpos]]): calls substring() for given string,
#       start position, end position. Position parameters are optional. If any
#       are negative, measurement is made from the end of the string.
#
# Parameters:
#       $1 - source string (default "")
#       $2 - start position (default 0)
#       $3 - end position (default length($1))
# ==============================================================================
define substr {
    # get parameters
    if ($n_args < 1)
        return ""
    string = $1
    startpos = 0
    if ($n_args >= 2)
        startpos = $2
    len = length(string)
    endpos = len
    if ($n_args >= 3)
        endpos = $3

    # negative positions count back from the end of the string ...
    if (startpos < 0)
        startpos += len
    if (endpos < 0)
        endpos += len
    # ... but never reach in front of its start
    if (startpos < 0)
        startpos = 0
    if (endpos < 0)
        endpos = 0

    return substring(string, startpos, endpos)
}

# ==============================================================================
# replace_in_str(string, search_for, replace_with, [type]): calls
#       replace_in_string() with its parameters and the "copy" option, so that
#       the original string is returned unchanged when nothing was replaced.
#       Returns "" when called with fewer than two arguments.
#
# Parameters:
#       $1 - original string in which patterns should be matched
#       $2 - patterns to replace
#       $3 - what to replace with (default "")
#       $4 - type of matching to use (default "literal")
# ==============================================================================
define replace_in_str {
    # get parameters
    if ($n_args < 2)
        return ""
    string = $1
    search_for = $2
    replace_with = ""
    if ($n_args >= 3)
        replace_with = $3
    type = "literal"
    if ($n_args >= 4)
        type = $4

    return replace_in_string(string, search_for, replace_with, type, "copy")
}

# ------------------------------------------------------------------------------
# subtext(str, left [, right] [, "file" | "document" | "string"]): returns a
#       substring of str, if str is not empty; returns a substring of the
#       document otherwise. Acts like the built-in substring() function. If str
#       might be empty, a final parameter can force the document-versus-string
#       input choice.
#
#       Negative left/right values are measured from the end of the input;
#       both are clamped to the input length. Returns "" when left >= right.
# ------------------------------------------------------------------------------
define subtext {
    str = $1
    left = $2
    right = 0
    have_right = 0
    use_file = 0

    # the third argument is the right position only if it is numeric;
    # otherwise it is taken to be the input-selection keyword
    if ($n_args > 2) {
        if (valid_number($3)) {
            right = $3
            have_right = 1
        }
    }

    # use which input?
    if ($n_args > 2 + have_right) {
        # FIX: $args is indexed from 1 (see search_text()), so the keyword
        # following str, left [, right] is at 3 + have_right, not 2 + have_right
        arg = $args[3 + have_right]
        use_file = (arg == "file" || arg == "document")
    }
    else if (str == "")
        use_file = 1

    if (use_file)
        len = $text_length      # use document text
    else
        len = length(str)

    if (!have_right)
        right = len

    if (left < 0)
        left = max(0, len + left)
    if (right < 0)
        right = max(0, len + right)
    left = min(left, len)
    right = min(right, len)

    if (left >= right)
        return ""

    # FIX: honour the input choice made above instead of re-testing str, so
    # that a "file"/"document"/"string" keyword is really obeyed
    if (use_file)
        return get_range(left, right)   # use document text
    else
        return substring(str, left, right)
}

# ------------------------------------------------------------------------------
# search_text(str, pat, pos, ...): returns the results of a call to search() or
#       search_string() depending on whether the string str is empty, or whether
#       the keyword "file", "document" or "string" is seen as one of the
#       arguments beyond the third.
#
#       The remaining keyword arguments may appear in any order: a search type
#       (default "literal"), a direction (default "forward") and a wrap mode
#       (default "nowrap"). Giving a keyword group twice, or an unknown
#       keyword, aborts the macro by calling a deliberately undefined routine
#       whose name describes the error.
# ------------------------------------------------------------------------------
define search_text {
    str = $1
    pat = $2
    pos = $3
    type = ""
    direction = ""
    wrap = ""
    use_file = -1       # -1: not chosen yet

    # get other arguments
    for (argn = 4; argn <= $n_args; argn++) {
        arg = $args[argn]
        if (arg == "wrap" || \
            arg == "nowrap") {
            if (wrap != "")
                search_text_wrap_value_assigned_twice()
            wrap = arg
        }
        else if (arg == "forward" || \
                 arg == "backward") {
            if (direction != "")
                search_text_direction_value_assigned_twice()
            direction = arg
        }
        else if (arg == "literal" || \
                 arg == "case" || \
                 arg == "word" || \
                 arg == "caseWord" || \
                 arg == "regex" || \
                 arg == "regexNoCase") {
            if (type != "")
                search_text_type_value_assigned_twice()
            type = arg
        }
        else if (arg == "file" || \
                 arg == "document") {
            if (use_file != -1)
                search_text_use_file_value_assigned_twice()
            use_file = 1
        }
        else if (arg == "string") {
            if (use_file != -1)
                search_text_use_file_value_assigned_twice()
            use_file = 0
        }
        else
            search_text_unknown_keyword_argument()
    }

    # set up defaults
    if (type == "")
        type = "literal"
    if (direction == "")
        direction = "forward"
    if (wrap == "")
        wrap = "nowrap"
    if (use_file == -1)
        use_file = (str == "")

    if (use_file)
        return search(pat, pos, type, direction, wrap)
    else
        return search_string(str, pat, pos, type, direction, wrap)
}

# ==============================================================================
# base_to_int(base, str [, "strict"]): returns the value of the string as an
#       integer of base base. All (and only) valid base characters are
#       considered. Scanning stops at first invalid character, returning "" if
#       the argument "strict" is passed. For bases 2 to 36, the characters are
#       0-9, a-z (case insignificant); for 37 to 64 they are the base64
#       characters A-Z, a-z, 0-9, +, / and are case significant. (See RFC 1421,
#       RFC 2045.) Bases outside the range 2 - 64 are not allowed.
#
#       Returns 0 when no string is passed. In strict mode an empty string
#       also yields "".
# ==============================================================================
define base_to_int {
    # get base
    base = $1
    if (!(2 <= base && base <= 64))
        return base_to_int_called_with_invalid_base()

    lc = "0123456789abcdefghijklmnopqrstuvwxyz"
    uc = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

    # get parameter
    if ($n_args < 2)
        return 0
    is_strict = 0
    if ($n_args > 2)
        is_strict = ($3 == "strict")

    if (base > 36) {
        uc = b64
        lc = b64
    }

    pos = -1
    val = 0
    len = length($2)
    for (i = 0; i < len; ++i) {
        c = substring($2, i, i + 1)
        # FIX: search case-sensitively. The default "literal" search ignores
        # case, which made "a" (26) decode as "A" (0) for bases above 36.
        # Bases up to 36 stay case-insignificant because both the lower and
        # the upper case digit strings are tried.
        pos = search_string(lc, c, 0, "case")
        if (pos < 0)
            pos = search_string(uc, c, 0, "case")
        if (pos < 0 || pos >= base) {
            pos = -1
            break
        }
        val = (base * val) + pos
    }

    # pos is -1 here if scanning stopped early (or nothing was scanned)
    if (pos < 0 && is_strict)
        return ""
    return val
}

# ==============================================================================
# oct_to_int(oct_str [, "strict"]): returns the value of the oct string. All
#       (and only) oct characters are considered. Scanning stops at first
#       non-oct character. If a second parameter is supplied and equals "strict"
#       all input characters must be oct digits, otherwise the function fails
#       with the string "".
# ==============================================================================
define oct_to_int {
    if ($n_args == 0)
        return 0
    else if ($n_args == 1)
        return base_to_int(8, $1)
    else
        return base_to_int(8, $1, $2)
}

# ==============================================================================
# hex_to_int(hex_str [, "strict"]): returns the value of the hex string. All
#       (and only) hex characters are considered. Scanning stops at first
#       non-hex character. If a second parameter is supplied and equals "strict"
#       all input characters must be hex digits, otherwise the function fails
#       with the string "".
# ==============================================================================
define hex_to_int {
    if ($n_args == 0)
        return 0
    else if ($n_args == 1)
        return base_to_int(16, $1)
    else
        return base_to_int(16, $1, $2)
}

# ==============================================================================
# hex_to_char(hex_str): returns the character whose code is the value of the
#       hex string. All (and only) hex characters are considered. Returns ""
#       for values outside 1 to 255.
# ==============================================================================
define hex_to_char {
    i = hex_to_int($1)
    if (i <= 0 || i >= 256)
        return ""
    all = $EXTENSIONS_NM["chars_all"]
    return substring(all, i, i + 1)
}

# ==============================================================================
# hex2_to_str(hex_str): returns a string whose value is that of the
#       concatenation of characters whose hex values are listed, two digits at
#       a time. All non-hex-digits in the input are ignored. A nul value is
#       translated to the unlikely string [[[---!NUL!---]]]
# ==============================================================================
define hex2_to_str {
    # remove all non-hex characters
    # FIX: "copy" is required - without it replace_in_string() returns ""
    # when there is nothing to remove, and clean input decoded to nothing
    s = replace_in_string($1, "[^0-9A-Fa-f]|\n", "", "regex", "copy")
    res = ""
    i = 0
    for (cc = substring(s, i, i + 2); cc != ""; i += 2, cc = substring(s, i, i + 2)) {
        if (cc == "00")
            res = res "[[[---!NUL!---]]]"
        else
            res = res hex_to_char(cc)
    }
    return res
}

# ==============================================================================
# base64_to_chars(str [, b62, b63]): decodes string str in base64 as a sequence
#       of byte valued characters returned. The +/ values for 62 and 63 can be
#       replaced. All (and only) valid base characters are considered, except
#       that whitespace is ignored. Scanning stops at first invalid input
#       character, or the terminator. (See RFC1421, RFC 2045.) It also stops if
#       a zero character is found in the output.
#
#       An unusable b62 or b63 aborts the macro through a call to a
#       deliberately undefined routine named after the error.
# ==============================================================================
define base64_to_chars {
    str = $1
    b0_61 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    b62 = "+"
    b63 = "/"
    if ($n_args >= 2)
        b62 = $2
    if ($n_args >= 3)
        b63 = $3
    if (length(b62) != 1 || !isgraph(b62) || !isascii(b62) || isinstring(b62, b0_61))
        return base64_to_chars_called_with_invalid_b62()
    if (length(b63) != 1 || !isgraph(b63) || !isascii(b63) || isinstring(b63, b0_61) \
        || b62 == b63)
        return base64_to_chars_called_with_invalid_b63()
    b64 = b0_61 b62 b63

    # drop white space, then everything from the first non-base64 character on
    str = replace_in_string(str, "(?n\\s+)", "", "regex", "copy")
    inp = replace_in_string(str, "[^"b0_61"\\"b62"\\"b63"].*", "", "regex", "copy")

    s06 = 64
    s12 = 64 * s06
    s18 = 64 * s12

    len = length(inp)
    res = ""
    for (i = 0; i < len; i += 4) {
        # eat off 4 chars at a time, to make 3 on output
        # FIX: look the digits up case-sensitively; the default search type
        # ignores case, so "a" (26) used to decode as "A" (0)
        A = search_string(b64, substring(inp, i, i + 1), 0, "case")
        B = search_string(b64, substring(inp, i + 1, i + 2), 0, "case")
        C = search_string(b64, substring(inp, i + 2, i + 3), 0, "case")
        D = search_string(b64, substring(inp, i + 3, i + 4), 0, "case")
        # digits missing from a short final group count as zero
        if (A == -1)
            A = 0
        if (B == -1)
            B = 0
        if (C == -1)
            C = 0
        if (D == -1)
            D = 0
        # FIX: accumulate in a separate variable; the 24-bit value used to be
        # stored in the loop index i, which wrecked the iteration
        v = A * s18 + B * s12 + C * s06 + D
        z = v & 255
        v /= 256
        y = v & 255
        v /= 256
        x = v & 255
        # tochar(0) is "", so a zero byte shortens r and ends the decoding
        r = tochar(x) tochar(y) tochar(z)
        res = res r
        if (length(r) < 3)
            break
    }
    return res
}

# ==============================================================================
# chars_to_base64(str [, b62, b63]): codes string str to the returned base64
#       sequence. The +/ values for 62 and 63 can be replaced. The sequence is
#       broken with a new line every 64 coding characters. (See RFC1421,
#       RFC 2045.) No "=" padding is appended: a final group of one or two
#       input characters produces two or three coding characters.
#
#       An unusable b62 or b63 aborts the macro through a call to a
#       deliberately undefined routine named after the error.
# ==============================================================================
define chars_to_base64 {
    str = $1
    b0_61 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    b62 = "+"
    b63 = "/"
    if ($n_args >= 2)
        b62 = $2
    if ($n_args >= 3)
        b63 = $3
    if (length(b62) != 1 || !isgraph(b62) || !isascii(b62) || isinstring(b62, b0_61))
        return chars_to_base64_called_with_invalid_b62()
    if (length(b63) != 1 || !isgraph(b63) || !isascii(b63) || isinstring(b63, b0_61) \
        || b62 == b63)
        return chars_to_base64_called_with_invalid_b63()
    b64 = b0_61 b62 b63

    # FIX: the input is plain text, not base64, so it must not be filtered
    # down to base64 characters first (that was copied from the decoder)

    # FIX: three 8-bit characters are packed, so the weights are powers of
    # 256 (they used to be powers of 64)
    s08 = 256
    s16 = 256 * s08

    len = length(str)
    coded = ""
    for (i = 0; i < len; i += 3) {
        # eat off 3 chars at a time, to make 4 on output
        A = toascii(substring(str, i, i + 1))
        B = toascii(substring(str, i + 1, i + 2))
        C = toascii(substring(str, i + 2, i + 3))
        # toascii() gives 0 for a missing character, -1 for an unknown one
        if (A == -1)
            A = 0
        if (B == -1)
            B = 0
        if (C == -1)
            C = 0
        # FIX: keep the 24-bit group out of the loop index i
        v = A * s16 + B * s08 + C
        z = v & 63
        v /= 64
        y = v & 63
        v /= 64
        x = v & 63
        v /= 64
        w = v & 63
        # FIX: pick the digits from the 64-character alphabet b64, not from
        # the single character b63
        r = substring(b64, w, w + 1) substring(b64, x, x + 1) \
            substring(b64, y, y + 1) substring(b64, z, z + 1)
        # a short final group only carries 2 or 3 significant digits
        left = len - i
        if (left == 1)
            r = substring(r, 0, 2)
        else if (left == 2)
            r = substring(r, 0, 3)
        coded = coded r
    }

    # break the sequence with a new line every 64 coding characters
    res = ""
    coded_len = length(coded)
    for (p = 0; p < coded_len; p += 64) {
        if (p > 0)
            res = res "\n"
        res = res substring(coded, p, min(p + 64, coded_len))
    }
    return res
}

# ==============================================================================
# int_to_binbase(bits, digits, i, mindigits): returns the binary representation
#       of the integer i in the base of 2^bits. Values are treated as unsigned;
#       this will never have a minus sign. digits is a string containing the
#       digits to use, in order, starting with the zero digit. Used by
#       int_to_hex() and int_to_oct().
#
# Parameters:
#       $1 - bits: number of bits represented by one output digit
#       $2 - digits: the digit characters, zero digit first
#       $3 - i: the integer to convert
#       $4 - mindigits: minimum number of digits in the result (zero padded)
# ==============================================================================
define int_to_binbase {
    bits = $1
    digits = $2
    i = $3
    mindigits = $4
    carry = (i < 0)     # 1 for negative input, used to adjust the halving
    max_len = 1         # doubled once per bit; the loop stops when it hits 0
                        # NOTE(review): presumably relies on integer overflow
                        # to bound the loop to the machine word size - confirm
    res = ""
    acc = 0             # value of the digit being assembled
    accbit = 1          # weight of the next bit within that digit
    accbits = 0         # number of bits assembled so far
    while ((mindigits > 0 || i != 0) && max_len != 0) {
        p = (i % 2) != 0        # lowest bit of i
        i -= p & carry
        acc += (p * accbit)
        accbit *= 2
        ++accbits
        if (accbits == bits) {
            # a complete digit: prepend it and start the next one
            res = substring(digits, acc, acc + 1) res
            accbits = 0
            acc = 0
            accbit = 1
            --mindigits
        }
        max_len *= 2            # shift up
        i = i / 2
    }
    # emit a partially assembled leading digit, if it is not zero
    if (acc > 0) {
        res = substring(digits, acc, acc + 1) res
        --mindigits
    }
    # pad on the left with the zero digit up to mindigits
    while (mindigits-- > 0) {
        res = substring(digits, 0, 1) res
    }
    return res
}

# ==============================================================================
# int_to_hex(int, mindigits): returns the hex representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
#       Hex letter "digits" will be lowercase: use int_to_HEX() for uppercase.
# ==============================================================================
define int_to_hex {
    mindigits = 1
    int = $1
    if ($n_args > 1)
        mindigits = $2
    if (mindigits < 1)
        mindigits = 1
    return int_to_binbase(4, "0123456789abcdef", int, mindigits)
}

# ==============================================================================
# int_to_HEX(int, mindigits): returns the hex representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
#       Hex letter "digits" will be uppercase: use int_to_hex() for lowercase.
# ==============================================================================
define int_to_HEX {
    mindigits = 1
    int = $1
    if ($n_args > 1)
        mindigits = $2
    # never ask for fewer than one digit
    if (mindigits < 1)
        mindigits = 1
    # 4 bits per digit, upper case digit set
    return int_to_binbase(4, "0123456789ABCDEF", int, mindigits)
}

# ==============================================================================
# int_to_oct(int, mindigits): returns the octal representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
# ==============================================================================
define int_to_oct {
    mindigits = 1
    int = $1
    if ($n_args > 1)
        mindigits = $2
    # never ask for fewer than one digit
    if (mindigits < 1)
        mindigits = 1
    # 3 bits per digit
    return int_to_binbase(3, "01234567", int, mindigits)
}

# ==============================================================================
# int_to_bin(int, mindigits): returns the binary representation of the integer.
#       This will never have a minus sign. By default, mindigits is set to one.
#       The result is padded on the left with "0" up to mindigits digits.
# ==============================================================================
define int_to_bin {
    mindigits = 1
    i = $1
    if ($n_args > 1)
        mindigits = $2
    if (mindigits < 1)
        mindigits = 1
    carry = (i < 0)             # 1 for negative input
    max_len = -1                # assume all 1s
    res = ""
    # max_len is doubled on every pass and the loop ends once it reaches 0.
    # NOTE(review): presumably this depends on the doubling shifting the bits
    # of -1 out of the machine integer, one per output digit - confirm.
    while ((mindigits > 0 || i != 0) && max_len != 0) {
        p = (i % 2) != 0        # lowest bit of i, as 0 or 1
        res = p res             # prepend it to the digits found so far
        i -= p & carry          # adjust negative values before halving
        mindigits--
        max_len *= 2            # shift up
        i = i / 2
    }
    # left-pad with zeros if fewer than mindigits digits were produced
    while (mindigits-- > 0)
        res = "0" res
    return res
}

# ==============================================================================
# char_to_hex(chars [, prefix]): returns the hex values of each of the
#       characters passed. Each character is transformed into 2 hex digits.
#       A prefix can be added in front of each character, eg "\\x".
#       Hex letter "digits" will be lowercase: use char_to_HEX() for uppercase.
# ==============================================================================
define char_to_hex {
    text = $1
    lead = ""
    if ($n_args > 1)
        lead = $2
    table = $EXTENSIONS_NM["chars_all"]
    out = ""
    count = length(text)
    for (k = 0; k < count; k++) {
        # the character code is its index in the table; index 0 is a dummy
        # for NUL, so the lookup starts at 1
        code = search_string(table, substring(text, k, k + 1), 1, "case")
        if (code >= 1)
            out = out lead int_to_hex(code, 2)
    }
    return out
}

# ==============================================================================
# char_to_HEX(chars [, prefix]): as char_to_hex(), but with upper case hex
#       letters: every character of chars becomes the optional prefix (eg
#       "\\x") followed by its code as 2 hex digits.
# ==============================================================================
define char_to_HEX {
    text = $1
    lead = ""
    if ($n_args > 1)
        lead = $2
    table = $EXTENSIONS_NM["chars_all"]
    out = ""
    count = length(text)
    for (k = 0; k < count; k++) {
        # the character code is its index in the table; index 0 is a dummy
        # for NUL, so the lookup starts at 1
        code = search_string(table, substring(text, k, k + 1), 1, "case")
        if (code >= 1)
            out = out lead int_to_HEX(code, 2)
    }
    return out
}

# ==============================================================================
# char_to_oct(chars [, prefix]): returns the octal values of each of the
#       characters passed. Each character is transformed into 3 octal digits.
#       A prefix can be added in front of each character, eg "\\".
# ==============================================================================
define char_to_oct {
    text = $1
    lead = ""
    if ($n_args > 1)
        lead = $2
    table = $EXTENSIONS_NM["chars_all"]
    out = ""
    count = length(text)
    for (k = 0; k < count; k++) {
        # the character code is its index in the table; index 0 is a dummy
        # for NUL, so the lookup starts at 1
        code = search_string(table, substring(text, k, k + 1), 1, "case")
        if (code >= 1)
            out = out lead int_to_oct(code, 3)
    }
    return out
}

# ==============================================================================
# start_of_line_pos([pos [, string]]): returns the position at which the line
#       containing pos (default $cursor) starts, that is the position just
#       after the preceding newline, or 0 if there is none. The current
#       document is examined unless string is given.
# ==============================================================================
define start_of_line_pos {
    if ($n_args >= 1)
        where = $1
    else
        where = $cursor

    # look backward for a newline, starting just in front of the position
    if ($n_args >= 2)
        hit = search_string($2, "\n", where - 1, "case", "backward")
    else
        hit = search("\n", where - 1, "case", "backward")

    if (hit >= 0)
        return $search_end      # end of the newline found = start of line
    return 0
}

# ==============================================================================
# end_of_line_pos([pos [, string]]): returns the position at which the line
#       containing pos (default $cursor) ends, that is the position of the
#       next newline, or the end of the text if there is none. The current
#       document is examined unless string is given.
# ==============================================================================
define end_of_line_pos {
    if ($n_args >= 1)
        where = $1
    else
        where = $cursor

    if ($n_args >= 2) {
        hit = search_string($2, "\n", where, "case")
        if (hit < 0)
            return length($2)
        return hit
    }

    hit = search("\n", where, "case")
    if (hit < 0)
        return $text_length
    return hit
}

# ==============================================================================
# line_of_pos([pos [, string]]): returns the line containing the position pos
#       (default $cursor) as a string without a trailing newline. If string is
#       supplied, it is used instead of the current document.
# ==============================================================================
define line_of_pos
{
    pos = $cursor
    if ($n_args >= 1)
        pos = $1

    if ($n_args < 2)
    {
        # work in the current document
        e = end_of_line_pos(pos)                        # end of THIS line
        pe = search("\n", e - 1, "case", "backward")    # previous newline
        if (pe >= 0)
            b = $search_end
        else
            b = 0
        return get_range(b, e)
    }

    # work in the supplied string: the text must be taken from that string,
    # not from the document
    e = end_of_line_pos(pos, $2)
    pe = search_string($2, "\n", e - 1, "case", "backward")
    if (pe >= 0)
        b = $search_end
    else
        b = 0
    return substring($2, b, e)
}

# ==============================================================================
# break_lines_over(maxchars, string [, pref]): tries to break string at word
#     boundaries adding newlines followed by the string pref (if present).
#     Returns the modified string. Newlines already present in string are
#     kept and are not followed by pref. The length of pref is not counted
#     against maxchars. If maxchars is less than 1 no breaking is possible
#     and string is returned unchanged.
# ==============================================================================
define break_lines_over
{
    maxchars = $1
    string = $2
    if ($n_args > 2)
        pref = $3
    else
        pref = ""

    # a width below 1 could never consume any text: avoid looping forever
    if (maxchars < 1)
        return string

    res = ""
    while (length(string) > maxchars)
    {
        nlpos = search_string(string, "\n", 0)
        if (0 <= nlpos && nlpos <= maxchars)
        {
            # an existing newline already ends this line early enough
            res = res substring(string, 0, nlpos + 1)
            string = substring(string, nlpos + 1)
        }
        else
        {
            # break at the last end-of-word at or before maxchars; if there
            # is none, break hard at maxchars
            nlpos = search_string(string, ">", maxchars, "regex", "backward")
            if (!(0 <= nlpos && nlpos <= maxchars))
                nlpos = maxchars
            res = res substring(string, 0, nlpos) "\n" pref
            string = substring(string, nlpos)
        }
    }
    res = res string

    return res
}

# ==============================================================================
# quote_literal_as_c(string): returns a string representing a valid C
#     string for the parameter passed.
# ==============================================================================
define quote_literal_as_c
{
    src = $1
    out = ""
    n = length(src)

    for (k = 0; k < n; k++)
    {
        ch = substring(src, k, k + 1)
        # index of ch among the control characters that have a letter escape
        ctl = search_string("\a\b\f\n\r\t\v", ch, 0, "case")

        if (search_string("\\'\"", ch, 0, "case") >= 0)
            out = out "\\" ch                       # backslash or quote
        else if (ctl >= 0)
            out = out "\\" substring("abfnrtv", ctl, ctl + 1)
        else if (isprint(ch) && isascii(ch))
            out = out ch                            # plain printable ASCII
        else
            # NOTE(review): a hex escape directly followed by a hex digit
            # character is read by C compilers as one longer escape
            out = out char_to_hex(ch, "\\x")
    }

    return out
}

# ==============================================================================
# quote_literal_as_regex(string): returns a string representing a valid regex
#     search string for the parameter passed.
# ==============================================================================
define quote_literal_as_regex
{
    src = $1
    out = ""
    n = length(src)

    for (k = 0; k < n; k++)
    {
        ch = substring(src, k, k + 1)
        # index of ch among the control characters that have a letter escape
        ctl = search_string("\a\b\e\f\n\r\t\v", ch, 0, "case")

        if (search_string("\\|()[]{}<>.*+?^$&-", ch, 0, "case") >= 0)
            out = out "\\" ch                       # regex metacharacter
        else if (ctl >= 0)
            out = out "\\" substring("abefnrtv", ctl, ctl + 1)
        else
            out = out ch
    }

    return out
}

# ==============================================================================
# quote_literal_as_subst(string): returns a string representing a valid
#     replace substitution string for the parameter passed.
# ==============================================================================
define quote_literal_as_subst
{
    src = $1
    out = ""
    n = length(src)

    for (k = 0; k < n; k++)
    {
        ch = substring(src, k, k + 1)
        # index of ch among the control characters that have a letter escape
        ctl = search_string("\a\b\e\f\n\r\t\v", ch, 0, "case")

        if (search_string("\\&", ch, 0, "case") >= 0)
            out = out "\\" ch               # special in a substitution string
        else if (ctl >= 0)
            out = out "\\" substring("abefnrtv", ctl, ctl + 1)
        else
            out = out ch
    }

    return out
}

# ==============================================================================
# quote_literal_for_shell_esc(string): returns a string representing the input
#     string with backslash escapes and/or octal escapes. It quotes the
#     following with a backslash:
#       space ! " # $ & ' ( ) * ; < > ? [ \ ] ^ ` { | } ~
#     It "C-style" backslash-escapes the following control characters:
#       bell (\a), back-space (\b), form-feed (\f), new-line/line-feed (\n),
#       carriage-return (\r), tab (\t), vertical-tab (\v)
#     It "C-style" octal-escapes the other characters that fall into the
#     iscntrl(ch) group, as defined by the iscntrl() macro function.
# ==============================================================================
define quote_literal_for_shell_esc
{
    src = $1
    out = ""
    n = length(src)

    for (k = 0; k < n; k++)
    {
        ch = substring(src, k, k + 1)
        # index of ch among the control characters that have a letter escape
        ctl = search_string("\a\b\f\n\r\t\v", ch, 0, "case")

        if (search_string(" !\"#$&'()*;<>?[\\]^`{|}~", ch, 0, "case") >= 0)
            out = out "\\" ch                   # shell-special character
        else if (ctl >= 0)
            out = out "\\" substring("abfnrtv", ctl, ctl + 1)
        else if (iscntrl(ch))
            out = out char_to_oct(ch, "\\")     # other control: octal escape
        else
            out = out ch
    }

    return out
}

# ==============================================================================
# regex_to_quoted_string(regex): adds quotes and backslashes to convert a
#     string containing a valid regex into one usable in NEdit Macro code.
#     A backslash followed by one of the letters abefnrtv is copied as is
#     (the macro string escape and the regex escape denote the same
#     character). An escaped backslash (two backslashes) is quoted as a pair,
#     so that its second half is never taken as the start of such an escape.
# ==============================================================================
define regex_to_quoted_string
{
    string = $1
    res = ""
    len = length(string)

    for (i = 0; i < len; i++)
    {
        c = substring(string, i, i + 1)
        c2 = substring(string, i, i + 2)
        # index of c among the control characters that have a letter escape
        ctl = search_string("\a\b\e\f\n\r\t\v", c, 0, "case")

        if (c2 == "\\\\")
        {
            # escaped backslash: emit both halves quoted and skip the second
            res = res "\\\\\\\\"
            i++
        }
        else if (search_string(c2, "^\\\\[abefnrtv]", 0, "regex") == 0)
        {
            res = res c2    # already a back-slash escaped control char sequence
            i++             # skip the next character
        }
        else if (search_string("\"\\", c, 0, "case") >= 0)
            res = res "\\" c    # quote that quote or back-slash
        else if (ctl >= 0)
            # convert control char to escape seq
            res = res "\\" substring("abefnrtv", ctl, ctl + 1)
        else
            res = res c         # leave anything else alone
    }

    return "\"" res "\""
}

# ==============================================================================
# regex_capturing_parens(re): counts the number of regular expression capturing
#     parentheses in a string.
#     These are non-escaped open-parentheses not
#     followed by a question mark. It does not check for valid regexes.
# ==============================================================================
define regex_capturing_parens
{
    re = $1 ""
    # an open-parenthesis that is neither preceded by a backslash nor followed
    # by a question mark
    group_paren = "(?<!\\\\)\\((?!\\?)"
    i = 0

    for (pos = search_string(re, group_paren, 0, "regex"); \
         pos >= 0; \
         pos = search_string(re, group_paren, $search_end, "regex"))
    {
        ++i
    }

    return i
}

# ==============================================================================
# unquote(str): returns the result of removing quoting backslashes from str. It
#     only understands the special control character backslash sequences and
#     backslash itself. Otherwise, backslashes are removed.
# ==============================================================================
define unquote
{
    str = $1 ""
    res = ""
    end = 0

    for (pos = search_string(str, "\\", 0); \
         pos >= 0; \
         pos = search_string(str, "\\", end))
    {
        # copy the text before the backslash, then step over the backslash
        res = res substring(str, end, pos)
        end = $search_end

        # a backslash at the very end of str has nothing to quote
        nxt = substring(str, end, end + 1)
        if (nxt != "")
        {
            # look the escape letter up; the same index in the second string
            # gives the character it stands for
            ind = search_string("abefnrtv\\", nxt, 0, "case")
            if (ind >= 0)
            {
                end++
                res = res substring("\a\b\e\f\n\r\t\v\\", ind, ind + 1)
            }
        }
    }
    res = res substring(str, end)

    return res
}

# ==============================================================================
# line_col_to_pos(lineNum [, colNum [, tabSize [, string]]]): returns the
#     position of the character position indicated by lineNum and colNum,
#     assuming a particular tabSize. By default, colNum is zero and tabSize
#     equals $tab_dist. Lines are numbered from 1, columns from zero. If there
#     are not enough lines, -1 is returned; if there are not enough columns in
#     the addressed line, the position of the last character is returned; if
#     the column is "inside" a tab, return the tab's position. If string is
#     present, measurements take place within it; otherwise the current
#     document is used.
# ==============================================================================
define line_col_to_pos
{
    wantLine = $1
    wantCol = 0
    if ($n_args >= 2 && $2 >= 0)
        wantCol = $2
    tabW = $tab_dist
    if ($n_args >= 3 && $3 >= 0)
        tabW = $3

    # without a fourth argument all searches run in the current document
    inDoc = ($n_args <= 3)
    text = ""
    if (!inDoc)
        text = $4

    # step 1: find where the requested line starts
    lineStart = 0
    if (wantLine > 1)
    {
        skipRE = "(?:^.*\n){" (wantLine - 1) "}"
        if (inDoc)
            hit = search(skipRE, 0, "regex")
        else
            hit = search_string(text, skipRE, 0, "regex")
        if (hit < 0)
            return -1                   # not enough lines
        lineStart = $search_end
    }

    # column zero is the line start itself
    if (wantCol <= 0)
        return lineStart

    # step 2: find where the line ends
    if (inDoc)
        hit = search("^.*$", lineStart, "regex")
    else
        hit = search_string(text, "^.*$", lineStart, "regex")
    if (hit < 0)
        return lineStart
    lineEnd = $search_end

    # step 3: walk the line in chunks (one tab, or one run of non-tabs),
    # tracking the column reached at the end of each chunk
    at = lineStart
    col = 0
    while (at < lineEnd)
    {
        if (inDoc)
            tab = search("\t", at)
        else
            tab = search_string(text, "\t", at)
        if (tab < at || tab > lineEnd)
            tab = lineEnd               # no further tab on this line

        if (tab == at)
        {
            # at a tab: if it reaches beyond wantCol, the tab is the answer
            colAfter = col + tabW - (col % tabW)
            if (colAfter > wantCol)
                return at
            at++
        }
        else
        {
            # at a run of non-tabs: if it reaches wantCol, step back from
            # the end of the run by the amount of the overrun
            colAfter = col + tab - at
            if (colAfter >= wantCol)
                return tab - (colAfter - wantCol)
            at = tab
        }
        col = colAfter
    }

    # the line is shorter than wantCol columns
    return lineEnd
}

# ==============================================================================
# str_line_col_to_pos(string, lineNum [, colNum [, tabSize]]): returns the
#     position of the character position indicated by lineNum and colNum,
#     assuming a particular tabSize. By default, colNum is zero and tabSize
#     equals $tab_dist. Lines are numbered from 1, columns from zero.
#     If there
#     are not enough lines, -1 is returned; if there are not enough columns in
#     the addressed line, the position of the last character is returned; if
#     the column is "inside" a tab, return the tab's position. All
#     measurements take place within string. This function calls
#     line_col_to_pos(lineNum, colNum, tabSize, string) with appropriate
#     defaults.
# ==============================================================================
define str_line_col_to_pos
{
    # same defaults as line_col_to_pos(): column 0, tab width $tab_dist
    col = 0
    if ($n_args >= 3 && $3 >= 0)
        col = $3
    tabW = $tab_dist
    if ($n_args >= 4 && $4 >= 0)
        tabW = $4

    return line_col_to_pos($2, col, tabW, $1)
}

# ==============================================================================
# pos_to_line_forwardNlines(pos, nLines [, string]): returns the position of
#     the start of the line nLines away; fails with -1. If string is supplied
#     the search is performed in the string, otherwise in the current window.
# ==============================================================================
define pos_to_line_forwardNlines
{
    from = $1
    count = $2 + 0

    if (count == 0)
        return from             # no lines to skip
    if (count < 0)
        return -1               # cannot move backwards

    # consume exactly "count" newline-terminated stretches of text
    skipRE = "(?:.*\n){" count "}"
    if ($n_args > 2)
        hit = search_string($3, skipRE, from, "regex")
    else
        hit = search(skipRE, from, "regex")

    if (hit >= 0)
        return $search_end
    return -1
}

# ==============================================================================
# pos_to_line(pos [, string]): returns the line number (counted from 1) of the
#     position pos. If pos is too large, this returns the last line number
#     for the document (so the minimum value is 1). If string is present,
#     measurement takes place in the string; otherwise the current document
#     is used.
# ==============================================================================
define pos_to_line
{
    target = $1
    inDoc = ($n_args < 2)
    if (inDoc)
    {
        text = ""
        total = $text_length
    }
    else
    {
        text = $2
        total = length(text)
    }
    if (target >= total)
        target = total

    # move target back to the start of the line that contains it
    if (inDoc)
        hit = search("\n", target - 1, "backward")
    else
        hit = search_string(text, "\n", target - 1, "backward")
    if (hit >= 0)
        target = $search_end
    else
        target = 0              # on the first line, no earlier "\n"

    # hop forward from line start to line start, "step" lines at a time; the
    # step is doubled after a successful hop and halved after an overshoot
    lineNo = 1
    step = 1
    at = 0
    while (at < target)
    {
        if (inDoc)
            landed = pos_to_line_forwardNlines(at, step)
        else
            landed = pos_to_line_forwardNlines(at, step, text)

        if (landed == -1 || landed > target)
            step /= 2
        else
        {
            lineNo += step
            at = landed
            # avoid backtracking too far in pos_to_line_forwardNlines()
            if (step < 64)
                step *= 2
        }
    }

    return lineNo
}

# old in-window only version of pos_to_line:
# define pos_to_line__
# {
#     line = 0
#     pos = 0
#     posNum = $1
#
#     if (posNum >= $text_length)
#         posNum = $text_length
#
#     while (pos <= posNum)
#     {
#         line++
#         if (search("\n", pos) < 0)
#             break
#         else
#             pos = $search_end
#     }
#
#     return line
# }

# alternate version using string replacement to count newlines:
# define pos_to_line___
# {
#     pos = $1
#     if ($n_args > 1)
#         str = substring($2, 0, pos)
#     else
#         str = get_range(0, pos)
#
#     s = replace_in_string(str, ".*", "", "regex")
#     return 1 + length(s)
# }

# ==============================================================================
# pos_to_column(pos [, tabSize [, string]]): returns the column number (counted
#     from 0) of the position pos in its line. If pos is too large, this
#     returns the last column of the last line for the document (so the
#     minimum value is 0). If string is present, measurements are taken
#     within string; otherwise within the current document.
# ==============================================================================
define pos_to_column
{
    posNum = $1
    tabSize = $tab_dist
    if ($n_args >= 2)
        tabSize = $2

    # without a third argument all searches run in the current document
    useWin = ($n_args <= 2)
    if (useWin)
    {
        string = ""
        text_length = $text_length
        # find previous start of line
        pos = search("^", posNum, "regex", "backward")
    }
    else
    {
        string = $3
        text_length = length(string)
        # find previous start of line
        pos = search_string(string, "^", posNum, "regex", "backward")
    }
    if (posNum > text_length)
        posNum = text_length

    col = 0
    end = pos
    while (end < posNum)
    {
        # include "$" to search to stop searching too far beyond area of
        # interest; the end-of-line will always be at or beyond posNum
        if (useWin)
            pos = search("\t+|$", end, "regex")
        else
            pos = search_string(string, "\t+|$", end, "regex")

        # Only ordinary characters lie between end and posNum when the match
        # starts at or after posNum (or, defensively, when nothing matched).
        # Testing "at or after" rather than "after" matters: a match that
        # starts exactly at posNum (the end of line, or a tab sitting at
        # posNum) must not trigger the tab-stop rounding below.
        if (pos < 0 || pos >= posNum)
            return col + posNum - end

        col += pos - end                # add normal chars from last end to tab
        end = $search_end               # end of tab sequence
        if (end > posNum)               # is posNum in the sequence?
            end = posNum                # yes - so consider only to posNum
        col += tabSize * (end - pos)    # account for tab sequence
        col -= col % tabSize            # and adjust end column to tab boundary
    }

    return col
}

# ==============================================================================
# colwidth(s [, colpos [, tab_dist]]): returns the width in columns of a string
#     s positioned at column colpos in a line (defaults to zero), and uses a
#     tab width of tab_dist (defaults to $tab_dist) - it assumes no control
#     characters in the string other than tab, and no newlines.
# ==============================================================================
define colwidth
{
    s = $1
    if ($n_args > 1)
        colpos = $2
    else
        colpos = 0
    if ($n_args > 2)
        tab_dist = $3
    else
        tab_dist = $tab_dist

    etab = 0            # end of the most recent tab sequence
    last = 0            # start of the non-tab text not yet counted
    col = colpos

    for (tab = search_string(s, "\t+", 0, "regex"); \
         tab >= 0; \
         tab = search_string(s, "\t+", etab, "regex"))
    {
        etab = $search_end
        # add non-tab char widths since the previous tab sequence
        col += tab - last
        # add variable width of first tab in tab sequence
        col += tab_dist - (col % tab_dist)
        # add width of following tabs in tab sequence
        col += (etab - tab - 1) * tab_dist
        # text before this point is now accounted for; without this step the
        # characters before earlier tabs would be counted again
        last = etab
    }
    # add width of chars following last tab
    col += length(s) - etab

    # finally, remove our start column position
    return col - colpos
}

# ==============================================================================
# get_word_at_pos([pos [, string [, rebeg, reend]]]) - return the word at
#     position pos in the string string. If string is not given, use the
#     current document's text; if pos is not given, use the current cursor
#     position in the document. If the position is not at the start, inside,
#     or at the end of a word, an empty string is returned.
#     Words are found using the regular expressions rebeg, reend, if
#     supplied; by default "<" and ">" are used. If rebeg, reend are given,
#     and string is empty, use the current document's text. For example,
#     rebeg = "(?<=\\s|^)(?=\\S)", reend = "(?<=\\S)(?=\\s|$)" will pick out
#     a non-space sequence, and rebeg = "^", reend = "$" will pick out a line.
# ==============================================================================
define get_word_at_pos
{
    pos = $cursor
    string = ""
    use_string = 0
    rebeg = "<"
    reend = ">"

    if ($n_args >= 1)
        pos = $1 + 0
    if ($n_args >= 2)
    {
        string = $2 ""
        use_string = 1
    }
    if ($n_args >= 4)
    {
        # an empty string together with rebeg/reend means "use the document"
        use_string = (string != "")
        rebeg = $3 ""
        reend = $4 ""
    }

    # look to start and end of previous word; this must overlap pos
    if (use_string == 0)
        beg = search(rebeg, pos, "regex", "backward")
    else
        beg = search_string(string, rebeg, pos, "regex", "backward")

    # no word starts at or before pos: there is nothing to return (a failed
    # start of -1 would otherwise pass the overlap test below)
    if (beg < 0)
        return ""

    if (use_string == 0)
    {
        end = search(reend, beg, "regex")
        word = get_range(beg, end)
    }
    else
    {
        end = search_string(string, reend, beg, "regex")
        word = substring(string, beg, end)
    }

    if (beg <= pos && pos <= end)
        return word
    return ""   # failed
}

# ==============================================================================
# longest_line_len(strs): returns the length of the longest line in the string
#     strs.
# ==============================================================================
define longest_line_len
{
    strs = split($1, "\n")
    len = 0

    # measure each line, keeping the largest length seen
    nstr = strs[]
    for (i = 0; i < nstr; i++)
        len = max(len, length(strs[i]))

    return len
}

# ==============================================================================
# longest_line(strs): returns the longest line in the string strs. If several
#     lines share the greatest length, the first of them is returned.
# ==============================================================================
define longest_line
{
    strs = split($1, "\n")
    maxlen = 0
    str = ""

    # visit the lines by numeric index, as longest_line_len() does, so that
    # ties are resolved in line order
    nstr = strs[]
    for (i = 0; i < nstr; i++)
    {
        s = strs[i]
        len = length(s)
        if (len > maxlen)
        {
            str = s
            maxlen = len
        }
    }

    return str
}

# ==============================================================================
# longest_line_base_re(strs): returns information about the longest line in the
#     string strs. The line is found using regular expressions.
#     The result is an array with elements
#       ["start"]   position of the first character of the line
#       ["end"]     position of the line's newline (or end-of-string)
#       ["line"]    the line's content
#       ["length"]  the line's length
# ==============================================================================
define longest_line_base_re
{
    text = $1
    have = 0        # a line of this many characters is known to exist
    want = 1        # how many more characters to try for next
    pos = 0         # start of the longest line found so far

    for (;;)
    {
        hit = search_string(text, "^.{" (have + want) "}", pos, "regex")
        if (hit >= 0)
        {
            # some line is that long: try for more, from that line onwards
            want *= 2
            pos = hit
        }
        else if (want > 1)
        {
            # too ambitious: bank the increment that worked last time and
            # start again with a small one
            have += want / 2
            want = 1
        }
        else
            break       # not even one more character: have is the maximum
    }

    res["start"] = pos
    res["end"] = pos + have
    res["line"] = substring(text, pos, pos + have)
    res["length"] = have
    return res
}

# ==============================================================================
# longest_line_len_re(strs): returns the length of the longest line in the
#     string strs. The line is found using regular expressions. Uses
#     longest_line_base_re().
# ==============================================================================
define longest_line_len_re
{
    info = longest_line_base_re($1)
    return info["length"]
}

# ==============================================================================
# longest_line_re(strs): returns the longest line in the string strs. The line
#     is found using regular expressions. Uses longest_line_base_re().
# ==============================================================================
define longest_line_re
{
    info = longest_line_base_re($1)
    return info["line"]
}

# ==============================================================================
# rjust_s(strs [, maxstr]): returns a string holding each line of str padded
#     with spaces to the left so that it is as long as maxstr, or as long as
#     the longest line in strs.
# ==============================================================================
define rjust_s
{
    strs = split($1, "\n")

    # set up default width and padding string (maxstr turned into spaces)
    if ($n_args > 1)
        maxstr = replace_in_string($2, "(?n.)", " ", "regex")
    else
        maxstr = ""
    width = length(maxstr)

    # measure longest line, and maintain padding string of that length
    nstr = strs[]
    for (i = 0; i < nstr; i++)
    {
        str = strs[i]
        w = length(str)
        if (width < w)
        {
            width = w
            maxstr = replace_in_string(str, ".", " ", "regex")
        }
    }

    # assemble result: put the padding in front of each line and keep only
    # the last "width" characters
    res = ""
    nl = ""
    for (i = 0; i < nstr; i++)
    {
        str = strs[i]
        w = length(str)
        res = res nl substring(maxstr str, w, width + w)
        nl = "\n"
    }

    return res
}

# ==============================================================================
# rjust(strs [, minlen]): returns a string holding each line of str padded with
#     spaces to the left so that it is minlen long, or as long as the longest
#     line in strs. If minlen is not a number, its length as a string is
#     used. Without minlen, only the longest line sets the width.
#     Calls rjust_s().
# ==============================================================================
define rjust
{
    # pads is a template whose length is the minimum width wanted; it stays
    # empty when no minlen is given, so no minimum width is imposed
    pads = ""

    if ($n_args > 1)
    {
        if (valid_number($2))
        {
            minlen = $2
            if (minlen < 0)
                minlen = 0
            # grow pads till big enough/too big
            pads = "1234567890"
            while (length(pads) < minlen)
                pads = pads pads
            # crop it down to the right size
            pads = substring(pads, 0, minlen)
        }
        else
            pads = $2
    }

    return rjust_s($1, pads)
}

# ==============================================================================
# ljust_s(strs [, maxstr]): returns a string holding each line of str padded
#     with spaces to the right so that it is as long as maxstr, or as long as
#     the longest line in strs.
# ==============================================================================
define ljust_s
{
    strs = split($1, "\n")

    # set up default width and padding string (maxstr turned into spaces)
    if ($n_args > 1)
        maxstr = replace_in_string($2, "(?n.)", " ", "regex")
    else
        maxstr = ""
    width = length(maxstr)

    # measure longest line, and maintain padding string of that length
    nstr = strs[]
    for (i = 0; i < nstr; i++)
    {
        str = strs[i]
        w = length(str)
        if (width < w)
        {
            width = w
            maxstr = replace_in_string(str, ".", " ", "regex")
        }
    }

    # assemble result: put the padding after each line and keep only the
    # first "width" characters
    res = ""
    nl = ""
    for (i = 0; i < nstr; i++)
    {
        str = strs[i]
        res = res nl substring(str maxstr, 0, width)
        nl = "\n"
    }

    return res
}

# ==============================================================================
# ljust(strs [, minlen]): returns a string holding each line of str padded with
#     spaces to the right so that it is minlen long, or as long as the longest
#     line in strs. If minlen is not a number, its length as a string is
#     used. Without minlen, only the longest line sets the width.
#     Calls ljust_s().
# ==============================================================================
define ljust
{
    # pads is a template whose length is the minimum width wanted; it stays
    # empty when no minlen is given, so no minimum width is imposed
    pads = ""

    if ($n_args > 1)
    {
        if (valid_number($2))
        {
            minlen = $2
            if (minlen < 0)
                minlen = 0
            # grow pads till big enough/too big
            pads = "12345678901234567890"
            while (length(pads) < minlen)
                pads = pads pads
            # crop it down to the right size
            pads = substring(pads, 0, minlen)
        }
        else
            pads = $2
    }

    return ljust_s($1, pads)
}

# ==============================================================================
# trim(strs[, re[, nlSense]]): returns a string holding each line of strs with
#     ends matching "(?"nlSense"^(?:"re"))" and "(?"nlSense"(?:"re")$)"
#     removed. The default value of re is "\\s+", and of nlSense is "N".
# ==============================================================================
define trim
{
    re = "\\s+"
    nlSense = "N"
    if ($n_args >= 2)
        re = $2
    if ($n_args >= 3)
        nlSense = $3

    # one pattern for the front of a line, one for its end
    front = "(?" nlSense "^(?:" re "))"
    back = "(?" nlSense "(?:" re ")$)"

    out = replace_in_string($1, front, "", "regex", "copy")
    return replace_in_string(out, back, "", "regex", "copy")
}

# ==============================================================================
# chomp(str[, re]): returns a string made from the content of str with the tail
#     corresponding to re removed. This is done within a "(?n(?:...)$)" group,
#     so only the end of the whole string is affected. The default value of re
#     is "\n".
# ==============================================================================
define chomp
{
    re = "\n"
    if ($n_args >= 2)
        re = $2

    # strip the end
    return replace_in_string($1, "(?n(?:" re ")$)", "", "regex", "copy")
}

# ==============================================================================
# compress(strs[, re[, repl[, nlSense]]]): returns a string holding each line of
#     strs with each sequence identified as "(?"nlSense"(?:"re")+)" replaced
#     with repl. The default value of re is "\\s", of repl is " ", and of
#     nlSense is "N".
# ==============================================================================
define compress
{
    re = "\\s"
    repl = " "
    nlSense = "N"
    if ($n_args >= 2)
        re = $2
    if ($n_args >= 3)
        repl = $3
    if ($n_args >= 4)
        nlSense = $4

    # every run of one or more matches of re becomes a single repl
    runs = "(?" nlSense "(?:" re ")+)"
    return replace_in_string($1, runs, repl, "regex", "copy")
}

# ==============================================================================
# repeat(str, n): returns a string built by repeating str n times. It uses
#     string doubling to reduce concatenation operations.
# ==============================================================================
define repeat
{
    unit = $1 ""
    count = $2

    if (count < 1)
        return ""
    if (count == 1)
        return unit

    # binary method: for each bit of count, lowest first, append the current
    # unit when the bit is set, then double the unit for the next bit
    out = ""
    while (count)
    {
        if (count % 2 == 1)
            out = out unit
        count /= 2
        if (count)
            unit = unit unit
    }

    return out
}

# ==============================================================================
# reverse_string(str): returns a string built by reversing the order of
#     characters in str.
# ==============================================================================
define reverse_string
{
    src = $1 ""
    out = ""

    # append the characters one at a time, last one first
    for (k = length(src) - 1; k >= 0; k--)
        out = out substr(src, k, k + 1)

    return out
}

# ==============================================================================
# number(string [, "strict"|defval]): returns the numeric value read from the
#     front of the string argument. Fails if no leading number was found, with
#     defval (which defaults to zero), or, if "strict" is present, with an
#     invalid function call. [IDENTICAL TO THE to_number() FUNCTION]
# ==============================================================================
define number
{
    # a second argument other than "strict" is the fallback value, itself
    # read as a number
    fallback = 0
    if ($n_args > 1 && $2 != "strict")
        fallback = number($2)

    # keep only an optionally signed run of digits after leading white space
    lead = replace_in_string($1, "(?n^\\s*([-+]?\\d+).*)", "\\1", "regex")
    if (valid_number(lead))
        return lead + 0

    # strict mode: deliberately call a non-existent function so that the
    # macro stops with an error naming the problem
    if ($n_args > 1 && $2 == "strict")
        fallback = number_NoNumericPrefixFound()

    return fallback + 0
}

# ==============================================================================
# to_number(string [, "strict"|defval]): returns the numeric value read from the
#     front of the string argument. Fails if no leading number was found, with
#     defval (which defaults to zero), or, if "strict" is present, with an
#     invalid function call.
#     [IDENTICAL TO THE number() FUNCTION]
# ==============================================================================
define to_number
{
    # a second argument other than "strict" is the fallback value, itself
    # read as a number
    fallback = 0
    if ($n_args > 1 && $2 != "strict")
        fallback = to_number($2)

    # keep only an optionally signed run of digits after leading white space
    lead = replace_in_string($1, "(?n^\\s*([-+]?\\d+).*)", "\\1", "regex")
    if (valid_number(lead))
        return lead + 0

    # strict mode: deliberately call a non-existent function so that the
    # macro stops with an error naming the problem
    if ($n_args > 1 && $2 == "strict")
        fallback = number_NoNumericPrefixFound()

    return fallback + 0
}

# ==============================================================================
# String comparison functions
# ==============================================================================
# nuls(a): returns true if string a == ""
define nuls
{
    return length($1) == 0
}
# nz(a): returns true if string a != "" (in fact this returns length(a))
define nz
{
    n = length($1)
    return n
}
# ------------------------------------------------------------------------------
# eqs(a, b): returns true if scalars a and b compare as equal strings
define eqs
{
    order = string_compare($1, $2)
    return order == 0
}
# nes(a, b): returns true if scalars a and b compare as non-equal strings
define nes
{
    order = string_compare($1, $2)
    return order != 0
}
# lts(a, b): returns true if scalars a and b string-compare as a < b
define lts
{
    order = string_compare($1, $2)
    return order < 0
}
# les(a, b): returns true if scalars a and b string-compare as a <= b
define les
{
    order = string_compare($1, $2)
    return order <= 0
}
# gts(a, b): returns true if scalars a and b string-compare as a > b
define gts
{
    order = string_compare($1, $2)
    return order > 0
}
# ges(a, b): returns true if scalars a and b string-compare as a >= b
define ges
{
    order = string_compare($1, $2)
    return order >= 0
}
# ------------------------------------------------------------------------------
# eqsi(a, b): true if strings a and b compare as a == b (case insignificant)
define eqsi
{
    order = string_compare($1, $2, "nocase")
    return order == 0
}
# nesi(a, b): true if strings a and b compare as a != b (case insignificant)
define nesi
{
    order = string_compare($1, $2, "nocase")
    return order != 0
}
# ltsi(a, b): true if strings a and b compare as a < b (case
#     insignificant)
define ltsi
{
    return string_compare($1, $2, "nocase") < 0
}
# lesi(a, b): true if strings a and b compare as a <= b (case insignificant)
define lesi
{
    return string_compare($1, $2, "nocase") <= 0
}
# gtsi(a, b): true if strings a and b compare as a > b (case insignificant)
define gtsi
{
    return string_compare($1, $2, "nocase") > 0
}
# gesi(a, b): true if strings a and b compare as a >= b (case insignificant)
define gesi
{
    return string_compare($1, $2, "nocase") >= 0
}

# ==============================================================================
# Numeric string comparison functions
# ==============================================================================
# eq(a, b): true if strings a and b compare as a == b (numerically)
define eq
{
    return to_number($1) == to_number($2)
}
# ne(a, b): true if strings a and b compare as a != b (numerically)
define ne
{
    return to_number($1) != to_number($2)
}
# lt(a, b): true if strings a and b compare as a < b (numerically)
define lt
{
    return to_number($1) < to_number($2)
}
# le(a, b): true if strings a and b compare as a <= b (numerically)
define le
{
    return to_number($1) <= to_number($2)
}
# gt(a, b): true if strings a and b compare as a > b (numerically)
define gt
{
    return to_number($1) > to_number($2)
}
# ge(a, b): true if strings a and b compare as a >= b (numerically)
define ge
{
    return to_number($1) >= to_number($2)
}
# ------------------------------------------------------------------------------
# versions of "between"
# ------------------------------------------------------------------------------
# lt_lt(a, b, c): true if a < b < c
define lt_lt
{
    return lt($1, $2) && lt($2, $3)
}
# lt_le(a, b, c): true if a < b <= c
define lt_le
{
    return lt($1, $2) && le($2, $3)
}
# le_le(a, b, c): true if a <= b <= c
define le_le
{
    return le($1, $2) && le($2, $3)
}
# le_lt(a, b, c): true if a <= b < c
define le_lt
{
    return le($1, $2) && lt($2, $3)
}
# between(a, b, c): true if a <= b <= c or c <= b <= a
define between
{
    return le_le($1, $2, $3) || le_le($3, $2, $1)
}

# ------------------------------------------------------------------------------
# bit manipulation
# ------------------------------------------------------------------------------
# compl(a): returns the result of the bitwise complement of a
define compl
{
    return -$1 - 1
}

# xor(a, b): returns the result of bitwise a xor b
define xor
{
    a = $1
    b = $2
    # bits set in either operand, less the bits set in both
    return (a | b) & compl(a & b)
}

# lshift(val, bits): return (val << bits); if bits < 0, rshift() is used
define lshift
{
    val = $1
    bits = $2
    if (bits < 0)
        return rshift(val, -bits)
    while (bits-- > 0)
        val *= 2
    return val
}

# rshift(val, bits): return (val >> bits) with sign extension; if bits < 0,
#     lshift() is used
define rshift
{
    val = $1
    bits = $2
    if (bits < 0)
        return lshift(val, -bits)
    while (bits-- > 0)
    {
        # Integer division rounds towards zero, but a sign-extending shift
        # rounds towards minus infinity: the two differ for negative odd
        # values, so step those down to the even value below before halving.
        if (val < 0 && val % 2 != 0)
            val = (val - 1) / 2
        else
            val /= 2
    }
    return val
}

# urshift(val, bits): return (val >> bits) with zero extension; if bits < 0,
#     lshift() is used
define urshift
{
    val = $1
    bits = $2
    if (bits < 0)
        return lshift(val, -bits)
    if (bits > 0 && val < 0)
    {
        # calculate top bit: keep doubling until the value wraps negative
        topbit = 1
        while (topbit > 0)
            topbit *= 2
        # shift once with sign extension, then clear the copied-in sign bit;
        # the result is non-negative, so plain halving is exact from here on
        val = rshift(val, 1)
        val &= compl(topbit)
        bits--
    }
    while (bits-- > 0)
        val /= 2
    return val
}

# ==============================================================================
# Matching functions
# ==============================================================================

# ==============================================================================
# match_as(text, pattern, type): returns true if pattern is found in the text.
#     If so, it saves various values in a global array. Otherwise, if the
#     pattern is not found, these values are deleted and the function returns
#     false (0).
#     Matching is done using search_string() with type passed as the search
#     type.
#     Retrieve the stored values using matched_prefix(), matched_suffix()
#     matched_text(), matched_start() and matched_end().
#
#       The following call match_as():
#           match_re()/match_rei()   - match regex
#           match_ren()/match_reni() - match regex with \n as normal character
#           match_w()/match_wi()     - match word
#           match_s()/match_si()     - match substring
# ==============================================================================
define match_as
{
    text = $1
    pattern = $2
    type = $3
    beg = search_string(text, pattern, 0, type)
    end = $search_end

    # forget everything stored by the previous match (including the cached
    # group count added by matched_groups())
    s = "match_as__"
    for (i in $EXTENSIONS_NM)
    {
        if (substring(i, 0, length(s)) == s)
            delete $EXTENSIONS_NM[i]
    }

    if (beg >= 0)
    {
        $EXTENSIONS_NM["match_as__start"] = beg
        $EXTENSIONS_NM["match_as__end"] = end
        $EXTENSIONS_NM["match_as__before"] = substring(text, 0, beg)
        $EXTENSIONS_NM["match_as__matched"] = substring(text, beg, end)
        $EXTENSIONS_NM["match_as__after"] = substring(text, end)
        $EXTENSIONS_NM["match_as__pattern"] = pattern
        $EXTENSIONS_NM["match_as__type"] = type
        return 1
    }
    return 0
}

# matched_prefix(): returns prefix of matched text in previously scanned string.
#       Fails if previous call to match_as() returned false. See match_as().
define matched_prefix
{
    return $EXTENSIONS_NM["match_as__before"]
}
# matched_suffix(): returns suffix of matched text in previously scanned string.
#       Fails if previous call to match_as() returned false. See match_as().
define matched_suffix
{
    return $EXTENSIONS_NM["match_as__after"]
}
# matched_text(): returns matched text from previously scanned string.
#       Fails if previous call to match_as() returned false. See match_as().
define matched_text
{
    return $EXTENSIONS_NM["match_as__matched"]
}
# matched_start(): returns the position of matched text from previously scanned
#       string. Fails if previous call to match_as() returned false.
#       See match_as().
define matched_start
{
    return $EXTENSIONS_NM["match_as__start"]
}
# matched_end(): returns the end position of matched text from previously
#       scanned string. Fails if previous call to match_as() returned false.
#       See match_as().
define matched_end
{
    return $EXTENSIONS_NM["match_as__end"]
}

# matched_groups(): returns the number of capturing parentheses in the last
#       regular expression match. Adds this number to $EXTENSIONS_NM if
#       not already there.
#       Fails (by calling an undefined function named after the error) if the
#       last match was not a regular expression match; both the "regex" and
#       "regexNoCase" search types are accepted.
define matched_groups
{
    re = $EXTENSIONS_NM["match_as__pattern"]
    type = $EXTENSIONS_NM["match_as__type"]
    if (type != "regex" && type != "regexNoCase")
        return matched_groups_called_for_non_regex()
    if ("match_as__paren_groups" in $EXTENSIONS_NM)
        n = $EXTENSIONS_NM["match_as__paren_groups"]
    else
    {
        n = regex_capturing_parens(re)
        $EXTENSIONS_NM["match_as__paren_groups"] = n
    }
    return n
}

# matched_part(i): returns the match of a parenthesised group from the last
#       match; group 0 is the whole matched text.
#       Fails (by calling an undefined function named after the error) if the
#       last match was not a regular expression match, or if i is negative,
#       greater than 9, or greater than the number of groups in the pattern.
define matched_part
{
    i = $1 + 0
    type = $EXTENSIONS_NM["match_as__type"]
    if (type != "regex" && type != "regexNoCase")
        return matched_part_called_for_non_regex()
    if (i == 0)
        return $EXTENSIONS_NM["match_as__matched"]
    if (i < 0)
        return matched_part_called_for_negative_group_index()
    if (i > 9)
        return matched_part_called_for_group_index_greater_than_9()
    if (i > matched_groups())
        return matched_part_called_for_group_index_greater_than_groups_found()
    # re-apply the pattern to the matched text with the same search type as
    # the original match, substituting the back-reference \i for the whole
    return replace_in_string($EXTENSIONS_NM["match_as__matched"], \
                             $EXTENSIONS_NM["match_as__pattern"], \
                             "\\" i, type)
}

# matched_1(): returns the text of the last match's parenthesis group 1
define matched_1
{
    return matched_part(1)
}
# matched_2(): returns the text of the last match's parenthesis group 2
define matched_2
{
    return matched_part(2)
}
# matched_3(): returns the text of the last match's parenthesis group 3
define matched_3
{
    return matched_part(3)
}
# matched_4(): returns the text of the last match's parenthesis group 4
define matched_4
{
    return matched_part(4)
}
# matched_5(): returns the text of the last match's parenthesis group 5
define matched_5
{
    return matched_part(5)
}
# matched_6(): returns the text of the last match's parenthesis group 6
define matched_6
{
    return matched_part(6)
}
# matched_7(): returns the text of the last match's parenthesis group 7
define matched_7
{
    return matched_part(7)
}
# matched_8(): returns the text of the last match's parenthesis group 8
define matched_8
{
    return matched_part(8)
}
# matched_9(): returns the text of the last match's parenthesis group 9
define matched_9
{
    return matched_part(9)
}

# ------------------------------------------------------------------------------
# match_re(str, pat): return true if regular expression pat is found in str.
define match_re
{
    return match_as($1, $2, "regex")
}
# match_rei(str, pat): return true if regular expression pat is found in str.
#       (case insignificant)
define match_rei
{
    return match_as($1, $2, "regexNoCase")
}
# match_ren(str, pat): return true if regular expression pat is found in str.
#       (case significant, newline insignificant)
define match_ren
{
    return match_as($1, "(?n" $2 ")", "regex")
}
# match_reni(str, pat): return true if regular expression pat is found in str.
#       (case insignificant, newline insignificant)
define match_reni
{
    return match_as($1, "(?n" $2 ")", "regexNoCase")
}
# match_w(str, pat): return true if word(s) pat is found in str.
define match_w
{
    return match_as($1, $2, "caseWord")
}
# match_wi(str, pat): return true if word(s) pat is found in str.
#       (case insignificant)
define match_wi
{
    return match_as($1, $2, "word")
}
# match_s(str, pat): return true if string pat is found in str.
define match_s
{
    return match_as($1, $2, "case")
}
# match_si(str, pat): return true if string pat is found in str.
#       (case insignificant)
define match_si
{
    return match_as($1, $2, "literal")
}

# ------------------------------------------------------------------------------
# sub_re(str, pat, replace): returns the result of calling
#       replace_in_string(str, pat, replace, "regex", "copy")
define sub_re
{
    return replace_in_string($1, $2, $3, "regex", "copy")
}
# sub_s(str, pat, replace): returns the result of calling
#       replace_in_string(str, pat, replace, "case", "copy")
define sub_s
{
    return replace_in_string($1, $2, $3, "case", "copy")
}

# ------------------------------------------------------------------------------
# background_is_dark(override): attempts to determine whether the standard NEdit
#       background is dark, to help select good values for rangeset colors.
#       If a parameter is given, it should be one of the following values:
#       - "ask": ask the user whether background is dark with a dialog box
#       - "reset": clear all values and process from scratch
#       - "yes" or 1: set the return value to true
#       - "no" or 0: set the return value to false
#       Returns 1 (true) if the background is dark, 0 (false) otherwise.
define background_is_dark
{
    # keys of the cached values in $EXTENSIONS_NM
    keyValue = "background_is_dark__value"
    keyRgb = "background_is_dark__rgb_text_bg"

    if ($n_args > 0)
    {
        override = $1
        # a numeric override is mapped onto "yes" (non-zero) or "no" (zero)
        if (valid_number(override))
        {
            if (override != 0)
                override = "yes"
            else
                override = "no"
        }

        if (override == "yes")
        {
            $EXTENSIONS_NM[keyValue] = 1
        }
        else if (override == "no")
        {
            $EXTENSIONS_NM[keyValue] = 0
        }
        else if (override == "ask")
        {
            # the first button is the default one: offer whatever we currently
            # believe first ("Light" when nothing is known yet)
            believedDark = 0
            if (keyValue in $EXTENSIONS_NM)
                believedDark = $EXTENSIONS_NM[keyValue]
            if (believedDark)
            {
                button = dialog("Is the window background dark or light?",
                                "Dark", "Light")
                $EXTENSIONS_NM[keyValue] = (button == 1)
            }
            else
            {
                button = dialog("Is the window background light or dark?",
                                "Light", "Dark")
                $EXTENSIONS_NM[keyValue] = (button != 1)
            }
        }
        else if (override == "reset")
        {
            # drop both cached entries so everything is worked out again below
            if (keyValue in $EXTENSIONS_NM)
                delete $EXTENSIONS_NM[keyValue]
            if (keyRgb in $EXTENSIONS_NM)
                delete $EXTENSIONS_NM[keyRgb]
        }
    }

    # a known answer (cached earlier, or just set above) wins
    if (keyValue in $EXTENSIONS_NM)
        return $EXTENSIONS_NM[keyValue]

    # get_colors() is non-standard: if it does not exist, calling it aborts
    # the macro. The empty entry is stored BEFORE the call so that the attempt
    # is made only once; later calls find the key and skip straight on.
    if (!(keyRgb in $EXTENSIONS_NM))
    {
        $EXTENSIONS_NM[keyRgb] = ""
        $EXTENSIONS_NM[keyRgb] = get_colors()["rgb_text_bg"]
    }
    rgb = $EXTENSIONS_NM[keyRgb]

    # no color from get_colors() (an earlier attempt aborted): fall back on
    # highlighting information, if any. This may not give a good value, since
    # a highlighting background color may supersede the normal text
    # background, so look at the start and/or the end of the buffer.
    if (length(rgb) == 0)
    {
        style = get_style_at_pos(0)
        if (("background" in style) && \
            get_range($text_length - 1, $text_length) == "\n")
        {
            # the first character has highlights with a background; try the
            # last character instead (only if it is a "\n")
            style = get_style_at_pos($text_length - 1)
        }
        if ("back_rgb" in style)
        {
            rgb = style["back_rgb"]
            $EXTENSIONS_NM[keyRgb] = rgb
        }
    }

    if (length(rgb) > 0)
    {
        # two hex digits per color component, after one leading character
        red = hex_to_int(substring(rgb, 1, 3))
        green = hex_to_int(substring(rgb, 3, 5))
        blue = hex_to_int(substring(rgb, 5, 7))
        dark = (red < 192 && green < 192 && blue < 220 && \
                (red + green + blue) < (3 * 128))
        $EXTENSIONS_NM[keyValue] = dark
        return dark
    }

    # nothing worked: ask the user
    return background_is_dark("ask")
}

# ------------------------------------------------------------------------------
# set_window_title([string [, type]]): attempts to use the
#       set_window_title_format([string [, type]]) function to change the
#       window's title. If the version of NEdit does not support this function,
#       it will fail only once; subsequent calls will simply do nothing.
#       string defaults to "" and type to "text".
define set_window_title
{
    title = ""
    if ($n_args > 0)
        title = $1
    type = "text"
    if ($n_args > 1)
        type = $2

    # state flag in $EXTENSIONS_NM: "no" until a call has got through
    flag = "set_window_title works"
    if (flag in $EXTENSIONS_NM)
    {
        # an earlier attempt never completed - do not try again
        if ($EXTENSIONS_NM[flag] == "no")
            return
    }
    else
    {
        # first attempt: record failure up front, in case the call below
        # aborts the macro
        $EXTENSIONS_NM[flag] = "no"
    }

    set_window_title_format(title, type)
    $EXTENSIONS_NM[flag] = "yes"                # OK for next time
}