# ==============================================================================
# extensions.nm
# Author: Tony Balinski
#
# This file contains the following functions:
#       tochar
#       toascii
#       isinstring
#       tolowercase
#       touppercase
#       islowercase islower isuppercase isupper isalpha isasciialpha
#       isdigit isxdigit isalnum isspace ispunct isgraph isprint iscntrl isascii
#       isblank isword
#       togglecase
#       substr
#       replace_in_str
#       hex_to_int
#       hex_to_char
#       hex2_to_str
#       int_to_hex
#       char_to_hex
#       start_of_line_pos
#       end_of_line_pos
#       line_of_pos
#       break_lines_over
#       quote_literal_as_regex
#       regex_to_quoted_string
#       line_col_to_pos
#       str_line_col_to_pos
#       pos_to_line
#       str_pos_to_line
#       pos_to_column
#       str_pos_to_column
#       rjust
#       ljust
#       trim
#       compress
#       number
# ==============================================================================

# $CHARS["all"] holds every 8-bit character at the index equal to its character
# code. The leading " " is a dummy character for the zero index (character NUL),
# which cannot be stored in a macro string.
$CHARS["all"] = \
    " \x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
    "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
    "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
    "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
    "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
    "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
    "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
    "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" \
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" \
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"

# Character classes restricted to 7-bit ASCII, published as $CHARS["ascii"].
ascii["upper"] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
ascii["lower"] = "abcdefghijklmnopqrstuvwxyz"
ascii["alpha"] = ascii["upper"] ascii["lower"]
ascii["digit"] = "0123456789"
ascii["alnum"] = ascii["alpha"] ascii["digit"]
ascii["punct"] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
ascii["space"] = "\t\n\v\f\r"
ascii["xdigit"] = ascii["digit"] "ABCDEFabcdef"
ascii["graph"] = ascii["alnum"] ascii["punct"]
ascii["print"] = ascii["graph"] " "
ascii["word"] = ascii["alnum"] "_"
ascii["blank"] = " \t"
ascii["cntrl"] = "\x01\x02\x03\x04\x05\x06\x07" \
                 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                 "\x10\x11\x12\x13\x14\x15\x16\x17" \
                 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
$CHARS["ascii"] = ascii

# Character classes for ISO 8859-1, published as $CHARS["iso8859_1"].
# NOTE(review): the empty "" pieces appended to "alpha" and "punct" look like
# placeholders for 8-bit characters that are missing from this copy of the
# file - confirm against a pristine copy before relying on these classes.
iso8859_1["upper"] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
iso8859_1["lower"] = "abcdefghijklmnopqrstuvwxyz"
iso8859_1["alpha"] = iso8859_1["upper"] iso8859_1["lower"] ""
iso8859_1["digit"] = "0123456789"
iso8859_1["alnum"] = iso8859_1["alpha"] iso8859_1["digit"]
iso8859_1["punct"] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" \
                     ""
iso8859_1["space"] = "\t\n\v\f\r"
iso8859_1["xdigit"] = iso8859_1["digit"] "ABCDEFabcdef"
iso8859_1["graph"] = iso8859_1["alnum"] iso8859_1["punct"]
iso8859_1["print"] = iso8859_1["graph"] " "
iso8859_1["word"] = iso8859_1["alnum"] "_"
iso8859_1["blank"] = " \t"
iso8859_1["cntrl"] = "\x01\x02\x03\x04\x05\x06\x07" \
                     "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
                     "\x10\x11\x12\x13\x14\x15\x16\x17" \
                     "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
$CHARS["iso8859_1"] = iso8859_1

# $ASC256_CHARS is the code-indexed lookup table used by tochar(), toascii(),
# hex_to_char() and char_to_hex(): the character with code N must sit at index
# N. It is taken from the escaped table above rather than spelled out with raw
# control and 8-bit characters, which do not survive editing or transfer of
# this file and would silently shift every index.
$ASC256_CHARS = $CHARS["all"]

# ==============================================================================
# tochar(int): returns the ascii char corresponding to the integer int, if in
#       range, as a single character string. Fails with the empty string (as for
#       int = 0).
# ==============================================================================
define tochar {
    code = $1

    # zero (NUL) and anything outside -128..255 has no representation
    if (code == 0 || code < -128 || code >= 256)
        return ""

    # negative values are taken as signed bytes and mapped onto 128..255
    if (code < 0)
        code = code + 256

    return substring($ASC256_CHARS, code, code + 1)
}

# ==============================================================================
# toascii(char): returns the code of the first character in char as an integer,
#       looked up in $ASC256_CHARS. An empty argument gives 0; a character that
#       is not in the table gives -1 (the failure value of search_string()).
# ==============================================================================
define toascii {
    if ($1 == "")
        return 0

    first = substring($1, 0, 1)
    # start at index 1: the initial character in $ASC256_CHARS is a dummy
    return search_string($ASC256_CHARS, first, 1, "case")
}

# ==============================================================================
# Character sets used by the is...() and to...case() functions below.
# NOTE(review): the empty "" pieces appended to $THE_LETTER_CHARS and
# $THE_PUNCT_CHARS look like placeholders for 8-bit (iso8859-1) characters that
# are missing from this copy of the file - confirm against a pristine copy.
$THE_LOWERCASE_CHARS = "abcdefghijklmnopqrstuvwxyz"
$THE_UPPERCASE_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
$THE_LETTER_CHARS = $THE_LOWERCASE_CHARS $THE_UPPERCASE_CHARS ""
$THE_ASCII_LETTER_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
$THE_DIGITS = "0123456789"
$THE_XDIGITS = "0123456789ABCDEFabcdef"
$THE_SPACE_CHARS = " \t\v\f\n\r"
$THE_PUNCT_CHARS = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" \
                   ""

# ==============================================================================
# isinstring(ch, str): returns true if the first character of ch is in str.
#       The comparison is case sensitive.
# ==============================================================================
define isinstring {
    first = substring($1, 0, 1)
    found_at = search_string($2, first, 0, "case")
    return (found_at != -1)
}

# ==============================================================================
# tolowercase(string): converts to lower case, returning the converted string.
# ==============================================================================
define tolowercase {
    src = $1
    out = ""
    n = length(src)

    k = 0
    while (k < n) {
        one = substring(src, k, k + 1)
        # an upper case letter is swapped for the lower case letter found at
        # the same index; every other character is copied unchanged
        idx = search_string($THE_UPPERCASE_CHARS, one, 0, "case")
        if (idx != -1)
            one = substring($THE_LOWERCASE_CHARS, idx, idx + 1)
        out = out one
        k++
    }
    return out
}

# ==============================================================================
# touppercase(string): converts to upper case, returning the converted string.
# ==============================================================================
define touppercase {
    src = $1
    out = ""
    n = length(src)

    k = 0
    while (k < n) {
        one = substring(src, k, k + 1)
        # a lower case letter is swapped for the upper case letter found at
        # the same index; every other character is copied unchanged
        idx = search_string($THE_LOWERCASE_CHARS, one, 0, "case")
        if (idx != -1)
            one = substring($THE_UPPERCASE_CHARS, idx, idx + 1)
        out = out one
        k++
    }
    return out
}

# ==============================================================================
# islowercase(string), islower(string),
# isuppercase(string), isupper(string),
# isalpha(string), isasciialpha(string),
# isdigit(string), isxdigit(string),
# isalnum(string), isspace(string),
# ispunct(string), isgraph(string),
# isprint(string), iscntrl(string),
# isascii(string): return true if the first character of string is of the
#       appropriate class.
# isblank(string): return true if the first character is space or tab
# isword(string): return true if the first character is '_', an ascii letter
#       or a digit
# ==============================================================================

# islowercase(string): true if the first character is in $THE_LOWERCASE_CHARS
define islowercase {
    return isinstring($1, $THE_LOWERCASE_CHARS)
}

# islower(string): same test as islowercase()
define islower {
    return isinstring($1, $THE_LOWERCASE_CHARS)
}

# isuppercase(string): true if the first character is in $THE_UPPERCASE_CHARS
define isuppercase {
    return isinstring($1, $THE_UPPERCASE_CHARS)
}

# isupper(string): same test as isuppercase()
define isupper {
    return isinstring($1, $THE_UPPERCASE_CHARS)
}

# isalpha(string): true if the first character is in $THE_LETTER_CHARS
# (iso8859-1); cf isasciialpha()
define isalpha {
    return isinstring($1, $THE_LETTER_CHARS)
}

# isasciialpha(string): true if the first character is an ascii letter
define isasciialpha {
    return isinstring($1, $THE_ASCII_LETTER_CHARS)
}

# isdigit(string): true if the first character is a decimal digit
define isdigit {
    return isinstring($1, $THE_DIGITS)
}

# isxdigit(string): true if the first character is a hexadecimal digit
define isxdigit {
    return isinstring($1, $THE_XDIGITS)
}

# isalnum(string): true if the first character is a letter (iso8859-1) or a
# decimal digit
define isalnum {
    if (isinstring($1, $THE_LETTER_CHARS))
        return 1
    return isinstring($1, $THE_DIGITS)
}

# isspace(string): true if the first character is in $THE_SPACE_CHARS
define isspace {
    return isinstring($1, $THE_SPACE_CHARS)
}

# ispunct(string): true if the first character is in $THE_PUNCT_CHARS
# (iso8859-1)
define ispunct {
    return isinstring($1, $THE_PUNCT_CHARS)
}

# isgraph(string): true if the first character is punctuation or alphanumeric
# (iso8859-1)
define isgraph {
    if (isinstring($1, $THE_PUNCT_CHARS))
        return 1
    return isalnum($1)
}

# isprint(string): true if the first character is graphic or a space
# (iso8859-1)
# NOTE(review): the string literal below was split across two physical lines in
# the reviewed copy; it is read here as a single space - confirm.
define isprint {
    if (isgraph($1))
        return 1
    return isinstring($1, " ")
}

# iscntrl(string): true if the first character is neither printable nor
# white space (iso8859-1)
define iscntrl {
    return !(isprint($1) || isspace($1))
}

# isascii(string): true if the code of the first character is below 128
define isascii {
    return toascii($1) < 128
}

# isblank(string): return true if the first character is space or tab
define isblank {
    return isinstring($1, " \t")
}

# isword(string): return true if the first character is '_', an ascii letter or
# a digit
define isword {
    if (isasciialpha($1))
        return 1
    if (isdigit($1))
        return 1
    return isinstring($1, "_")
}

# ==============================================================================
# togglecase(string): returns the string with all uppercase characters switched
#       to lowercase and vice-versa.
# ==============================================================================
define togglecase {
    src = $1
    out = ""

    k = 0
    one = substring(src, k, k + 1)
    while (one != "") {
        upper = touppercase(one)
        if (upper != one)
            # was lower case: take the upper case form
            out = out upper
        else
            # upper case already, or not a letter at all
            out = out tolowercase(one)
        k++
        one = substring(src, k, k + 1)
    }
    return out
}

# ==============================================================================
# substr(string, [startpos, [endpos]]): calls substring() for given string,
#       start position, end position. Position parameters are optional. If any
#       are negative, measurement is made from the end of the string.
#
# Parameters:
#   $1 - source string (default "")
#   $2 - start position (default 0)
#   $3 - end position (default length($1))
# ==============================================================================
define substr {
    # get parameters
    if ($n_args < 1)
        return ""
    string = $1
    len = length(string)

    startpos = 0
    if ($n_args >= 2)
        startpos = $2
    endpos = len
    if ($n_args >= 3)
        endpos = $3

    # negative positions count back from the end of the string ...
    if (startpos < 0)
        startpos += len
    if (endpos < 0)
        endpos += len
    # ... and are clamped to the start if they reach back too far
    if (startpos < 0)
        startpos = 0
    if (endpos < 0)
        endpos = 0

    return substring(string, startpos, endpos)
}

# ==============================================================================
# replace_in_str(string, search_for, replace_with, [type]): calls
#       replace_in_string() with its parameters. If nothing matches, returns
#       the original string unchanged. Called with fewer than two arguments it
#       returns "".
#
# Parameters:
#   $1 - original string in which patterns should be matched
#   $2 - patterns to replace
#   $3 - what to replace with (default "")
#   $4 - type of matching to use (default "literal")
# ==============================================================================
define replace_in_str {
    # get parameters
    if ($n_args < 2)
        return ""
    string = $1
    search_for = $2
    replace_with = ""
    if ($n_args >= 3)
        replace_with = $3
    type = "literal"
    if ($n_args >= 4)
        type = $4

    # "copy" makes replace_in_string() return the input itself when nothing
    # was replaced. Testing the result against "" instead cannot tell "no
    # match" from "everything was replaced by nothing", and used to hand back
    # the original string in the second case.
    return replace_in_string(string, search_for, replace_with, type, "copy")
}

# ==============================================================================
# hex_to_int(hex_str): returns the value of the hex string. All (and only) hex
#       characters are considered. Scanning stops at first non-hex character.
# ==============================================================================
define hex_to_int {
    # get parameter
    if ($n_args < 1)
        return 0

    val = 0
    i = 0
    for (c = substring($1, i, i+1); c != ""; ++i, c = substring($1, i, i+1)) {
        # the index of the digit in this string is its value
        pos = search_string("0123456789ABCDEF", toupper(c), 0)
        if (pos < 0)
            break               # first non-hex character ends the number
        val = (16 * val) + pos
    }
    return val
}

# ==============================================================================
# hex_to_char(hex_str): returns the character whose code is the value of the
#       hex string, as read by hex_to_int(). Returns "" if that value is zero
#       or is 256 or more.
# ==============================================================================
define hex_to_char {
    i = hex_to_int($1)
    if (i == 0 || i >= 256)
        return ""
    return substring($ASC256_CHARS, i, i+1)
}

# ==============================================================================
# hex2_to_str(hex_str): returns a string whose value is that of the
#       concatenation of characters whose hex values are listed, two digits at
#       a time. All non-hex-digits in the input are ignored.
# ==============================================================================
define hex2_to_str {
    # remove everything that is not a hex digit; "copy" returns the input
    # itself when there was nothing to remove
    s = replace_in_string($1, "[^0-9A-Fa-f]|\n", "", "regex", "copy")

    # decode the cleaned string (not the raw argument) two digits at a time
    res = ""
    i = 0
    for (cc = substring(s, i, i+2); cc != ""; i += 2, cc = substring(s, i, i+2)) {
        res = res hex_to_char(cc)
    }
    return res
}

# ==============================================================================
# int_to_hex(int, mindigits): returns the hex representation of the integer.
#       By default, mindigits is set to one.
# ==============================================================================
define int_to_hex {
    value = $1
    digits = 1
    if ($n_args > 1)
        digits = $2
    if (digits < 1)
        digits = 1

    # the sign is dropped: only the magnitude is converted
    if (value < 0)
        value = -value

    # build the result from the least significant digit, continuing until
    # both the value and the requested minimum width are used up
    hex = ""
    while (digits > 0 || value > 0) {
        nibble = value % 16
        hex = substring("0123456789ABCDEF", nibble, nibble + 1) hex
        value = value / 16
        digits--
    }
    return hex
}

# ==============================================================================
# char_to_hex(chars): returns the hex values of each of the characters passed.
#       Each character is transformed into 2 hex digits. Characters not found
#       in $ASC256_CHARS are skipped.
# ==============================================================================
define char_to_hex {
    hex = ""

    k = 0
    one = substring($1, k, k + 1)
    while (one != "") {
        # the index in the table is the character code; index 0 is a dummy
        code = search_string($ASC256_CHARS, one, 1, "case")
        if (code >= 1)
            hex = hex int_to_hex(code, 2)
        k++
        one = substring($1, k, k + 1)
    }
    return hex
}

# ==============================================================================
# start_of_line_pos([pos]): returns the position of the start of the line
#       containing pos (default $cursor) - ie the position following the
#       previous newline.
# ==============================================================================
define start_of_line_pos {
    where = $cursor
    if ($n_args >= 1)
        where = $1

    # find the end of THIS line, then look backward from just before it for
    # the newline that ends the previous line
    eol = end_of_line_pos(where)
    prev_nl = search("\n", eol - 1, "case", "backward")

    # no earlier newline: this is the first line of the document
    if (prev_nl < 0)
        return 0
    return $search_end
}

# ==============================================================================
# end_of_line_pos([pos]): returns the position of the end of the line containing
#       pos (default $cursor) - ie the position of the next newline.
# ==============================================================================
define end_of_line_pos {
    if ($n_args < 1)
        pos = $cursor
    else
        pos = $1

    e = search("\n", pos, "case")
    # no newline after pos: the line ends where the text does
    if (e == -1)
        return $text_length
    return e
}

# ==============================================================================
# line_of_pos([pos]): returns the line containing the position pos (default
#       $cursor) as a string without a trailing newline.
# ==============================================================================
define line_of_pos {
    if ($n_args < 1)
        pos = $cursor
    else
        pos = $1

    # find end of THIS line
    e = end_of_line_pos(pos)

    # now find the end of the previous one; if there is none this is the
    # first line of the document, which starts at position 0 (returning the
    # number 0 here, as was done before, loses the text of that line)
    b = 0
    if (search("\n", e - 1, "case", "backward") >= 0)
        b = $search_end

    return get_range(b, e)
}

# ==============================================================================
# break_lines_over(maxchars, string [, pref]): tries to break string at word
#       boundaries adding newlines followed by the string pref (if present).
#       Newlines already in string are kept and are not followed by pref. The
#       width of pref counts towards maxchars on the lines it starts.
#       Returns the modified string.
# ==============================================================================
define break_lines_over {
    maxchars = $1
    string = $2
    pref = ""
    if ($n_args > 2)
        pref = $3

    # room: how many characters of string still fit on the current output
    # line. Always at least 1 so that every pass consumes some input.
    room = maxchars
    if (room < 1)
        room = 1

    res = ""
    while (length(string) > room) {
        nlpos = search_string(string, "\n", 0)
        if (0 <= nlpos && nlpos <= room) {
            # an existing newline comes soon enough: keep it and start a
            # fresh, unprefixed line
            res = res substring(string, 0, nlpos + 1)
            string = substring(string, nlpos + 1, length(string))
            room = maxchars
            if (room < 1)
                room = 1
        } else {
            # look back from the limit for the end of a word; failing that
            # (or if that would break at position 0) cut at the limit itself
            nlpos = search_string(string, ">", room, "regex", "backward")
            if (!(0 < nlpos && nlpos <= room))
                nlpos = room
            res = res substring(string, 0, nlpos) "\n" pref
            string = substring(string, nlpos, length(string))
            # the new line already holds pref
            room = maxchars - length(pref)
            if (room < 1)
                room = 1
        }
    }
    res = res string
    return res
}

# ==============================================================================
# quote_literal_as_regex(string): returns a string representing a valid regex
#       search string for the parameter passed.
# ==============================================================================
define quote_literal_as_regex {
    src = $1
    out = ""
    n = length(src)

    # Maintenance check kept from the original author. This function relies on
    # "\e" being read as a single (escape) character. If the macro language
    # reads it as backslash followed by "e" instead, the dialog below asks for
    # this function to be changed: remove this test and the "\e" branch in the
    # loop. (For a macro language where \e is known to be escape, the opposite
    # test, "\e" != "\\e", would be the one to use.)
    if ("\e" == "\\e") {
        dialog("Change extensions.nm: quote_literal_as_regex() - \\e == \\\\e")
    }

    k = 0
    while (k < n) {
        one = substring(src, k, k + 1)
        k++

        if (search_string("\\|()[]{}<>.*+?^$&-", one, 0, "case") >= 0)
            one = "\\" one      # regex operator: quote it with a backslash
        else if (one == "\a")
            one = "\\a"         # control characters become escape sequences
        else if (one == "\b")
            one = "\\b"
        else if (one == "\e")
            one = "\\e"         # only valid while "\e" is the escape character
        else if (one == "\f")
            one = "\\f"
        else if (one == "\n")
            one = "\\n"
        else if (one == "\r")
            one = "\\r"
        else if (one == "\t")
            one = "\\t"
        else if (one == "\v")
            one = "\\v"

        out = out one
    }
    return out
}

# ==============================================================================
# regex_to_quoted_string(regex): adds quotes and backslashes to convert a
#       string containing a valid regex into one usable in NEdit Macro code.
# ==============================================================================
define regex_to_quoted_string {
    src = $1
    out = ""
    n = length(src)

    # Maintenance check kept from the original author. This function relies on
    # "\e" being read as a single (escape) character. If the macro language
    # reads it as backslash followed by "e" instead, the dialog below asks for
    # this function to be changed: remove this test and the "\e" branch in the
    # loop. (For a macro language where \e is known to be escape, the opposite
    # test, "\e" != "\\e", would be the one to use.)
    if ("\e" == "\\e") {
        dialog("Change extensions.nm: regex_to_quoted_string() - \\e == \\\\e")
    }

    k = 0
    while (k < n) {
        one = substring(src, k, k + 1)
        pair = substring(src, k, k + 2)

        if (search_string(pair, "^\\\\[abefnrtv]", 0, "regex") == 0) {
            # already a back-slash escaped control char sequence: copy both
            # characters and step over them
            out = out pair
            k += 2
        } else {
            if (search_string("\"\\", one, 0, "case") >= 0)
                one = "\\" one  # quote that quote or back-slash
            else if (one == "\a")
                one = "\\a"     # convert control char to escape seq
            else if (one == "\b")
                one = "\\b"
            else if (one == "\e")
                one = "\\e"     # only valid while "\e" is the escape character
            else if (one == "\f")
                one = "\\f"
            else if (one == "\n")
                one = "\\n"
            else if (one == "\r")
                one = "\\r"
            else if (one == "\t")
                one = "\\t"
            else if (one == "\v")
                one = "\\v"
            # anything else is left alone
            out = out one
            k++
        }
    }
    return "\"" out "\""
}

# ==============================================================================
# line_col_to_pos(lineNum [, colNum [, tabSize]]): returns the position of the
#       character position indicated by lineNum and colNum, assuming a
#       particular tabSize. By default, colNum is zero and tabSize equals
#       $tab_dist. Lines are numbered from 1, columns from zero. If there are
#       not enough lines, -1 is returned; if there are not enough columns in the
#       addressed line, the position of the last character is returned; if the
#       column is "inside" a tab, return the tab's position.
# ==============================================================================
define line_col_to_pos {
    lineNum = $1
    colNum = 0
    if ($n_args >= 2 && $2 >= 0)
        colNum = $2
    tabSize = $tab_dist
    # only a positive tab size is usable: zero would make the modulo in the
    # loop below fail
    if ($n_args >= 3 && $3 > 0)
        tabSize = $3

    # find the start of the requested line by matching the lines before it
    bufferPos = 0
    if (lineNum > 1) {
        if (search("(^.*\n){" lineNum - 1 "}", 0, "regex") != -1)
            bufferPos = $search_end
        else
            bufferPos = -1
    }

    # no such line, or column zero asked for: nothing more to work out
    if (bufferPos < 0 || colNum <= 0)
        return bufferPos

    # The line must have content starting exactly at bufferPos. The search
    # runs forward, so on an empty line it finds a later line (or nothing);
    # walking into that line would return a position outside the line that
    # was asked for.
    if (search("^.+$", bufferPos, "regex") != bufferPos)
        return bufferPos

    last = $search_end
    pos = bufferPos
    col = 0
    while (pos < last) {
        nexttab = search("\t", pos)
        if (nexttab < pos || nexttab > last)
            nexttab = last
        if (nexttab == pos) {
            # pos is at a tab: if the tab extends beyond colNum, return this pos
            nextCol = col + tabSize - (col % tabSize)
            if (nextCol > colNum)
                return pos
            pos++               # skip the tab
        } else {
            # pos is at a non-tab: if the non-tab sequence extends beyond
            # colNum, we have an overrun of col + nexttab - pos - colNum;
            # return end-of-non-tab-sequence - overrun
            nextCol = col + nexttab - pos
            if (nextCol >= colNum)
                return nexttab - (nextCol - colNum)
            pos = nexttab       # skip to next tab
        }
        # move column count forward
        col = nextCol
    }
    # not found during the loop: the line is shorter than colNum
    return last
}

# ==============================================================================
# str_line_col_to_pos(string, lineNum [, colNum [, tabSize]]): returns the
#       position of the character position indicated by lineNum and colNum,
#       assuming a particular tabSize. By default, colNum is zero and tabSize
#       equals $tab_dist. Lines are numbered from 1, columns from zero. If there
#       are not enough lines, -1 is returned; if there are not enough columns in
#       the addressed line, the position of the last character is returned; if
#       the column is "inside" a tab, return the tab's position.
# ==============================================================================
define str_line_col_to_pos {
    string = $1
    lineNum = $2
    colNum = 0
    if ($n_args >= 3 && $3 >= 0)
        colNum = $3
    tabSize = $tab_dist
    # only a positive tab size is usable: zero would make the modulo in the
    # loop below fail
    if ($n_args >= 4 && $4 > 0)
        tabSize = $4

    # find the start of the requested line by matching the lines before it
    bufferPos = 0
    if (lineNum > 1) {
        if (search_string(string, "(^.*\n){" lineNum - 1 "}", 0, "regex") != -1)
            bufferPos = $search_end
        else
            bufferPos = -1
    }

    # no such line, or column zero asked for: nothing more to work out
    if (bufferPos < 0 || colNum <= 0)
        return bufferPos

    # The line must have content starting exactly at bufferPos. The search
    # runs forward, so on an empty line it finds a later line (or nothing);
    # walking into that line would return a position outside the line that
    # was asked for.
    if (search_string(string, "^.+$", bufferPos, "regex") != bufferPos)
        return bufferPos

    last = $search_end
    pos = bufferPos
    col = 0
    while (pos < last) {
        nexttab = search_string(string, "\t", pos)
        if (nexttab < pos || nexttab > last)
            nexttab = last
        if (nexttab == pos) {
            # pos is at a tab: if the tab extends beyond colNum, return this pos
            nextCol = col + tabSize - (col % tabSize)
            if (nextCol > colNum)
                return pos
            pos++               # skip the tab
        } else {
            # pos is at a non-tab: if the non-tab sequence extends beyond
            # colNum, we have an overrun of col + nexttab - pos - colNum;
            # return end-of-non-tab-sequence - overrun
            nextCol = col + nexttab - pos
            if (nextCol >= colNum)
                return nexttab - (nextCol - colNum)
            pos = nexttab       # skip to next tab
        }
        # move column count forward
        col = nextCol
    }
    # not found during the loop: the line is shorter than colNum
    return last
}

# ==============================================================================
# pos_to_line(pos): returns the line number (counted from 1) of the position
#       pos. If pos is too large, this returns the last line number for the
#       document (so the minimum value is 1).
# ==============================================================================
define pos_to_line {
    target = $1
    # a position beyond the text counts as the end of the text
    if (target >= $text_length)
        target = $text_length

    # step from line start to line start, counting, until the start of the
    # next line would lie beyond the target
    count = 0
    scan = 0
    while (scan <= target) {
        count++
        if (search("\n", scan) < 0)
            break               # no more newlines: this is the last line
        scan = $search_end
    }
    return count
}

# ==============================================================================
# str_pos_to_line(string, pos): returns the line number (counted from 1) of the
#       position pos within the string. If pos is too large, this returns the
#       last line number for the string (so the minimum value is 1).
# ==============================================================================
define str_pos_to_line {
    text = $1
    target = $2
    # a position beyond the string counts as the end of the string
    size = length(text)
    if (target >= size)
        target = size

    # step from line start to line start, counting, until the start of the
    # next line would lie beyond the target
    count = 0
    scan = 0
    while (scan <= target) {
        count++
        if (search_string(text, "\n", scan) < 0)
            break               # no more newlines: this is the last line
        scan = $search_end
    }
    return count
}

# ==============================================================================
# pos_to_column(pos [, tabSize]): returns the column number (counted from 0) of
#       the position pos in its line. If pos is too large, this returns the last
#       column of the last line for the document (so the minimum value is 0).
# ==============================================================================
define pos_to_column {
    target = $1
    tabSize = $tab_dist
    if ($n_args >= 2)
        tabSize = $2
    # a position beyond the text counts as the end of the text
    if (target > $text_length)
        target = $text_length

    # limits of the line holding the target position
    scan = search("^", target, "regex", "backward")
    line_end = search("$", target, "regex", "forward")

    # walk from the start of the line to the target, one tab or one run of
    # non-tab characters at a time, keeping the column up to date
    col = 0
    while (scan < target) {
        tab_at = search("\t", scan)
        if (tab_at < scan || tab_at > line_end)
            tab_at = line_end   # no (more) tabs on this line

        if (tab_at >= target) {
            # no tabs between scan and target: count the characters
            col = col + target - scan
            scan = tab_at
        } else if (tab_at > scan) {
            # skip contiguous non-tabs up to the tab
            col = col + tab_at - scan
            scan = tab_at
        } else {
            # scan is at a tab: move to the next tab stop and step over it
            col = col + tabSize - (col % tabSize)
            scan = tab_at + 1
        }
    }
    return col
}

# ==============================================================================
# str_pos_to_column(string, pos [, tabSize]): returns the column number (counted
#       from 0) of the position pos in its line inside string. If pos is too
#       large, this returns the last column of the last line for the string (so
#       the minimum value is 0).
# ==============================================================================
define str_pos_to_column {
    # parameters follow the documented order (string, pos [, tabSize]); this
    # function used to take pos from $1 and never set string at all
    string = $1
    posNum = $2
    tabSize = $tab_dist
    if ($n_args >= 3)
        tabSize = $3

    # a position beyond the string counts as the end of the string
    len = length(string)
    if (posNum > len)
        posNum = len

    # limits of the line holding posNum, searched for in the string (with
    # search_string()) and not in the current document (with search())
    pos = search_string(string, "^", posNum, "regex", "backward")
    if (pos < 0)
        pos = 0
    end = search_string(string, "$", posNum, "regex", "forward")
    if (end < 0)
        end = len

    col = 0
    nextCol = 0
    while (pos < posNum) {
        nexttab = search_string(string, "\t", pos)
        if (nexttab < pos || nexttab > end)
            nexttab = end       # no (more) tabs on this line
        if (nexttab >= posNum)
            nextCol = col + posNum - pos    # no tabs between pos and posNum
        else if (nexttab > pos)
            nextCol = col + nexttab - pos   # skip contiguous non-tabs
        else {
            # nexttab == pos: move to the next tab stop and skip the tab
            nextCol = col + tabSize - (col % tabSize)
            nexttab++
        }
        pos = nexttab
        col = nextCol
    }
    return col
}

# ==============================================================================
# rjust(strs [, maxstr]): returns a string holding each line of str padded with
#       spaces to the left so that it is as long as maxstr, or to the longest
#       line in strs.
# ==============================================================================
define rjust {
    strs = split($1, "\n")

    # set up default width and padding string: maxstr with every character
    # (newlines included) turned into a space
    if ($n_args > 1)
        maxstr = replace_in_string($2, "(?n.)", " ", "regex")
    else
        maxstr = ""
    width = length(maxstr)

    # measure longest line, and maintain padding string of that length
    nstr = strs[]
    for (i = 0; i < nstr; i++) {
        str = strs[i]
        w = length(str)
        if (width < w) {
            width = w
            maxstr = replace_in_string(str, ".", " ", "regex")
        }
    }

    # assemble result
    res = ""
    nl = ""
    for (i = 0; i < nstr; i++) {
        str = strs[i]
        w = length(str)
        if (i == nstr - 1 && str == "") {
            # add nothing to an empty last line
            maxstr = ""
            width = 0
        }
        # padding first, then keep the rightmost width characters
        res = res nl substring(maxstr str, w, width + w)
        nl = "\n"
    }
    return res
}

# ==============================================================================
# ljust(strs [, maxstr]): returns a string holding each line of str padded with
#       spaces to the right so that it is as long as maxstr, or to the longest
#       line in strs.
# ==============================================================================
define ljust {
    strs = split($1, "\n")

    # set up default width and padding string: maxstr with every character
    # (newlines included) turned into a space
    if ($n_args > 1)
        maxstr = replace_in_string($2, "(?n.)", " ", "regex")
    else
        maxstr = ""
    width = length(maxstr)

    # measure longest line, and maintain padding string of that length
    nstr = strs[]
    for (i = 0; i < nstr; i++) {
        str = strs[i]
        w = length(str)
        if (width < w) {
            width = w
            maxstr = replace_in_string(str, ".", " ", "regex")
        }
    }

    # assemble result
    res = ""
    nl = ""
    for (i = 0; i < nstr; i++) {
        str = strs[i]
        if (i == nstr - 1 && str == "") {
            # add nothing to an empty last line
            maxstr = ""
            width = 0
        }
        # padding last, then keep the leftmost width characters
        res = res nl substring(str maxstr, 0, width)
        nl = "\n"
    }
    return res
}

# ==============================================================================
# trim(strs[, re[, nlSense]]): returns a string holding each line of strs with
#       ends matching "(?"nlSense"^(?:"re"))" and "(?"nlSense"(?:"re")$)"
#       removed. The default value of re is "\\s+", and of nlSense is "N".
# ==============================================================================
define trim {
    strs = $1
    re = "\\s+"
    nlSense = "N"
    if ($n_args >= 2)
        re = $2
    if ($n_args >= 3)
        nlSense = $3

    # strip the fronts of lines ("copy": keep the string if nothing matched)
    strs = replace_in_string(strs, "(?"nlSense"^(?:"re"))", "", "regex", "copy")
    # and the ends
    strs = replace_in_string(strs, "(?"nlSense"(?:"re")$)", "", "regex", "copy")
    return strs
}

# ==============================================================================
# compress(strs[, re[, repl[, nlSense]]]): returns a string holding each line of
#       strs with each sequence identified as "(?"nlSense"(?:"re")+)" replaced
#       with repl. The default value of re is "\\s", of repl is " ", and of
#       nlSense is "N".
# ==============================================================================
define compress {
    strs = $1
    re = "\\s"
    repl = " "
    nlSense = "N"
    # arguments follow the documented order; repl used to be left unset and
    # nlSense was read from the third argument
    if ($n_args >= 2)
        re = $2
    if ($n_args >= 3)
        repl = $3
    if ($n_args >= 4)
        nlSense = $4

    # do replacement ("copy": keep the string if nothing matched)
    strs = replace_in_string(strs, "(?"nlSense"(?:"re")+)", repl, "regex", "copy")
    return strs
}

# ==============================================================================
# number(string [, "strict"]): returns the numeric value read from the front of
#       the string argument. Fails if no leading number was found, with zero,
#       or, if "strict" is present, with an invalid function call.
# ==============================================================================
define number {
    s = $1
    if (valid_number(s))
        return s + 0

    # keep only an optionally signed integer at the front (after any white
    # space); the result is "" when there is none
    s = replace_in_string(s, "(?n^\\s*([-+]?\\d+).*)", "\\1", "regex")
    if (s == "") {
        if ($n_args > 1 && $2 == "strict")
            # deliberate call of a function that does not exist: this stops
            # the macro with an error naming the problem
            s = number_NoNumericPrefixFound()
        else
            s = 0
    }
    return s + 0
}