From c2587c76f1b416cdbecb979e54941933246bf856 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Tue, 16 Feb 2021 20:14:16 -0600 Subject: starting over --- lib/TclUtil.py | 386 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 193 insertions(+), 193 deletions(-) (limited to 'lib/TclUtil.py') diff --git a/lib/TclUtil.py b/lib/TclUtil.py index 0990530..fb7b530 100644 --- a/lib/TclUtil.py +++ b/lib/TclUtil.py @@ -12,7 +12,7 @@ # For efficiency, the Tcl "tokenizing" routines used pre-compiled -# regular expressions. This is less readable but should be much faster +# regular expressions. This is less readable but should be much faster # than scanning the string a character at a time. # # The global variables @@ -21,9 +21,9 @@ # # The patterns always # have the form * so they always match at the start of the -# search buffer---maybe with the empty string. This makes it possible +# search buffer---maybe with the empty string. This makes it possible # to use the expression "_foo_prog.exec(str, i)[0][1]" to find the first -# character beyond the matched string. Note that this may be beyond the +# character beyond the matched string. Note that this may be beyond the # end variable -- where this matters, "min(i, end)" is used. # Constructs that cannot @@ -33,19 +33,19 @@ # # Many regular expressions contain an expression that matches # a Tcl backslash sequence as a subpart: -# \\\\C?M?(.|\n) +# \\\\C?M?(.|\n) # # This is a bit hard to # read because the backslash contained in it must be doubled twice: # once to get past Python's backslash mechanism, once to get past that -# of regular expressions. It uses (.|\n) to match absolutely +# of regular expressions. It uses (.|\n) to match absolutely # *every character*, becase the MULTILINE regular expression package does # not accept '\n' as a match for '.'. # # There is also a simplification in the pattern for backslashes: # *any* single character following a backslash is escaped, # so hex and octal -# excapes are not scanned fully. The forms \Cx, \Mx and \CMx are +# excapes are not scanned fully. The forms \Cx, \Mx and \CMx are # scanned correctly, as these may hide a special character. # (This does not invalidate the recognition of strings, although the # match is effectuated in a different way than by the Backslash function.) @@ -69,28 +69,28 @@ TclMatchingError = 'Tcl matching error' _varname_prog = regexp.compile('[a-zA-Z0-9_]*') def FindVarName(str, i, end): - if i < end and str[i] = '{': return BalanceBraces(str, i, end) - i = _varname_prog.exec(str, i)[0][1] - return min(i, end) + if i < end and str[i] = '{': return BalanceBraces(str, i, end) + i = _varname_prog.exec(str, i)[0][1] + return min(i, end) # Split a list into its elements. # Return a list of elements (strings). def SplitList(str): - i, end = 0, len(str) - list = [] - while 1: - i = SkipSpaces(str, i, end) - if i >= end: break - j = i - i = FindNextElement(str, i, end) - if str[j] = '{' and str[i-1] = '}': - element = str[j+1:i-1] - else: - element = Collapse(str[j:i]) - list.append(element) - return list + i, end = 0, len(str) + list = [] + while 1: + i = SkipSpaces(str, i, end) + if i >= end: break + j = i + i = FindNextElement(str, i, end) + if str[j] = '{' and str[i-1] = '}': + element = str[j+1:i-1] + else: + element = Collapse(str[j:i]) + list.append(element) + return list # Find the next element from a list. @@ -98,13 +98,13 @@ def SplitList(str): _element_prog = regexp.compile('([^ \t\n\\]+|\\\\C?M?(.|\n))*') def FindNextElement(str, i, end): - if i < end and str[i] = '{': - i = BalanceBraces(str, i, end) - if i < end and str[i] not in ' \t\n': - raise TclSyntaxError, 'Garbage after } in list' - return i - i = _element_prog.exec(str, i)[0][1] - return min(i, end) + if i < end and str[i] = '{': + i = BalanceBraces(str, i, end) + if i < end and str[i] not in ' \t\n': + raise TclSyntaxError, 'Garbage after } in list' + return i + i = _element_prog.exec(str, i)[0][1] + return min(i, end) # Copy a string, expanding all backslash sequences. @@ -112,19 +112,19 @@ def FindNextElement(str, i, end): _collapse_prog = regexp.compile('(\n|[^\\]+)*') def Collapse(str): - if '\\' not in str: return str - i, end = 0, len(str) - result = '' - while i < end: - j = _collapse_prog.exec(str, i)[0][1] - j = min(j, end) - result = result + str[i:j] - if j >= end: break - c = str[j] - if c <> '\\': raise TclAssertError, 'collapse error' - x, i = Backslash(str, j, end) - result = result + x - return result + if '\\' not in str: return str + i, end = 0, len(str) + result = '' + while i < end: + j = _collapse_prog.exec(str, i)[0][1] + j = min(j, end) + result = result + str[i:j] + if j >= end: break + c = str[j] + if c <> '\\': raise TclAssertError, 'collapse error' + x, i = Backslash(str, j, end) + result = result + x + return result # Find the next full command. @@ -138,25 +138,25 @@ def Collapse(str): _eol_prog = regexp.compile('[^\n]*') def FindNextCommand(str, i, end, bracketed): - i = SkipSpaces(str, i, end) - if i >= end: return [], end - if str[i] = '#': - i = _eol_prog.exec(str, i) - i = min(i, end) - if i < end and str[i] = '\n': i = i+1 - return [], i - if bracketed: terminators = [';'] - else: terminators = [';', '\n'] - list = [] - while i < end: - j = FindNextWord(str, i, end) - word = str[i:j] - if word in terminators: - i = j - break - if word <> '\n': list.append(i, j) - i = SkipSpaces(str, j, end) - return list, i + i = SkipSpaces(str, i, end) + if i >= end: return [], end + if str[i] = '#': + i = _eol_prog.exec(str, i) + i = min(i, end) + if i < end and str[i] = '\n': i = i+1 + return [], i + if bracketed: terminators = [';'] + else: terminators = [';', '\n'] + list = [] + while i < end: + j = FindNextWord(str, i, end) + word = str[i:j] + if word in terminators: + i = j + break + if word <> '\n': list.append(i, j) + i = SkipSpaces(str, j, end) + return list, i # Find the next word of a command. @@ -167,26 +167,26 @@ def FindNextCommand(str, i, end, bracketed): _word_prog = regexp.compile('([^ \t\n;[\\]+|\\\\C?M?(.|\n))*') def FindNextWord(str, i, end): - if i >= end: return end - if str[i] in '{"': - if str[i] = '{': i = BalanceBraces(str, i, end) - else: i = BalanceQuotes(str, i, end) - if i >= end or str[i] in ' \t\n;': return min(i, end) - raise TclSyntaxError, 'Garbage after } or "' - begin = i - while i < end: - i = _word_prog.exec(str, i)[0][1] - if i >= end: - i = end - break - c = str[i] - if c in ' \t': break - if c in ';\n': - if i = begin: i = i+1 - break - if c = '[': i = BalanceBrackets(str, i, end) - else: raise TclAssertError, 'word error' - return i + if i >= end: return end + if str[i] in '{"': + if str[i] = '{': i = BalanceBraces(str, i, end) + else: i = BalanceQuotes(str, i, end) + if i >= end or str[i] in ' \t\n;': return min(i, end) + raise TclSyntaxError, 'Garbage after } or "' + begin = i + while i < end: + i = _word_prog.exec(str, i)[0][1] + if i >= end: + i = end + break + c = str[i] + if c in ' \t': break + if c in ';\n': + if i = begin: i = i+1 + break + if c = '[': i = BalanceBrackets(str, i, end) + else: raise TclAssertError, 'word error' + return i # Parse balanced brackets from str[i:end]. @@ -197,22 +197,22 @@ def FindNextWord(str, i, end): _brackets_prog = regexp.compile('([^][{\\]+|\n|\\\\C?M?(.|\n))*') def BalanceBrackets(str, i, end): - if i >= end or str[i] <> '[': - raise TclAssertError, 'BalanceBrackets' - nesting = 0 - while i < end: - i = _brackets_prog.exec(str, i)[0][1] - if i >= end: break - c = str[i] - if c = '{': i = BalanceBraces(str, i, end) - else: - i = i+1 - if c = '[': nesting = nesting + 1 - elif c = ']': - nesting = nesting - 1 - if nesting = 0: return i - else: raise TclAssertError, 'brackets error' - raise TclMatchingError, 'Unmatched bracket ([)' + if i >= end or str[i] <> '[': + raise TclAssertError, 'BalanceBrackets' + nesting = 0 + while i < end: + i = _brackets_prog.exec(str, i)[0][1] + if i >= end: break + c = str[i] + if c = '{': i = BalanceBraces(str, i, end) + else: + i = i+1 + if c = '[': nesting = nesting + 1 + elif c = ']': + nesting = nesting - 1 + if nesting = 0: return i + else: raise TclAssertError, 'brackets error' + raise TclMatchingError, 'Unmatched bracket ([)' # Parse balanced braces from str[i:end]. @@ -223,20 +223,20 @@ def BalanceBrackets(str, i, end): _braces_prog = regexp.compile('([^{}\\]+|\n|\\\\C?M?(.|\n))*') def BalanceBraces(str, i, end): - if i >= end or str[i] <> '{': - raise TclAssertError, 'BalanceBraces' - nesting = 0 - while i < end: - i = _braces_prog.exec(str, i)[0][1] - if i >= end: break - c = str[i] - i = i+1 - if c = '{': nesting = nesting + 1 - elif c = '}': - nesting = nesting - 1 - if nesting = 0: return i - else: raise TclAssertError, 'braces error' - raise TclMatchingError, 'Unmatched brace ({)' + if i >= end or str[i] <> '{': + raise TclAssertError, 'BalanceBraces' + nesting = 0 + while i < end: + i = _braces_prog.exec(str, i)[0][1] + if i >= end: break + c = str[i] + i = i+1 + if c = '{': nesting = nesting + 1 + elif c = '}': + nesting = nesting - 1 + if nesting = 0: return i + else: raise TclAssertError, 'braces error' + raise TclMatchingError, 'Unmatched brace ({)' # Parse double quotes from str[i:end]. @@ -246,11 +246,11 @@ def BalanceBraces(str, i, end): _quotes_prog = regexp.compile('([^"\\]+|\n|\\\\C?M?(.|\n))*') def BalanceQuotes(str, i, end): - if i >= end or str[i] <> '"': - raise TclAssertError, 'BalanceQuotes' - i = _quotes_prog.exec(str, i+1)[0][1] - if i < end and str[i] = '"': return i+1 - raise TclMatchingError, 'Unmatched quote (")' + if i >= end or str[i] <> '"': + raise TclAssertError, 'BalanceQuotes' + i = _quotes_prog.exec(str, i+1)[0][1] + if i < end and str[i] = '"': return i+1 + raise TclMatchingError, 'Unmatched quote (")' # Static data used by Backslash() @@ -272,44 +272,44 @@ del c # this takes only the backslash itself off the string. def Backslash(str, i, end): - if i >= end or str[i] <> '\\': - raise TclAssertError, 'Backslash' - i = i+1 - if i = end: return '\\', i - c = str[i] - i = i+1 - if _bstab.has_key(c): return _bstab[c], i - if c = 'C': - if i = end: return '\\', i-1 - c = str[i] - i = i+1 - if c = 'M': - if i = end: return '\\', i-2 - c = str[i] - i = i+1 - x = ord(c) % 040 + 0200 - else: - x = ord(c) % 040 - return chr(x), i - elif c = 'M': - if i = end: return '\\', i-1 - c = str[i] - i = i+1 - x = ord(c) - if x < 0200: x = x + 0200 - return chr(x), i - elif c and c in '0123456789': - x = ord(c) - ord('0') - end = min(end, i+2) - while i < end: - c = str[i] - if c not in '0123456789': break - i = i+1 - x = x*8 + ord(c) - ord('0') - return ord(x), i - else: - # Not something that we recognize - return '\\', i-1 + if i >= end or str[i] <> '\\': + raise TclAssertError, 'Backslash' + i = i+1 + if i = end: return '\\', i + c = str[i] + i = i+1 + if _bstab.has_key(c): return _bstab[c], i + if c = 'C': + if i = end: return '\\', i-1 + c = str[i] + i = i+1 + if c = 'M': + if i = end: return '\\', i-2 + c = str[i] + i = i+1 + x = ord(c) % 040 + 0200 + else: + x = ord(c) % 040 + return chr(x), i + elif c = 'M': + if i = end: return '\\', i-1 + c = str[i] + i = i+1 + x = ord(c) + if x < 0200: x = x + 0200 + return chr(x), i + elif c and c in '0123456789': + x = ord(c) - ord('0') + end = min(end, i+2) + while i < end: + c = str[i] + if c not in '0123456789': break + i = i+1 + x = x*8 + ord(c) - ord('0') + return ord(x), i + else: + # Not something that we recognize + return '\\', i-1 # Skip over spaces and tabs (but not newlines). @@ -317,61 +317,61 @@ def Backslash(str, i, end): _spaces_prog = regexp.compile('[ \t]*') def SkipSpaces(str, i, end): - i = _spaces_prog.exec(str, i)[0][1] - return min(i, end) + i = _spaces_prog.exec(str, i)[0][1] + return min(i, end) # Concatenate the elements of a list with intervening spaces. def Concat(argv): - result = '' - sep = '' - for arg in argv: - result = result + (sep + arg) - sep = ' ' - return result + result = '' + sep = '' + for arg in argv: + result = result + (sep + arg) + sep = ' ' + return result # Concatenate list elements, adding braces etc. to make them parseable # again with SplitList. def BuildList(argv): - result = '' - sep = '' - for arg in argv: - arg = AddBraces(arg) - result = result + (sep + arg) - sep = ' ' - return result + result = '' + sep = '' + for arg in argv: + arg = AddBraces(arg) + result = result + (sep + arg) + sep = ' ' + return result # Add braces around a string if necessary to make it parseable by SplitList. def AddBraces(str): - # Special case for empty string - if str = '': return '{}' - # See if it contains balanced braces - res = '{' + str + '}' - if TryNextElement(res): - # See if it would survive unquoted - # XXX should escape [] and $ as well??? - if TryNextElement(str) and Collapse(str) = str: return str - # No -- return with added braces - return res - # Unbalanced braces. Add backslashes before suspect characters - res = '' - for c in str: - if c in '$\\[]{} ;': c = '\\' + c - elif c = '\n': c = '\\n' - elif c = '\t': c = '\\t' - res = res + c - return res + # Special case for empty string + if str = '': return '{}' + # See if it contains balanced braces + res = '{' + str + '}' + if TryNextElement(res): + # See if it would survive unquoted + # XXX should escape [] and $ as well??? + if TryNextElement(str) and Collapse(str) = str: return str + # No -- return with added braces + return res + # Unbalanced braces. Add backslashes before suspect characters + res = '' + for c in str: + if c in '$\\[]{} ;': c = '\\' + c + elif c = '\n': c = '\\n' + elif c = '\t': c = '\\t' + res = res + c + return res def TryNextElement(str): - end = len(str) - try: - i = FindNextElement(str, 0, end) - return i = end - except (TclSyntaxError, TclMatchingError): - return 0 + end = len(str) + try: + i = FindNextElement(str, 0, end) + return i = end + except (TclSyntaxError, TclMatchingError): + return 0 -- cgit v1.2.3