1 files changed, 377 insertions, 0 deletions
diff --git a/lib/TclUtil.py b/lib/TclUtil.py
new file mode 100644
index 0000000..0990530
--- /dev/null
+++ b/lib/TclUtil.py
@@ -0,0 +1,377 @@
+# Utilities used by 'Tcl' emulator.
+
+
+# Many functions in this file parse specific constructs from strings.
+# In order to limit the number of slice operations (the strings can
+# be very large), they always receive indices into the string that
+# indicate the slice of the string that should be considered.
+# The return value is in general another index, pointing to the first
+# character in the string beyond the recognized construct.
+# Errors are reported as exceptions (TclSyntaxError, TclMatchingError).
+# A few functions have multiple return values.
+
+
+# For efficiency, the Tcl "tokenizing" routines use pre-compiled
+# regular expressions. This is less readable but should be much faster
+# than scanning the string a character at a time.
+#
+# The global variables
+# containing the compiled regexp's are named _foo_prog where foo is
+# an indication of the function that uses them.
+#
+# The patterns always
+# have the form <something>* so they always match at the start of the
+# search buffer---maybe with the empty string. This makes it possible
+# to use the expression "_foo_prog.exec(str, i)[0][1]" to find the first
+# character beyond the matched string. Note that this may be beyond the
+# end variable -- where this matters, "min(i, end)" is used.
+
+# Constructs that cannot
+# be recognized by a finite automaton (like matching braces) are scanned
+# by a hybrid technique where the regular expression excludes the
+# braces.
+#
+# Many regular expressions contain an expression that matches
+# a Tcl backslash sequence as a subpart:
+# \\\\C?M?(.|\n)
+#
+# This is a bit hard to
+# read because the backslash contained in it must be doubled twice:
+# once to get past Python's backslash mechanism, once to get past that
+# of regular expressions. It uses (.|\n) to match absolutely
+# *every character*, because the MULTILINE regular expression package does
+# not accept '\n' as a match for '.'.
+#
+# There is also a simplification in the pattern for backslashes:
+# *any* single character following a backslash is escaped,
+# so hex and octal
+# escapes are not scanned fully. The forms \Cx, \Mx and \CMx are
+# scanned correctly, as these may hide a special character.
+# (This does not invalidate the recognition of strings, although the
+# match is effectuated in a different way than by the Backslash function.)
+
+import regexp
+
+
+# Exceptions raised for various error conditions (string exceptions, used as "raise X, message").
+
+TclAssertError = 'Tcl assert error' # internal consistency check failed (e.g. a scanner called at the wrong character)
+TclSyntaxError = 'Tcl syntax error' # e.g. garbage after a closing brace or quote
+TclRuntimeError = 'Tcl runtime error' # not raised by the code in this file
+TclMatchingError = 'Tcl matching error' # unmatched brace, bracket or quote
+
+
+# Find a variable name in str, starting at index i and not looking beyond end.
+# A variable name is either a (possibly empty) sequence of letters,
+# digits and underscores, or anything enclosed in matching braces.
+# Return the index past the end of the name (for a braced name: past the closing brace).

_varname_prog = regexp.compile('[a-zA-Z0-9_]*')

def FindVarName(str, i, end):
+ if i < end and str[i] = '{': return BalanceBraces(str, i, end)
+ i = _varname_prog.exec(str, i)[0][1]
+ return min(i, end) # the regexp is not bounded by end, so clamp the result
+
+
+# Split a list into its elements.
+# Return a list of elements (strings).
+
+def SplitList(str):
+ i, end = 0, len(str)
+ list = []
+ while 1:
+ i = SkipSpaces(str, i, end)
+ if i >= end: break
+ j = i
+ i = FindNextElement(str, i, end)
+ if str[j] = '{' and str[i-1] = '}':
+ element = str[j+1:i-1]
+ else:
+ element = Collapse(str[j:i])
+ list.append(element)
+ return list
+
+
+# Find the next element from a list: return the index just past the element that starts at str[i].
+
+_element_prog = regexp.compile('([^ \t\n\\]+|\\\\C?M?(.|\n))*')
+
+def FindNextElement(str, i, end):
+ if i < end and str[i] = '{':
+ i = BalanceBraces(str, i, end)
+ if i < end and str[i] not in ' \t\n': # a braced element must be followed by white space or the end
+ raise TclSyntaxError, 'Garbage after } in list'
+ return i
+ i = _element_prog.exec(str, i)[0][1] # plain element: stops at the first unescaped space, tab or newline
+ return min(i, end)
+
+
+# Copy a string, expanding all backslash sequences (each is replaced by the string Backslash() returns for it).
+
+_collapse_prog = regexp.compile('(\n|[^\\]+)*')
+
+def Collapse(str):
+ if '\\' not in str: return str # fast path: nothing to expand
+ i, end = 0, len(str)
+ result = ''
+ while i < end:
+ j = _collapse_prog.exec(str, i)[0][1] # skip the run of non-backslash characters
+ j = min(j, end)
+ result = result + str[i:j]
+ if j >= end: break
+ c = str[j]
+ if c <> '\\': raise TclAssertError, 'collapse error'
+ x, i = Backslash(str, j, end) # x is the replacement, i the index to continue from
+ result = result + x
+ return result
+
+
+# Find the next full command.
+# Return a list of begin, end indices of words in the string,
+# and an index pointing just after the terminating newline or
+# semicolon.
+# Initial spaces are skipped.
+# If the command begins with '#', it is considered empty and
+# characters until '\n' are skipped.
+
+_eol_prog = regexp.compile('[^\n]*')
+
+def FindNextCommand(str, i, end, bracketed):
+ i = SkipSpaces(str, i, end)
+ if i >= end: return [], end
+ if str[i] = '#':
+ i = _eol_prog.exec(str, i)
+ i = min(i, end)
+ if i < end and str[i] = '\n': i = i+1
+ return [], i
+ if bracketed: terminators = [';']
+ else: terminators = [';', '\n']
+ list = []
+ while i < end:
+ j = FindNextWord(str, i, end)
+ word = str[i:j]
+ if word in terminators:
+ i = j
+ break
+ if word <> '\n': list.append(i, j)
+ i = SkipSpaces(str, j, end)
+ return list, i
+
+
+# Find the next word of a command; return the index just past it.
+# Semicolon and newline terminate words but also count as a (one-character) word
+# themselves.  A word starting with { or " runs up to the matching closer.
+# The start index must point to the start of the word.
+
+_word_prog = regexp.compile('([^ \t\n;[\\]+|\\\\C?M?(.|\n))*')
+
+def FindNextWord(str, i, end):
+ if i >= end: return end
+ if str[i] in '{"':
+ if str[i] = '{': i = BalanceBraces(str, i, end)
+ else: i = BalanceQuotes(str, i, end)
+ if i >= end or str[i] in ' \t\n;': return min(i, end)
+ raise TclSyntaxError, 'Garbage after } or "'
+ begin = i
+ while i < end:
+ i = _word_prog.exec(str, i)[0][1] # skip ordinary characters and backslash sequences
+ if i >= end:
+ i = end
+ break
+ c = str[i]
+ if c in ' \t': break
+ if c in ';\n':
+ if i = begin: i = i+1 # the terminator itself is the word
+ break
+ if c = '[': i = BalanceBrackets(str, i, end) # a bracketed part belongs to the word as a whole
+ else: raise TclAssertError, 'word error'
+ return i
+
+
+# Parse balanced brackets from str[i:end].
+# str[i] must be '[' (otherwise TclAssertError is raised).
+# Returns end such that str[i:end] ends with ']'
+# and contains balanced braces and brackets; raises TclMatchingError if there is no matching ']'.
+
+_brackets_prog = regexp.compile('([^][{\\]+|\n|\\\\C?M?(.|\n))*')
+
+def BalanceBrackets(str, i, end):
+ if i >= end or str[i] <> '[':
+ raise TclAssertError, 'BalanceBrackets'
+ nesting = 0
+ while i < end:
+ i = _brackets_prog.exec(str, i)[0][1] # skip everything except [ ] { and a dangling backslash
+ if i >= end: break
+ c = str[i]
+ if c = '{': i = BalanceBraces(str, i, end) # a braced part is skipped as a unit
+ else:
+ i = i+1
+ if c = '[': nesting = nesting + 1
+ elif c = ']':
+ nesting = nesting - 1
+ if nesting = 0: return i
+ else: raise TclAssertError, 'brackets error'
+ raise TclMatchingError, 'Unmatched bracket ([)'
+
+
+# Parse balanced braces from str[i:end].
+# str[i] must be '{' (otherwise TclAssertError is raised).
+# Returns end such that str[i:end] ends with '}'
+# and contains balanced braces; raises TclMatchingError if there is no matching '}'.
+
+_braces_prog = regexp.compile('([^{}\\]+|\n|\\\\C?M?(.|\n))*')
+
+def BalanceBraces(str, i, end):
+ if i >= end or str[i] <> '{':
+ raise TclAssertError, 'BalanceBraces'
+ nesting = 0
+ while i < end:
+ i = _braces_prog.exec(str, i)[0][1] # skip everything except { } and a dangling backslash
+ if i >= end: break
+ c = str[i]
+ i = i+1
+ if c = '{': nesting = nesting + 1
+ elif c = '}':
+ nesting = nesting - 1
+ if nesting = 0: return i
+ else: raise TclAssertError, 'braces error'
+ raise TclMatchingError, 'Unmatched brace ({)'
+
+
+# Parse a double-quoted string from str[i:end].
+# str[i] must be '"' (otherwise TclAssertError is raised).
+# Returns end such that str[i:end] ends with an unescaped '"'; raises TclMatchingError if there is none.
+
+_quotes_prog = regexp.compile('([^"\\]+|\n|\\\\C?M?(.|\n))*')
+
+def BalanceQuotes(str, i, end):
+ if i >= end or str[i] <> '"':
+ raise TclAssertError, 'BalanceQuotes'
+ i = _quotes_prog.exec(str, i+1)[0][1] # scan from just after the opening quote
+ if i < end and str[i] = '"': return i+1
+ raise TclMatchingError, 'Unmatched quote (")'
+
+
+# Static data used by Backslash(): maps the character following a backslash to its replacement string.
+
+_bstab = {}
+_bstab['n'] = '\n'
+_bstab['r'] = '\r'
+_bstab['t'] = '\t'
+_bstab['b'] = '\b'
+_bstab['e'] = '\033'
+_bstab['\n'] = '' # backslash-newline is replaced by nothing
+for c in ' {}[]$";\\': _bstab[c] = c # these characters simply lose their backslash
+del c
+
+# Backslash interpretation.
+# First character must be a backslash.
+# Return a pair (<replacement string>, <end of sequence>).
+# Unrecognized or incomplete backslash sequences are not errors;
+# this takes only the backslash itself off the string.
+
+def Backslash(str, i, end):
+ if i >= end or str[i] <> '\\':
+ raise TclAssertError, 'Backslash'
+ i = i+1
+ if i = end: return '\\', i
+ c = str[i]
+ i = i+1
+ if _bstab.has_key(c): return _bstab[c], i
+ if c = 'C':
+ if i = end: return '\\', i-1
+ c = str[i]
+ i = i+1
+ if c = 'M':
+ if i = end: return '\\', i-2
+ c = str[i]
+ i = i+1
+ x = ord(c) % 040 + 0200
+ else:
+ x = ord(c) % 040
+ return chr(x), i
+ elif c = 'M':
+ if i = end: return '\\', i-1
+ c = str[i]
+ i = i+1
+ x = ord(c)
+ if x < 0200: x = x + 0200
+ return chr(x), i
+ elif c and c in '0123456789':
+ x = ord(c) - ord('0')
+ end = min(end, i+2)
+ while i < end:
+ c = str[i]
+ if c not in '0123456789': break
+ i = i+1
+ x = x*8 + ord(c) - ord('0')
+ return ord(x), i
+ else:
+ # Not something that we recognize
+ return '\\', i-1
+
+
+# Skip over spaces and tabs (but not newlines); return the index of the first other character, at most end.
+
+_spaces_prog = regexp.compile('[ \t]*')
+
+def SkipSpaces(str, i, end):
+ i = _spaces_prog.exec(str, i)[0][1]
+ return min(i, end) # the regexp is not bounded by end, so clamp the result
+
+
+# Concatenate the elements of a list (strings) with intervening spaces; an empty list gives ''.
+
+def Concat(argv):
+ result = ''
+ sep = ''
+ for arg in argv:
+ result = result + (sep + arg)
+ sep = ' ' # every element after the first is preceded by a space
+ return result
+
+
+# Concatenate list elements with intervening spaces, passing each through AddBraces
+# (which adds braces or backslashes where needed) to make them parseable again with SplitList.
+
+def BuildList(argv):
+ result = ''
+ sep = ''
+ for arg in argv:
+ arg = AddBraces(arg)
+ result = result + (sep + arg)
+ sep = ' ' # every element after the first is preceded by a space
+ return result
+
+
+# Add braces around a string if necessary to make it parseable by SplitList.
+
+def AddBraces(str):
+ # Special case for empty string
+ if str = '': return '{}'
+ # See if it contains balanced braces
+ res = '{' + str + '}'
+ if TryNextElement(res):
+ # See if it would survive unquoted
+ # XXX should escape [] and $ as well???
+ if TryNextElement(str) and Collapse(str) = str: return str
+ # No -- return with added braces
+ return res
+ # Unbalanced braces. Add backslashes before suspect characters
+ res = ''
+ for c in str:
+ if c in '$\\[]{} ;': c = '\\' + c
+ elif c = '\n': c = '\\n'
+ elif c = '\t': c = '\\t'
+ res = res + c
+ return res
+
+
+def TryNextElement(str): # Return true iff the whole of str is exactly one list element
+ end = len(str)
+ try:
+ i = FindNextElement(str, 0, end)
+ return i = end # true only if the element found spans the entire string
+ except (TclSyntaxError, TclMatchingError): # unbalanced braces or garbage after }: not a valid element
+ return 0