Move test_class.pyc to the new test framework, and fix tokenization

of multi-line strings.
This commit is contained in:
Michael Hansen
2019-10-03 17:12:52 -07:00
parent 697aa5d2c3
commit 1cf1977a40
8 changed files with 105 additions and 131 deletions

View File

@@ -86,33 +86,14 @@ class FloatToken(PyToken):
class StringToken(PyToken):
    """Token for a string literal.

    The token stores a normalized form of the literal so that two
    tokenizations of equivalent source compare equal: the prefix letters
    (r, b, f, u, ...) are lower-cased and sorted, and quote/newline
    characters in the content are canonicalized to escaped form.
    """

    def __init__(self, prefix, content, n_line):
        """
        prefix  -- string-prefix letters, or None/'' when absent
        content -- raw content between the quotes (quotes excluded)
        n_line  -- line number where the literal starts
        """
        super().__init__(PyToken.STRING, n_line)
        # Normalize prefix for comparison; tolerate a missing (None) prefix
        # so callers other than string_token() stay safe.
        self.prefix = ''.join(sorted((prefix or '').lower()))
        # Normalize special characters for comparison
        self.content = content.replace("'", "\\'").replace('\n', '\\n')

    def __str__(self):
        # Render with single quotes regardless of the original quoting.
        return "{}'{}'".format(self.prefix, self.content)
@@ -147,6 +128,41 @@ def symbolic_token(line, n_line):
return None
def string_token(line, n_line, pysrc):
    """Try to read a string literal that starts at the beginning of *line*.

    Returns a StringToken (with .rem_line set to the text after the closing
    quotes and .end_line set to the line the literal ended on), or None when
    *line* does not start a string.  Multi-line strings pull additional
    lines from *pysrc*.

    Raises RuntimeError when EOF is reached before the closing quotes.
    """
    match = RE_START_STRING.match(line)
    if not match:
        return None
    prefix = match.group(1) or ''
    quotes = match.group(2)
    # 'start' marks where un-consumed content begins on the current line;
    # 'pos' marks where the search for the closing quotes resumes.  They must
    # be tracked separately: after skipping an escaped quote we advance the
    # search but must NOT drop the content scanned so far.
    start = len(prefix) + len(quotes)
    pos = start
    content = ''
    while True:
        end = line.find(quotes, pos)
        if end > 0:
            # A quote is escaped only when preceded by an ODD number of
            # backslashes ('\\\\' ends the string; '\\'' does not).
            n_backslashes = 0
            while end - 1 - n_backslashes >= 0 and line[end - 1 - n_backslashes] == '\\':
                n_backslashes += 1
            if n_backslashes % 2 == 1:
                pos = end + 1
                continue
        if end >= 0:
            content += line[start:end]
            break
        # No closing quotes on this line: keep its remainder and read on
        content += line[start:]
        line = pysrc.readline()
        n_line += 1
        start = pos = 0
        if not line:
            raise RuntimeError('Reached EOF while looking for {}'.format(repr(quotes)))
    token = StringToken(prefix, content, n_line)
    token.rem_line = line[end + len(quotes):]
    token.end_line = n_line
    return token
def read_tokens(pysrc):
indent_stack = [0]
context_stack = []
@@ -158,8 +174,7 @@ def read_tokens(pysrc):
if not line:
break
sline = line.strip()
if not sline or sline.startswith('#'):
if not line.strip() or line.lstrip().startswith('#'):
continue
# Look for indentation changes
@@ -174,58 +189,57 @@ def read_tokens(pysrc):
if indent != indent_stack[-1]:
raise RuntimeError('Incorrect indentation on line {}'.format(n_line))
while sline:
idx = 0
while sline[idx].isspace():
idx += 1
sline = sline[idx:]
while True:
line = line.lstrip()
if not line:
break
token = symbolic_token(sline, n_line)
token = symbolic_token(line, n_line)
if token:
if token.type in {'(', '{', '['}:
context_stack.append(token.type)
elif token.type == ')':
if len(context_stack) == 0 or context_stack[-1] != '(':
raise RuntimeError('Mismatched token at {} on line {}'.format(sline, n_line))
raise RuntimeError('Mismatched token at {} on line {}'.format(line, n_line))
context_stack.pop()
elif token.type == '}':
if len(context_stack) == 0 or context_stack[-1] != '{':
raise RuntimeError('Mismatched token at {} on line {}'.format(sline, n_line))
raise RuntimeError('Mismatched token at {} on line {}'.format(line, n_line))
context_stack.pop()
elif token.type == ']':
if len(context_stack) == 0 or context_stack[-1] != '[':
raise RuntimeError('Mismatched token at {} on line {}'.format(sline, n_line))
raise RuntimeError('Mismatched token at {} on line {}'.format(line, n_line))
context_stack.pop()
yield token
sline = sline[len(token.type):]
line = line[len(token.type):]
continue
match = RE_FLOAT.match(sline)
match = RE_FLOAT.match(line)
if match:
yield FloatToken(match.group(), n_line)
sline = sline[match.end():]
line = line[match.end():]
continue
match = RE_INT.match(sline)
match = RE_INT.match(line)
if match:
yield IntToken(match.group(), n_line)
sline = sline[match.end():]
line = line[match.end():]
continue
match = RE_START_STRING.match(sline)
if match:
token = StringToken(match.group(1), match.group(2), sline, n_line)
token = string_token(line, n_line, pysrc)
if token:
line = token.rem_line
n_line = token.end_line
yield token
sline = sline[token.endpos:]
continue
match = RE_WORD.match(sline)
match = RE_WORD.match(line)
if match:
yield WordToken(match.group(), n_line)
sline = sline[match.end():]
line = line[match.end():]
continue
print('Error: Unrecognized tokens: "{}" at line {}'.format(sline, n_line))
print('Error: Unrecognized tokens: "{}" at line {}'.format(line, n_line))
sys.exit(1)
if len(context_stack) == 0:

View File

@@ -1,43 +0,0 @@
"""
test_class.py -- source test pattern for class definitions
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
class A:
class A1:
def __init__(self):
print 'A1.__init__'
def foo(self):
print 'A1.foo'
def __init__(self):
print 'A.__init__'
def foo(self):
print 'A.foo'
class B:
def __init__(self):
print 'B.__init__'
def bar(self):
print 'B.bar'
class C(A, B):
def foobar(self):
print 'C.foobar'
c = C()
c.foo()
c.bar()
c.foobar()

View File

@@ -1,42 +0,0 @@
"""
test_class.py -- source test pattern for class definitions
This source is part of the decompyle test suite.
decompyle is a Python byte-code decompiler
See http://www.goebel-consult.de/decompyle/ for download and
for further information
"""
class A:
class A1:
def __init__(self):
print 'A1.__init__'
def foo(self):
print 'A1.foo'
def __init__(self):
print 'A.__init__'
def foo(self):
print 'A.foo'
class B:
def __init__(self):
print 'B.__init__'
def bar(self):
print 'B.bar'
class C(A, B):
def foobar(self):
print 'C.foobar'
c = C()
c.foo()
c.bar()
c.foobar()

View File

@@ -0,0 +1,45 @@
'\ntest_class.py -- source test pattern for class definitions\n\nThis source is part of the decompyle test suite.\n\ndecompyle is a Python byte-code decompiler\nSee http://www.goebel-consult.de/decompyle/ for download and\nfor further information\n' <EOL>
class A : <EOL>
<INDENT>
class A1 : <EOL>
<INDENT>
def __init__ ( self ) : <EOL>
<INDENT>
print 'A1.__init__' <EOL>
<OUTDENT>
def foo ( self ) : <EOL>
<INDENT>
print 'A1.foo' <EOL>
<OUTDENT>
<OUTDENT>
def __init__ ( self ) : <EOL>
<INDENT>
print 'A.__init__' <EOL>
<OUTDENT>
def foo ( self ) : <EOL>
<INDENT>
print 'A.foo' <EOL>
<OUTDENT>
<OUTDENT>
class B : <EOL>
<INDENT>
def __init__ ( self ) : <EOL>
<INDENT>
print 'B.__init__' <EOL>
<OUTDENT>
def bar ( self ) : <EOL>
<INDENT>
print 'B.bar' <EOL>
<OUTDENT>
<OUTDENT>
class C ( A , B ) : <EOL>
<INDENT>
def foobar ( self ) : <EOL>
<INDENT>
print 'C.foobar' <EOL>
<OUTDENT>
<OUTDENT>
c = C ( ) <EOL>
c . foo ( ) <EOL>
c . bar ( ) <EOL>
c . foobar ( ) <EOL>