diff --git a/CMakeLists.txt b/CMakeLists.txt
index faf12d4..f32d2de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,18 +57,16 @@ target_link_libraries(pycdc pycxx)
 install(TARGETS pycdc
         RUNTIME DESTINATION bin)
 
-# For tests
-if(POLICY CMP0037)
-    # Don't complain about adding a target named "test"
-    cmake_policy(SET CMP0037 OLD)
-endif()
-
-add_custom_target(test "${CMAKE_CURRENT_SOURCE_DIR}/pycdc_test.sh"
+add_custom_target(dc_test "${CMAKE_CURRENT_SOURCE_DIR}/pycdc_test.sh"
                         "${CMAKE_CURRENT_SOURCE_DIR}/tests"
                   WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
-add_dependencies(test pycdc)
+add_dependencies(dc_test pycdc)
 
 add_custom_target(rt_test "${CMAKE_CURRENT_SOURCE_DIR}/pycdc_rt_test.sh"
                           "${CMAKE_CURRENT_BINARY_DIR}/tests"
                   WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
-add_dependencies(rt_test test)
+add_dependencies(rt_test dc_test)
+
+add_custom_target(check "${CMAKE_CURRENT_SOURCE_DIR}/tests/all_tests.sh"
+                  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
+add_dependencies(check pycdc)
diff --git a/scripts/token_dump b/scripts/token_dump
new file mode 100755
index 0000000..5ed0392
--- /dev/null
+++ b/scripts/token_dump
@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+
+# Compare two python source files by tokens, ignoring whitespace (other than
+# indentation) and comments
+
+import sys
+import re
+
+
+class PyToken:
+    """Generic token; `type` is a type code below or a symbol's own text."""
+
+    INDENT = 1
+    OUTDENT = 2
+    ENDLINE = 3
+    WORD = 100
+    INT = 101
+    FLOAT = 102
+    STRING = 103
+
+    def __init__(self, type, n_line):
+        self.type = type
+        self.n_line = n_line
+
+    def __str__(self):
+        if self.type == PyToken.INDENT:
+            return ''
+        if self.type == PyToken.OUTDENT:
+            return ''
+        if self.type == PyToken.ENDLINE:
+            return ''
+        return str(self.type)
+
+    def __eq__(self, other):
+        return self.type == other.type
+
+
+class WordToken(PyToken):
+    # We don't need to distinguish between keywords and other words, so
+    # we just lump them together in a single token type...
+    def __init__(self, word, n_line):
+        super().__init__(PyToken.WORD, n_line)
+        self.word = word
+
+    def __str__(self):
+        return self.word
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        return self.word == other.word
+
+
+class IntToken(PyToken):
+    """Integer literal, compared by numeric value (so 0x10 equals 16)."""
+
+    def __init__(self, value, n_line):
+        super().__init__(PyToken.INT, n_line)
+        try:
+            self.value = int(value.replace('_', ''), 0)
+        except ValueError:
+            # Support Python 2.x octal literals
+            if value.startswith('0'):
+                self.value = int(value.replace('_', ''), 8)
+            else:
+                raise
+
+    def __str__(self):
+        return str(self.value)
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        return self.value == other.value
+
+
+class FloatToken(PyToken):
+    """Floating point literal, compared by numeric value."""
+
+    def __init__(self, value, n_line):
+        super().__init__(PyToken.FLOAT, n_line)
+        self.value = float(value.replace('_', ''))
+
+    def __str__(self):
+        return str(self.value)
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        # TODO: Might need some fuzz
+        return self.value == other.value
+
+
+class StringToken(PyToken):
+    """String literal at the start of `line`.
+
+    `endpos` is the offset in `line` just past the closing quotes.
+    Raises RuntimeError if the string is not closed on this line.
+    """
+
+    def __init__(self, prefix, quotes, line, n_line):
+        super().__init__(PyToken.STRING, n_line)
+
+        # Normalize prefix for comparison
+        if prefix is None:
+            self.prefix = ''
+        else:
+            self.prefix = ''.join(sorted(prefix.lower()))
+
+        # Look for the end of the string
+        start = len(self.prefix) + len(quotes)
+        pos = start
+        while not line.startswith(quotes, pos):
+            if pos >= len(line):
+                # Indexing past the end used to die with a bare IndexError
+                raise RuntimeError('Unterminated string on line {}'.format(n_line))
+            # A backslash escapes the next character, quotes included
+            pos += 2 if line[pos] == '\\' else 1
+        self.endpos = pos + len(quotes)
+
+        # Escape bare single quotes so "it's" and 'it\'s' compare equal;
+        # existing escape pairs are kept.  (str.replace was called here
+        # with its result thrown away, so nothing was normalized.)
+        # TODO: Normalize other special characters for comparison
+        self.content = re.sub(r"(\\.)|'", lambda m: m.group(1) or "\\'",
+                              line[start:pos])
+
+    def __str__(self):
+        return "{}'{}'".format(self.prefix, self.content)
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        return self.prefix == other.prefix and self.content == other.content
+
+
+RE_WHITESPACE = re.compile(r'\s+')
+RE_WORD = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')
+# Prefixed forms go first: alternation stops at the first branch that
+# matches, so a leading decimal branch would take only the "0" of "0x1F"
+RE_INT = re.compile(r'0[Xx][0-9A-Fa-f_]+|0[Bb][0-1_]+|0[Oo][0-7_]+|[0-9][0-9_]*')
+# The last branch covers exponent-only floats such as 1e5
+RE_FLOAT = re.compile(r'(([0-9][0-9_]*)?\.[0-9][0-9_]*|[0-9][0-9_]*\.)([eE][+-]?[0-9][0-9_]*)?'
+                      r'|[0-9][0-9_]*[eE][+-]?[0-9][0-9_]*')
+# The "r" after "b" is optional, so plain b'...' literals are recognized
+RE_START_STRING = re.compile(r'([rR][fFbB]?|[uU]|[fF][rR]?|[bB][rR]?)?(\'\'\'|\'|"""|")')
+
+# Note, tokens sharing a common prefix should be entered in order from
+# longest to shortest, so we don't mismatch a long token as a sequence
+# of shorter tokens
+SYMBOLIC_TOKENS = (
+    '<<=', '>>=', '**=', '//=', '...', '.',
+    '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=',
+    '<>', '<<', '<=', '<', '>>', '>=', '>', '!=', '==', '=',
+    ',', ';', ':=', ':', '->', '~',
+    '+', '-', '**', '*', '@', '//', '/', '%', '&', '|', '^',
+    '(', ')', '{', '}', '[', ']',
+)
+
+
+def symbolic_token(line, n_line):
+    """Return the operator/delimiter token `line` starts with, or None."""
+    for tok in SYMBOLIC_TOKENS:
+        if line.startswith(tok):
+            return PyToken(tok, n_line)
+    return None
+
+
+def read_tokens(pysrc):
+    """Yield the tokens of the Python source read from file object `pysrc`.
+
+    Raises RuntimeError on bad indentation, mismatched brackets or an
+    unterminated string; exits with status 1 on unrecognized text.
+    """
+    indent_stack = [0]
+    context_stack = []
+    n_line = 0
+
+    while True:
+        line = pysrc.readline()
+        n_line += 1
+        if not line:
+            break
+
+        sline = line.strip()
+        if not sline or sline.startswith('#'):
+            continue
+
+        # Look for indentation changes
+        if len(context_stack) == 0:
+            indent = len(line) - len(line.lstrip())
+            if indent > indent_stack[-1]:
+                indent_stack.append(indent)
+                yield PyToken(PyToken.INDENT, n_line)
+            while indent < indent_stack[-1]:
+                indent_stack.pop()
+                yield PyToken(PyToken.OUTDENT, n_line)
+            if indent != indent_stack[-1]:
+                raise RuntimeError('Incorrect indentation on line {}'.format(n_line))
+
+        while sline:
+            sline = sline.lstrip()
+            if sline.startswith('#'):
+                # Trailing comment; nothing more to tokenize on this line
+                break
+
+            token = symbolic_token(sline, n_line)
+            if token:
+                if token.type in {'(', '{', '['}:
+                    context_stack.append(token.type)
+                elif token.type == ')':
+                    if len(context_stack) == 0 or context_stack[-1] != '(':
+                        raise RuntimeError('Mismatched token at {} on line {}'.format(sline, n_line))
+                    context_stack.pop()
+                elif token.type == '}':
+                    if len(context_stack) == 0 or context_stack[-1] != '{':
+                        raise RuntimeError('Mismatched token at {} on line {}'.format(sline, n_line))
+                    context_stack.pop()
+                elif token.type == ']':
+                    if len(context_stack) == 0 or context_stack[-1] != '[':
+                        raise RuntimeError('Mismatched token at {} on line {}'.format(sline, n_line))
+                    context_stack.pop()
+                yield token
+                sline = sline[len(token.type):]
+                continue
+
+            match = RE_FLOAT.match(sline)
+            if match:
+                yield FloatToken(match.group(), n_line)
+                sline = sline[match.end():]
+                continue
+
+            match = RE_INT.match(sline)
+            if match:
+                yield IntToken(match.group(), n_line)
+                sline = sline[match.end():]
+                continue
+
+            match = RE_START_STRING.match(sline)
+            if match:
+                token = StringToken(match.group(1), match.group(2), sline, n_line)
+                yield token
+                sline = sline[token.endpos:]
+                continue
+
+            match = RE_WORD.match(sline)
+            if match:
+                yield WordToken(match.group(), n_line)
+                sline = sline[match.end():]
+                continue
+
+            # Errors go to stderr, which tests/decompyle_test.sh captures and shows
+            print('Error: Unrecognized tokens: "{}" at line {}'.format(sline, n_line),
+                  file=sys.stderr)
+            sys.exit(1)
+
+        if len(context_stack) == 0:
+            yield PyToken(PyToken.ENDLINE, n_line)
+
+
+if __name__ == '__main__':
+    if '--help' in sys.argv:
+        print('Usage: token_dump .py')
+        sys.exit(0)
+
+    if len(sys.argv) >= 2:
+        pysrc = open(sys.argv[1], 'r')
+    else:
+        pysrc = sys.stdin
+
+    # Close the input even if tokenizing raises
+    with pysrc:
+        for tok in read_tokens(pysrc):
+            if tok.type in {PyToken.ENDLINE, PyToken.INDENT, PyToken.OUTDENT}:
+                print(tok)
+            else:
+                print(tok, end=' ')
diff --git a/tests/all_tests.sh b/tests/all_tests.sh
new file mode 100755
index 0000000..ae8f057
--- /dev/null
+++ b/tests/all_tests.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Run decompyle_test.sh for every expected token file in tests/tokenized.
+# Exits non-zero if any test fails, so "make check" reports failures.
+
+srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
+
+status=0
+test_files=( "$srcdir"/tests/tokenized/*.txt )
+for tf in "${test_files[@]}"; do
+    test_name="$(basename "$tf")"
+    test_name="${test_name%.txt}"
+    "$srcdir"/tests/decompyle_test.sh "$test_name" tests || status=1
+done
+
+exit $status
diff --git a/tests/compiled/simple_const.1.0.pyc b/tests/compiled/simple_const.1.0.pyc
new file mode 100644
index 0000000..b6997b6
Binary files /dev/null and b/tests/compiled/simple_const.1.0.pyc differ
diff --git a/tests/compiled/simple_const.1.1.pyc b/tests/compiled/simple_const.1.1.pyc
new file mode 100644
index 0000000..fa93803
Binary files /dev/null and b/tests/compiled/simple_const.1.1.pyc differ
diff --git a/tests/compiled/simple_const.1.2.pyc b/tests/compiled/simple_const.1.2.pyc
new file mode 100644
index 0000000..1d91a84
Binary files /dev/null and b/tests/compiled/simple_const.1.2.pyc differ
diff --git a/tests/compiled/simple_const.1.3.pyc b/tests/compiled/simple_const.1.3.pyc
new file mode 100644
index 0000000..6526c59
Binary files /dev/null and b/tests/compiled/simple_const.1.3.pyc differ
diff --git a/tests/compiled/simple_const.1.4.pyc b/tests/compiled/simple_const.1.4.pyc
new file mode 100644
index 0000000..191a417
Binary files /dev/null and b/tests/compiled/simple_const.1.4.pyc differ
diff --git a/tests/compiled/simple_const.1.5.pyc b/tests/compiled/simple_const.1.5.pyc
new file mode 100644
index 0000000..4760828
Binary files /dev/null and b/tests/compiled/simple_const.1.5.pyc differ
diff --git a/tests/compiled/simple_const.1.6.pyc b/tests/compiled/simple_const.1.6.pyc
new file mode 100644
index 0000000..c7717dd
Binary files /dev/null and b/tests/compiled/simple_const.1.6.pyc differ
diff --git a/tests/compiled/simple_const.2.0.pyc b/tests/compiled/simple_const.2.0.pyc
new file mode 100644
index 0000000..0a33284
Binary files /dev/null and b/tests/compiled/simple_const.2.0.pyc differ
diff --git a/tests/compiled/simple_const.2.1.pyc b/tests/compiled/simple_const.2.1.pyc
new file mode 100644
index 0000000..0f293b0
Binary files /dev/null and b/tests/compiled/simple_const.2.1.pyc differ
diff --git a/tests/compiled/simple_const.2.2.pyc b/tests/compiled/simple_const.2.2.pyc
new file mode 100644
index 0000000..4a11c0f
Binary files /dev/null and b/tests/compiled/simple_const.2.2.pyc differ
diff --git a/tests/compiled/simple_const.2.3.pyc b/tests/compiled/simple_const.2.3.pyc
new file mode 100644
index 0000000..7e79622
Binary files /dev/null and b/tests/compiled/simple_const.2.3.pyc differ
diff --git a/tests/compiled/simple_const.2.4.pyc b/tests/compiled/simple_const.2.4.pyc
new file mode 100644
index 0000000..74a69b5
Binary files /dev/null and b/tests/compiled/simple_const.2.4.pyc differ
diff --git a/tests/compiled/simple_const.2.5.pyc b/tests/compiled/simple_const.2.5.pyc
new file mode 100644
index 0000000..64f5dc6
Binary files /dev/null and b/tests/compiled/simple_const.2.5.pyc differ
diff --git a/tests/compiled/simple_const.2.6.pyc b/tests/compiled/simple_const.2.6.pyc
new file mode 100644
index 0000000..a3364a6
Binary files /dev/null and b/tests/compiled/simple_const.2.6.pyc differ
diff --git a/tests/compiled/simple_const.2.7.pyc b/tests/compiled/simple_const.2.7.pyc
new file mode 100644
index 0000000..c78e878
Binary files /dev/null and b/tests/compiled/simple_const.2.7.pyc differ
diff --git a/tests/compiled/simple_const.3.0.pyc b/tests/compiled/simple_const.3.0.pyc
new file mode 100644
index 0000000..6976883
Binary files /dev/null and b/tests/compiled/simple_const.3.0.pyc differ
diff --git a/tests/compiled/simple_const.3.1.pyc b/tests/compiled/simple_const.3.1.pyc
new file mode 100644
index 0000000..f2dfde6
Binary files /dev/null and b/tests/compiled/simple_const.3.1.pyc differ
diff --git a/tests/compiled/simple_const.3.2.pyc b/tests/compiled/simple_const.3.2.pyc
new file mode 100644
index 0000000..b509841
Binary files /dev/null and b/tests/compiled/simple_const.3.2.pyc differ
diff --git a/tests/compiled/simple_const.3.3.pyc b/tests/compiled/simple_const.3.3.pyc
new file mode 100644
index 0000000..d8f19bf
Binary files /dev/null and b/tests/compiled/simple_const.3.3.pyc differ
diff --git a/tests/compiled/simple_const.3.4.pyc b/tests/compiled/simple_const.3.4.pyc
new file mode 100644
index 0000000..72e40f9
Binary files /dev/null and b/tests/compiled/simple_const.3.4.pyc differ
diff --git a/tests/compiled/simple_const.3.5.pyc b/tests/compiled/simple_const.3.5.pyc
new file mode 100644
index 0000000..93e2eac
Binary files /dev/null and b/tests/compiled/simple_const.3.5.pyc differ
diff --git a/tests/compiled/simple_const.3.6.pyc b/tests/compiled/simple_const.3.6.pyc
new file mode 100644
index 0000000..0db4096
Binary files /dev/null and b/tests/compiled/simple_const.3.6.pyc differ
diff --git a/tests/compiled/simple_const.3.7.pyc b/tests/compiled/simple_const.3.7.pyc
new file mode 100644
index 0000000..7da3ad5
Binary files /dev/null and b/tests/compiled/simple_const.3.7.pyc differ
diff --git a/tests/decompyle_test.sh b/tests/decompyle_test.sh
new file mode 100755
index 0000000..1a8f71a
--- /dev/null
+++ b/tests/decompyle_test.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Usage: decompyle_test.sh TESTNAME OUTDIR
+# Decompiles each tests/compiled/TESTNAME.?.?.pyc with ./pycdc, tokenizes the
+# output and compares it with tests/tokenized/TESTNAME.txt.  Exits non-zero
+# if any version fails.
+
+srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
+testdir="$srcdir/tests"
+testname="$1"
+outdir="$2"
+
+if [[ -z "$testname" ]]; then
+    echo "Missing required parameter: testname" >&2
+    exit 1
+fi
+if [[ -z "$outdir" ]]; then
+    echo "Missing required parameter: outdir" >&2
+    exit 1
+fi
+
+shopt -s nullglob
+compfiles=( "$testdir/compiled/$testname".?.?.pyc )
+shopt -u nullglob
+
+if (( ${#compfiles[@]} == 0 )); then
+    echo "No compiled modules found for compiled/$testname.*.pyc"
+    exit 1
+fi
+
+mkdir -p "$outdir"
+
+fails=0
+for pyc in "${compfiles[@]}"; do
+    base="$outdir/$(basename "$pyc")"
+
+    echo -ne "\033[1m*** $(basename "$pyc"):\033[0m "
+
+    ./pycdc "$pyc" 2>"$base.err" 1>"$base.src.py"
+    if (( $? )) || [[ -s "$base.err" ]]
+    then
+        let fails+=1
+        echo -e "\033[31mFAIL\033[m"
+        cat "$base.err"
+        continue
+    fi
+
+    "$srcdir"/scripts/token_dump "$base.src.py" 2>"$base.tok.err" 1>"$base.tok.txt"
+    if (( $? )) || [[ -s "$base.tok.err" ]]
+    then
+        let fails+=1
+        echo -e "\033[31mFAIL\033[m"
+        cat "$base.tok.err"
+        continue
+    fi
+
+    if ! diff "$base.tok.txt" "$testdir/tokenized/$testname.txt" >/dev/null
+    then
+        let fails+=1
+        echo -e "\033[31mFAIL\033[m"
+        echo "$base.tok.txt does not match $testdir/tokenized/$testname.txt"
+        continue
+    fi
+
+    echo -e "\033[32mPASS\033[m"
+done
+
+# The failure tally was never reported, so the script always exited 0
+if (( fails > 0 )); then
+    exit 1
+fi
diff --git a/tests/input/simple_const.py b/tests/input/simple_const.py
new file mode 100644
index 0000000..074c67e
--- /dev/null
+++ b/tests/input/simple_const.py
@@ -0,0 +1,12 @@
+# Description: Simple constants applicable to any Python version
+# Valid Pythons: all
+
+# Note: Python 1.0 will insert an implied "print" statement into each line
+
+a = 42
+b = 3.14159
+c = "test"
+d = (1, 2)
+e = (3,)
+f = [1, 2]
+g = {'key': 42}
diff --git a/tests/tokenized/simple_const.txt b/tests/tokenized/simple_const.txt
new file mode 100644
index 0000000..378d0c2
--- /dev/null
+++ b/tests/tokenized/simple_const.txt
@@ -0,0 +1,7 @@
+a = 42 
+b = 3.14159 
+c = 'test' 
+d = ( 1 , 2 ) 
+e = ( 3 , ) 
+f = [ 1 , 2 ] 
+g = { 'key' : 42 } 