Source code for pyminifier.analyze

# -*- coding: utf-8 -*-

__doc__ = """\
A module of useful functions for analyzing Python code.
"""

# Import standard library modules
import os, sys, re, tokenize, keyword
try:
    import cStringIO as io
except ImportError: # Ahh, Python 3
    import io

# Globals
py3 = False
if sys.version_info.major == 3:
    py3 = True
shebang = re.compile(r'^#\!.*$')
encoding = re.compile(r".*coding[:=]\s*([-\w.]+)")
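
# A quick sanity check of the two patterns above (an illustrative sketch, not
# part of the original module; the sample header lines and the _demo_* helper
# are hypothetical):
def _demo_header_regexes():
    assert shebang.match('#!/usr/bin/env python')
    assert encoding.match('# -*- coding: utf-8 -*-').group(1) == 'utf-8'
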
# __builtins__ is different for every module so we need a hard-coded list:
builtins = [
    'ArithmeticError',
    'AssertionError',
    'AttributeError',
    'BaseException',
    'BufferError',
    'BytesWarning',
    'DeprecationWarning',
    'EOFError',
    'Ellipsis',
    'EnvironmentError',
    'Exception',
    'False',
    'FloatingPointError',
    'FutureWarning',
    'GeneratorExit',
    'IOError',
    'ImportError',
    'ImportWarning',
    'IndentationError',
    'IndexError',
    'KeyError',
    'KeyboardInterrupt',
    'LookupError',
    'MemoryError',
    'NameError',
    'None',
    'NotImplemented',
    'NotImplementedError',
    'OSError',
    'OverflowError',
    'PendingDeprecationWarning',
    'ReferenceError',
    'RuntimeError',
    'RuntimeWarning',
    'StandardError',
    'StopIteration',
    'SyntaxError',
    'SyntaxWarning',
    'SystemError',
    'SystemExit',
    'TabError',
    'True',
    'TypeError',
    'UnboundLocalError',
    'UnicodeDecodeError',
    'UnicodeEncodeError',
    'UnicodeError',
    'UnicodeTranslateError',
    'UnicodeWarning',
    'UserWarning',
    'ValueError',
    'Warning',
    'ZeroDivisionError',
    '__IPYTHON__',
    '__IPYTHON__active',
    '__debug__',
    '__doc__',
    '__import__',
    '__name__',
    '__package__',
    'abs',
    'all',
    'any',
    'apply',
    'basestring',
    'bin',
    'bool',
    'buffer',
    'bytearray',
    'bytes',
    'callable',
    'chr',
    'classmethod',
    'cmp',
    'coerce',
    'compile',
    'complex',
    'copyright',
    'credits',
    'delattr',
    'dict',
    'dir',
    'divmod',
    'dreload',
    'enumerate',
    'eval',
    'execfile',
    'exit',
    'file',
    'filter',
    'float',
    'format',
    'frozenset',
    'getattr',
    'globals',
    'hasattr',
    'hash',
    'help',
    'hex',
    'id',
    'input',
    'int',
    'intern',
    'ip_set_hook',
    'ipalias',
    'ipmagic',
    'ipsystem',
    'isinstance',
    'issubclass',
    'iter',
    'jobs',
    'len',
    'license',
    'list',
    'locals',
    'long',
    'map',
    'max',
    'min',
    'next',
    'object',
    'oct',
    'open',
    'ord',
    'pow',
    'print',
    'property',
    'quit',
    'range',
    'raw_input',
    'reduce',
    'reload',
    'repr',
    'reversed',
    'round',
    'set',
    'setattr',
    'slice',
    'sorted',
    'staticmethod',
    'str',
    'sum',
    'super',
    'tuple',
    'type',
    'unichr',
    'unicode',
    'vars',
    'xrange',
    'zip'
]

reserved_words = keyword.kwlist + builtins

def enumerate_keyword_args(tokens):
    """
    Iterates over *tokens* and returns a dictionary with function names as the
    keys and lists of keyword arguments as the values.
    """
    keyword_args = {}
    inside_function = False
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        token_string = tok[1]
        if token_type == tokenize.NEWLINE:
            inside_function = False
        if token_type == tokenize.NAME:
            if token_string == "def":
                function_name = tokens[index+1][1]
                inside_function = function_name
                keyword_args.update({function_name: []})
            elif inside_function:
                if tokens[index+1][1] == '=': # keyword argument
                    keyword_args[function_name].append(token_string)
    return keyword_args
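
# Minimal usage sketch (not part of the original module): build a token list
# with the standard library's tokenize.generate_tokens() and inspect keyword
# arguments. The sample source string and _demo_* helper are hypothetical.
def _demo_enumerate_keyword_args():
    source = "def greet(name, greeting='hi'):\n    return greeting + name\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_keyword_args(tokens) # {'greet': ['greeting']}
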
def enumerate_imports(tokens):
    """
    Iterates over *tokens* and returns a list of all imported modules.

    **Note:** This is intelligent about the use of the 'as' keyword.
    """
    imported_modules = []
    import_line = False
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        token_string = tok[1]
        if token_type == tokenize.NEWLINE:
            import_line = False
        elif token_string == "import":
            import_line = True
        elif import_line:
            if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
                if token_string not in reserved_words:
                    if token_string not in imported_modules:
                        imported_modules.append(token_string)
    return imported_modules
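
# Usage sketch (hypothetical helper, assuming Python 3's io.StringIO):
def _demo_enumerate_imports():
    source = "import os\nimport re\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_imports(tokens) # ['os', 're']
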
def enumerate_global_imports(tokens):
    """
    Returns a list of all globally imported modules (skips modules imported
    inside of classes, methods, or functions).

    Example:

        >>> enumerate_global_imports(tokens)
        ['sys', 'os', 'tokenize', 're']
    """
    imported_modules = []
    import_line = False
    parent_module = ""
    function_count = 0
    indentation = 0
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        token_string = tok[1]
        if token_type == tokenize.INDENT:
            indentation += 1
        elif token_type == tokenize.DEDENT:
            indentation -= 1
        elif token_type == tokenize.NEWLINE:
            import_line = False
        elif token_type == tokenize.NAME:
            if token_string in ["def", "class"]:
                function_count += 1
            if indentation == function_count - 1:
                function_count -= 1
            elif function_count >= indentation:
                if token_string == "import":
                    import_line = True
                elif import_line:
                    if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
                        if token_string not in reserved_words:
                            if token_string not in imported_modules:
                                if tokens[index+1][1] == '.': # module.module
                                    parent_module = token_string + '.'
                                else:
                                    if parent_module:
                                        module_string = parent_module + token_string
                                        imported_modules.append(module_string)
                                        parent_module = ''
                                    else:
                                        imported_modules.append(token_string)
    return imported_modules
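
# Usage sketch (hypothetical helper): only the module-level import is
# reported; the import inside f() is skipped.
def _demo_enumerate_global_imports():
    source = "import os\ndef f():\n    import json\n    return json\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_global_imports(tokens) # ['os']
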
# TODO: Finish this (even though it isn't used):
def enumerate_dynamic_imports(tokens):
    """
    Returns a dictionary of all dynamically imported modules (those inside of
    classes or functions) in the form of {<func or class name>: [<modules>]}

    Example:

        >>> enumerate_dynamic_imports(tokens)
        {'myfunc': ['zlib', 'base64']}
    """
    imported_modules = []
    import_line = False
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        token_string = tok[1]
        if token_type == tokenize.NEWLINE:
            import_line = False
        elif token_string == "import":
            try:
                if tokens[index-1][0] == tokenize.NEWLINE:
                    import_line = True
            except IndexError:
                import_line = True # Just means this is the first line
        elif import_line:
            if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
                if token_string not in reserved_words:
                    if token_string not in imported_modules:
                        imported_modules.append(token_string)
    return imported_modules
def enumerate_method_calls(tokens, modules):
    """
    Returns a list of all object (not module) method calls in the given tokens.

    *modules* is expected to be a list of all global modules imported into the
    source code we're working on.

    For example:

        >>> enumerate_method_calls(tokens, modules)
        ['re.compile', 'sys.argv', 'f.write']
    """
    out = []
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        token_string = tok[1]
        if token_type == tokenize.NAME:
            next_tok_string = tokens[index+1][1]
            if next_tok_string == '(': # Method call
                prev_tok_string = tokens[index-1][1]
                # Check if we're attached to an object or module
                if prev_tok_string == '.': # We're attached
                    prev_prev_tok_string = tokens[index-2][1]
                    if prev_prev_tok_string not in ['""', "''", ']', ')', '}']:
                        if prev_prev_tok_string not in modules:
                            to_replace = "%s.%s" % (
                                prev_prev_tok_string, token_string)
                            if to_replace not in out:
                                out.append(to_replace)
    return out
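
# Usage sketch (hypothetical helper): only the attached call 'f.write' is
# reported; the bare open() call is not attached to an object, so it is
# ignored.
def _demo_enumerate_method_calls():
    source = "f = open('out.txt', 'w')\nf.write('data')\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_method_calls(tokens, ['os', 're']) # ['f.write']
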
def enumerate_builtins(tokens):
    """
    Returns a list of all the builtins being used in *tokens*.
    """
    out = []
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        token_string = tok[1]
        #if token_type == tokenize.NAME:
        if token_string in builtins:
            # Note: I need to test if print can be replaced in Python 3
            special_special = ['print'] # Print is special in Python 2
            if py3:
                special_special = []
            if token_string not in special_special:
                if not token_string.startswith('__'): # Don't count magic funcs
                    if tokens[index-1][1] != '.' and tokens[index+1][1] != '=':
                        if token_string not in out:
                            out.append(token_string)
    return out
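
# Usage sketch (hypothetical helper):
def _demo_enumerate_builtins():
    source = "values = list(range(10))\ntotal = sum(values)\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_builtins(tokens) # ['list', 'range', 'sum']
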
def enumerate_import_methods(tokens):
    """
    Returns a list of imported module methods (such as re.compile) inside
    *tokens*.
    """
    global_imports = enumerate_global_imports(tokens)
    out = []
    for item in global_imports:
        for index, tok in enumerate(tokens):
            try:
                next_tok = tokens[index+1]
                try:
                    next_next_tok = tokens[index+2]
                except IndexError: # Pretend it is a newline
                    next_next_tok = (54, '\n', (1, 1), (1, 2), '#\n')
            except IndexError: # Last token, no biggie
                # Pretend it is a newline here too
                next_tok = (54, '\n', (1, 1), (1, 2), '#\n')
            token_type = tok[0]
            token_string = tok[1]
            if token_string == item:
                if next_tok[1] == '.': # We're calling a method
                    module_method = "%s.%s" % (token_string, next_next_tok[1])
                    if module_method not in out:
                        out.append(module_method)
    return out
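
# Usage sketch (hypothetical helper):
def _demo_enumerate_import_methods():
    source = "import re\npattern = re.compile('spam')\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_import_methods(tokens) # ['re.compile']
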
def enumerate_local_modules(tokens, path):
    """
    Returns a list of modules inside *tokens* that are local to *path*.

    **Note:** Will recursively look inside *path* for said modules.
    """
    # Have to get a list of all modules before we can do anything else
    modules = enumerate_imports(tokens)
    local_modules = []
    parent = ""
    # Now check the local dir for matching modules
    for root, dirs, files in os.walk(path):
        if not parent:
            parent = os.path.split(root)[1]
        for f in files:
            if f.endswith('.py'):
                f = f[:-3] # Strip .py
                module_tree = root.split(parent)[1].replace('/', '.')
                module_tree = module_tree.lstrip('.')
                if module_tree:
                    module = "%s.%s" % (module_tree, f)
                else:
                    module = f
                if module in modules:
                    local_modules.append(module)
    return local_modules
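
# Usage sketch (hypothetical helper, assuming a POSIX filesystem): creates a
# throwaway directory so there is a real local module to find.
def _demo_enumerate_local_modules():
    import tempfile
    path = tempfile.mkdtemp()
    with open(os.path.join(path, 'helper.py'), 'w') as f:
        f.write('x = 1\n')
    source = "import helper\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return enumerate_local_modules(tokens, path) # ['helper']
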
def get_shebang(tokens):
    """
    Returns the shebang string in *tokens* if it exists.  None if not.
    """
    # This (short) loop preserves shebangs and encoding strings:
    for tok in tokens[0:4]: # Will always be in the first four tokens
        line = tok[4]
        # Save the first comment line if it starts with a shebang
        # (e.g. '#!/usr/bin/env python')
        if shebang.match(line): # Must be first line
            return line
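
# Usage sketch (hypothetical helper): the shebang line is returned verbatim,
# trailing newline included.
def _demo_get_shebang():
    source = "#!/usr/bin/env python\nx = 1\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    return get_shebang(tokens) # '#!/usr/bin/env python\n'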