# -*- coding: utf-8 -*-
__doc__ = """\
compression.py - A module providing functions to turn a python script into a
self-executing archive in a few different formats...
**gz_pack format:**
- Typically provides better compression than bzip2 (for Python scripts).
- Scripts compressed via this method can still be imported as modules.
- The resulting binary data is base64-encoded which isn't optimal compression.
**bz2_pack format:**
- In some cases may provide better compression than gzip.
- Scripts compressed via this method can still be imported as modules.
- The resulting binary data is base64-encoded which isn't optimal compression.
**lzma_pack format:**
- In some cases may provide better compression than bzip2.
- Scripts compressed via this method can still be imported as modules.
- The resulting binary data is base64-encoded which isn't optimal compression.
The gz_pack, bz2_pack, and lzma_pack formats only work on individual .py
files. To pack a number of files at once using this method use the
``--destdir`` command line option:
.. code-block:: shell

    $ pyminifier --gzip --destdir=/tmp/minified *.py
**zip_pack format:**
- Provides the best compression of Python scripts.
- Resulting script cannot be imported as a module.
- Any required modules that are local (implied path) will be automatically included in the archive.
"""
# Import standard library modules
import os, sys, tempfile, shutil
# Import our own supporting modules
from . import analyze, token_utils, minification, obfuscate
# True only when running under a Python 3 interpreter
py3 = sys.version_info.major == 3
def bz2_pack(source):
    """
    Packs *source* into a bzip2-compressed, self-extracting python script and
    returns that script as a string.

    If the first line of *source* is a shebang it is kept as the first line
    of the result (a trailing ``python`` becomes ``python3`` when running
    under Python 3).  The remainder of the result imports ``bz2`` and
    ``base64`` and ``exec``'s the decoded, decompressed payload.

    .. note::

        The result takes up more space than what zip_pack produces but the
        resulting .py file can still be imported into a python program.
    """
    import base64
    import bz2
    pieces = []
    # Only the shebang is carried over (encoding declarations are ignored)
    shebang_line = source.partition('\n')[0]
    if analyze.shebang.match(shebang_line):
        trimmed = shebang_line.rstrip()
        if py3 and trimmed.endswith('python'):
            shebang_line = trimmed + '3'  # e.g. #!/usr/bin/env python3
        pieces.append(shebang_line + '\n')
    payload = base64.b64encode(bz2.compress(source.encode('utf-8')))
    pieces.append('import bz2, base64\n')
    pieces.append("exec(bz2.decompress(base64.b64decode('")
    pieces.append(payload.decode('utf-8'))
    pieces.append("')))\n")
    return "".join(pieces)
def gz_pack(source):
    """
    Packs *source* into a zlib-compressed, self-extracting python script and
    returns that script as a string.

    If the first line of *source* is a shebang it is kept as the first line
    of the result (a trailing ``python`` becomes ``python3`` when running
    under Python 3).  The remainder of the result imports ``zlib`` and
    ``base64`` and ``exec``'s the decoded, decompressed payload.

    .. note::

        The result takes up more space than what zip_pack produces but the
        resulting .py file can still be imported into a python program.
    """
    import base64
    import zlib
    header = ""
    # Only the shebang is carried over (encoding declarations are ignored)
    candidate = source.partition('\n')[0]
    if analyze.shebang.match(candidate):
        if py3:
            stripped = candidate.rstrip()
            if stripped.endswith('python'):
                candidate = stripped + '3'  # e.g. #!/usr/bin/env python3
        header = candidate + '\n'
    encoded = base64.b64encode(zlib.compress(source.encode('utf-8')))
    return "".join((
        header,
        'import zlib, base64\n',
        "exec(zlib.decompress(base64.b64decode('",
        encoded.decode('utf-8'),
        "')))\n",
    ))
def lzma_pack(source):
    """
    Packs *source* into an lzma-compressed, self-extracting python script and
    returns that script as a string.

    If the first line of *source* is a shebang it is kept as the first line
    of the result (a trailing ``python`` becomes ``python3`` when running
    under Python 3).  The remainder of the result imports ``lzma`` and
    ``base64`` and ``exec``'s the decoded, decompressed payload.

    .. note::

        The result takes up more space than what zip_pack produces but the
        resulting .py file can still be imported into a python program.
    """
    import base64
    import lzma
    # Only the shebang is carried over (encoding declarations are ignored)
    leading_line = source.partition('\n')[0]
    if not analyze.shebang.match(leading_line):
        prefix = ""
    else:
        if py3 and leading_line.rstrip().endswith('python'):
            # Make it python3, e.g. #!/usr/bin/env python3
            leading_line = leading_line.rstrip() + '3'
        prefix = leading_line + '\n'
    packed = lzma.compress(source.encode('utf-8'))
    body = base64.b64encode(packed).decode('utf-8')
    return (
        prefix
        + 'import lzma, base64\n'
        + "exec(lzma.decompress(base64.b64decode('"
        + body
        + "')))\n"
    )
def prepend(line, path):
    """
    Prepends *line* to the _beginning_ of the file at the given *path*.

    *line* may be text (it will be encoded as UTF-8) or bytes.  If *line*
    doesn't end in a newline one will be appended to the end of it.

    The new content is assembled in a temporary file which then replaces the
    file at *path*.  The permission bits of the original file are carried
    over to the replacement.  If anything fails before the replacement takes
    place the temporary file is removed and the exception propagates (e.g.
    IOError/OSError when *path* can't be read).
    """
    if not isinstance(line, bytes):
        line = line.encode('utf-8')
    if not line.endswith(b'\n'):
        line += b'\n'
    # delete=False lets us keep writing to the file we were actually handed
    # instead of closing it and re-opening its (by then unclaimed) name.
    temp = tempfile.NamedTemporaryFile('wb', delete=False)
    replaced = False
    try:
        with temp:
            temp.write(line)
            with open(path, 'rb') as original:
                shutil.copyfileobj(original, temp)
        # Temporary files are created owner-only; keep the original's mode
        shutil.copymode(path, temp.name)
        # Now replace the original with the modified version
        shutil.move(temp.name, path)
        replaced = True
    finally:
        if not replaced and os.path.exists(temp.name):
            os.remove(temp.name)
def _build_name_generator(options):
    """
    Returns the name generator shared by all obfuscation calls or None if
    none of the obfuscation-related *options* are set.

    Prints an error and exits with status 2 if non-latin identifiers were
    requested while not running under Python 3.
    """
    wants_obfuscation = (
        options.obfuscate or options.obf_classes
        or options.obf_functions or options.obf_variables
        or options.obf_builtins or options.obf_import_methods)
    if not wants_obfuscation:
        return None
    identifier_length = int(options.replacement_length)
    if not options.use_nonlatin:
        return obfuscate.obfuscation_machine(count=identifier_length)
    if sys.version_info[0] != 3:
        print(
            "ERROR: You can't use nonlatin characters without Python 3")
        sys.exit(2)
    return obfuscate.obfuscation_machine(
        use_unicode=True, count=identifier_length
    )


def _write_source_to_zip(archive, source, arcname):
    """
    Adds the text *source* to the open ZipFile *archive* under *arcname*.

    The text is staged in a temporary file which is closed (and thereby
    removed) afterwards, even if adding it to the archive fails.
    """
    temp = tempfile.NamedTemporaryFile(mode='w')
    try:
        temp.write(source)
        temp.flush()
        archive.write(temp.name, arcname)
    finally:
        temp.close()


def zip_pack(filepath, options):
    """
    Creates a zip archive containing the script at *filepath* along with all
    imported modules that are local to *filepath* as a self-extracting python
    script.  A shebang will be prepended to the beginning of the resulting
    zip archive and the file is made executable (mode 0755) so that it can be
    run directly from the command line.

    The archive is written to ``options.pyz`` using ZIP_DEFLATED compression
    (ZIP_LZMA is not used at this time).

    *options* is expected to be the same options parsed from pyminifier.py
    on the command line.  The attributes read here are ``pyz``, ``nominify``,
    ``replacement_length``, ``use_nonlatin`` and the obfuscation flags
    (``obfuscate``, ``obf_classes``, ``obf_functions``, ``obf_variables``,
    ``obf_builtins``, ``obf_import_methods``).

    Progress and size statistics are printed to stdout; nothing is returned.

    .. note::

        * The file resulting from this method cannot be imported as a module into another python program (command line execution only).
        * Any required local (implied path) modules will be automatically included (well, it does its best).
        * The result will be saved as a .pyz file (which is an extension I invented for this format).
    """
    import zipfile
    # Hopefully some day we'll be able to use ZIP_LZMA too as the compression
    # format to save even more space...
    compression_format = zipfile.ZIP_DEFLATED
    # Record the filesize for later comparison (size reduction stats)
    cumulative_size = os.path.getsize(filepath)
    dest = options.pyz
    # Validate the obfuscation options up front so that a bad combination
    # exits before a half-written archive is left behind at *dest*.
    name_generator = _build_name_generator(options)
    table = [{}]
    # Take care of minifying our primary script first:
    with open(filepath) as script:
        source = script.read()
    primary_tokens = token_utils.listified_tokenizer(source)
    # Preserve shebangs (don't care about encodings for this)
    shebang = analyze.get_shebang(primary_tokens)
    if not shebang:
        # We *must* have a shebang for this to work so make a conservative
        # default:
        shebang = "#!/usr/bin/env python"
    if py3:
        if shebang.rstrip().endswith('python'):  # Make it python3 (to be safe)
            shebang = shebang.rstrip()
            shebang += '3\n'  # e.g. #!/usr/bin/env python3
    if not options.nominify:  # Minify as long as we don't have this option set
        source = minification.minify(primary_tokens, options)
    # Need the path where the script lives for the next steps:
    path = os.path.split(filepath)[0]
    if not path:
        path = os.getcwd()
    main_py = os.path.join(path, '__main__.py')
    included_modules = []
    with zipfile.ZipFile(dest, "w", compression_format) as z:
        if os.path.exists(main_py):
            # There's an existing __main__.py, use it
            z.write(main_py, '__main__.py')
            _write_source_to_zip(z, source, os.path.split(filepath)[1])
        else:
            # No __main__.py so our (minified) main script becomes the
            # __main__.py.  This is so it will still execute as a zip.
            _write_source_to_zip(z, source, '__main__.py')
        # Now write any required modules into the zip as well
        local_modules = analyze.enumerate_local_modules(primary_tokens, path)
        for module in local_modules:
            module = module.replace('.', '/')
            module = "%s.py" % module
            # Modules live next to the script, not necessarily in the CWD
            module_path = os.path.join(path, module)
            # Add the filesize to our total
            cumulative_size += os.path.getsize(module_path)
            # Also record that we've added it to the archive
            included_modules.append(module)
            # Minify these files too
            with open(module_path) as module_file:
                source = module_file.read()
            tokens = token_utils.listified_tokenizer(source)
            for mod in analyze.enumerate_local_modules(tokens, path):
                if mod not in local_modules:
                    # Appending while iterating is deliberate: it extends
                    # this very loop to cover newly discovered modules.
                    local_modules.append(mod)
            if not options.nominify:
                source = minification.minify(tokens, options)
            # Have to re-tokenize for obfuscation (it's quick):
            tokens = token_utils.listified_tokenizer(source)
            # Perform obfuscation if any of the related options were set
            if name_generator:
                obfuscate.obfuscate(
                    module,
                    tokens,
                    options,
                    name_generator=name_generator,
                    table=table
                )
            # Convert back to text
            result = token_utils.untokenize(tokens)
            result += (
                "# Created by pyminifier "
                "(https://github.com/liftoff/pyminifier)\n")
            # NOTE(review): *result* (the obfuscated text) is computed but it
            # is *source* that gets archived, so obfuscation is not reflected
            # in the archive.  Left as-is on purpose: the primary script is
            # never obfuscated here, so archiving obfuscated modules alongside
            # it could break name lookups between them -- confirm the intended
            # behaviour before switching this to *result*.
            _write_source_to_zip(z, source, module)
    # Finish up by writing the shebang to the beginning of the zip
    prepend(shebang, dest)
    os.chmod(dest, 0o755)  # Make it executable (since we added the shebang)
    pyz_filesize = os.path.getsize(dest)
    print('%s saved as compressed executable zip: %s' % (filepath, dest))
    print('The following modules were automatically included (as automagic '
          'dependencies):\n')
    for module in included_modules:
        print('\t%s' % module)
    if cumulative_size:  # No meaningful ratio for an empty original
        # Size of the archive expressed as a percentage of the originals
        percent_of_original = round(
            float(pyz_filesize) / float(cumulative_size) * 100, 2)
        print('\nOverall size reduction: %s%% of original size'
              % percent_of_original)