Source code for woff_info

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       Copyright 2013 Liftoff Software Corporation

# Meta
__version__ = '1.0'
__version_info__ = (1, 0)
__license__ = "AGPLv3 or Proprietary (see LICENSE.txt)"

__doc__ = """\
.. _woff_info.py:

Provides a number of functions that can be used to extract the 'name' data from
.woff (web font) files.

.. note::

    In most cases .woff files have the metadata stripped (to save space) which
    is why this module only grabs the 'name' records from the sfnt (font data)
    tables.

Example::

    >>> from pprint import pprint
    >>> from woff_info import woff_info
    >>> woff_path = '/opt/gateone/applications/terminal/static/fonts/ubuntumono-normal.woff'
    >>> pprint(woff_info(woff_path))
    {'Compatible Full': 'Ubuntu Mono',
    'Copyright': 'Copyright 2011 Canonical Ltd.  Licensed under the Ubuntu Font Licence 1.0',
    'Designer': 'Dalton Maag Ltd',
    'Designer URL': 'http://www.daltonmaag.com/',
    'Font Family': 'Ubuntu Mono',
    'Font Subfamily': 'Regular',
    'Full Name': 'Ubuntu Mono',
    'ID': 'Ubuntu Mono Regular Version 0.80',
    'Manufacturer': 'Dalton Maag Ltd',
    'Postscript Name': 'UbuntuMono-Regular',
    'Preferred Family': 'Ubuntu Mono',
    'Preferred Subfamily': 'Regular',
    'Trademark': 'Ubuntu and Canonical are registered trademarks of Canonical Ltd.',
    'Vendor URL': 'http://www.daltonmaag.com/',
    'Version': 'Version 0.80'}

This script can also be executed on the command line to display the name
information for any given WOFF file:

.. ansi-block::

    \x1b[1;34muser\x1b[0m@modern-host\x1b[1;34m:~ $\x1b[0m ./woff_info static/fonts/ubuntumono-normal.woff
    {
        "Compatible Full": "Ubuntu Mono",
        "Copyright": "Copyright 2011 Canonical Ltd.  Licensed under the Ubuntu Font Licence 1.0",
        "Designer": "Dalton Maag Ltd",
        "Designer URL": "http://www.daltonmaag.com/",
        "Font Family": "Ubuntu Mono",
        "Font Subfamily": "Regular",
        "Full Name": "Ubuntu Mono",
        "ID": "Ubuntu Mono Regular Version 0.80",
        "Manufacturer": "Dalton Maag Ltd",
        "Postscript Name": "UbuntuMono-Regular",
        "Preferred Family": "Ubuntu Mono",
        "Preferred Subfamily": "Regular",
        "Trademark": "Ubuntu and Canonical are registered trademarks of Canonical Ltd.",
        "Vendor URL": "http://www.daltonmaag.com/",
        "Version": "Version 0.80"
    }

.. note::

    The command line output is JSON so it can be easily used by other programs.
"""

import sys, struct, zlib, functools

def memoize(obj):
    """
    Simple caching decorator: remembers the result of every distinct call to
    *obj* and returns the stored result on repeat calls.

    The cache key is the string form of the positional arguments followed by
    the string form of the keyword arguments.  The cache itself is a plain
    dict stored on the wrapped callable as ``obj.cache`` (entries are never
    expired).
    """
    obj.cache = {}
    results = obj.cache

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        lookup = "".join((str(args), str(kwargs)))
        if lookup in results:
            return results[lookup]
        # First time we've seen these arguments: compute and remember
        outcome = obj(*args, **kwargs)
        results[lookup] = outcome
        return outcome

    return memoizer

# Try using Gate One's memoize decorator (with self-expiry!)
try:
    from gateone.core.utils import memoize
except ImportError:
    pass # No big, use the one above

# Globals
# Maps the numeric 'encoding' field of a name record to the name of the Python
# codec that unpack_name_data() uses to decode that record's string.
# NOTE(review): 'latin-1' and 'iso-8859-1' are two names for the same Python
# codec, so IDs 1 and 2 decode identically -- confirm that is intended.
ENCODING_MAP = {
    0: 'ascii',
    1: 'latin-1',
    2: 'iso-8859-1'
}

# Maps the numeric 'name_id' of a name record to the human-readable label that
# woff_info() uses as the key in its result.  IDs that are not listed here are
# reported by woff_info() as 'Unknown Name ID: <id>'.
NAME_ID_MAP = { # For human-readable names
    0: u"Copyright",
    1: u"Font Family",
    2: u"Font Subfamily",
    3: u"ID",
    4: u"Full Name",
    5: u"Version",
    6: u"Postscript Name",
    7: u"Trademark",
    8: u"Manufacturer",
    9: u"Designer",
    10: u"Description",
    11: u"Vendor URL",
    12: u"Designer URL",
    13: u"License Description",
    14: u"License URL",
    15: u"Reserved",
    16: u"Preferred Family",
    17: u"Preferred Subfamily",
    18: u"Compatible Full",
    19: u"Sample Text",
    20: u"Postscript CID",
    21: u"WWS Family Name",
    22: u"WWS Subfamily Name",
    #200: u"???" # Liberation Mono uses this, "Webfont 1.0" is the value but
    # what is ID 200 supposed to be?  Webfont version?
    #201: u"???" # Liberation Mono also uses this.  Looks like a date of some
    # sort.  Creation date, perhaps?
}

# Layout of the header at the start of the 'name' table, written in the
# "field: struct-format-char" mini-language parsed by _struct_get_format()
# (fields are always unpacked big endian).  Three unsigned shorts, 6 bytes:
# 'count' is the number of name records that follow and 'offset' is where the
# string storage begins, relative to the start of the table (see
# unpack_name_data()).
NAME_HEADER_FORMAT = """
    format:         H
    count:          H
    offset:         H
"""

# Layout of a single name record (six unsigned shorts, 12 bytes) in the same
# "field: struct-format-char" mini-language.  unpack_name_data() uses 'offset'
# (relative to the string storage area) and 'length' to locate the record's
# string, and 'encoding' to pick the codec used to decode it.
NAME_RECORD_FORMAT = """
    platform_id:    H
    encoding:       H
    language:       H
    name_id:        H
    length:         H
    offset:         H
"""

# Layout of the header at the very start of a WOFF file, in the
# "field: struct-format-char" mini-language parsed by _struct_get_format()
# (big endian; 4s = 4-byte string, L = 4-byte unsigned, H = 2-byte unsigned;
# 44 bytes in total).  Only 'numTables' is read by this module (see
# unpack_directory()); HEADER_SIZE is calculated from this format.
HEADER_FORMAT = """
    signature:      4s
    flavor:         4s
    length:         L
    numTables:      H
    reserved:       H
    totalSfntSize:  L
    majorVersion:   H
    minorVersion:   H
    metaOffset:     L
    metaLength:     L
    metaOrigLength: L
    privOffset:     L
    privLength:     L
"""

# Layout of one table directory entry (20 bytes, big endian); the entries
# follow the WOFF header back-to-back, one per table.  unpack_table_data()
# uses 'offset' and 'compLength' to slice the table out of the file and
# decompresses it with zlib when 'compLength' is smaller than 'origLength'.
DIRECTORY_FORMAT = """
    tag:            4s
    offset:         L
    compLength:     L
    origLength:     L
    origChecksum:   L
"""

class BadWoff(Exception):
    """
    Raised when the name data cannot be extracted from a .woff file (for
    whatever reason).
    """
    pass
# Much of this code was copied from the W3C WOFF validator.py:
#   http://dev.w3.org/webfonts/WOFF/tools/validator/
# ...which is covered by the W3C's own MIT-like license:
#   http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231

# This was inspired by Just van Rossum's sstruct module.
# http://fonttools.svn.sourceforge.net/svnroot/fonttools/trunk/Lib/sstruct.py

def struct_unpack(format, data):
    """
    Unpacks the fields described by *format* (a "name: struct-char" listing
    such as `HEADER_FORMAT`) from the start of *data*.

    Returns a tuple of ``(fields, remaining)`` where *fields* is a dict mapping
    each field name to its unpacked value and *remaining* is whatever is left
    of *data* after the unpacked bytes.

    Raises :exc:`struct.error` if *data* is too short for *format*.
    """
    keys, format_string = _struct_get_format(format)
    size = struct.calcsize(format_string)
    values = struct.unpack(format_string, data[:size])
    unpacked = dict(zip(keys, values))
    return unpacked, data[size:]


def struct_calc_size(format):
    """
    Returns the number of bytes occupied by the structure described by
    *format*.
    """
    keys, format_string = _struct_get_format(format)
    return struct.calcsize(format_string)


# Parsed formats, keyed by the raw format text, so each is only parsed once
_struct_format_cache = {}

def _struct_get_format(format):
    """
    Parses *format* (one "name: struct-char" pair per line; blank lines and
    anything after a ``#`` are ignored) and returns a tuple of
    ``(field_names, struct_format_string)``.  The result is cached.
    """
    if format not in _struct_format_cache:
        keys = []
        format_string = [">"] # always big endian
        for line in format.strip().splitlines():
            line = line.split("#", 1)[0].strip()
            if not line:
                continue
            key, format_char = line.split(":")
            keys.append(key.strip())
            format_string.append(format_char.strip())
        _struct_format_cache[format] = (keys, "".join(format_string))
    return _struct_format_cache[format]


HEADER_SIZE = struct_calc_size(HEADER_FORMAT)


def unpack_header(data):
    """
    Returns the WOFF header fields (see `HEADER_FORMAT`) found at the start of
    *data* as a dict.
    """
    return struct_unpack(HEADER_FORMAT, data)[0]


def unpack_directory(data):
    """
    Returns the table directory of the WOFF file contents given as *data*: a
    list with one dict (see `DIRECTORY_FORMAT`) per table, in file order.
    """
    header = unpack_header(data)
    numTables = header["numTables"]
    data = data[HEADER_SIZE:] # The directory starts right after the header
    directory = []
    for _ in range(numTables):
        table, data = struct_unpack(DIRECTORY_FORMAT, data)
        directory.append(table)
    return directory


def unpack_table_data(data):
    """
    Returns a dict mapping the tag of every table listed in the directory of
    the WOFF file contents given as *data* to that table's (decompressed)
    bytes.

    A table whose directory entry points past the end of *data* is truncated
    to whatever bytes are actually there (possibly none).  A table that is
    marked as compressed but cannot be decompressed is stored as ``None``.
    """
    directory = unpack_directory(data)
    tables = {}
    for entry in directory:
        tag = entry["tag"]
        offset = entry["offset"]
        origLength = entry["origLength"]
        compLength = entry["compLength"]
        # Both values are unsigned so a plain slice covers every case: an
        # offset past the end yields empty bytes and an over-long length is
        # clipped at the end of the data.  (Always bytes; never a text string.)
        tableData = data[offset:offset + compLength]
        if compLength < origLength: # Table is zlib-compressed
            try:
                tableData = zlib.decompress(tableData)
            except zlib.error:
                tableData = None
        tables[tag] = tableData
    return tables


def unpack_name_data(data):
    """
    Returns the records of the 'name' table given as *data* (bytes): a list of
    dicts holding the fields of `NAME_RECORD_FORMAT` plus a 'string' key with
    the record's decoded text.

    Raises :exc:`struct.error` if *data* is too short to hold the header and
    the number of records it announces.
    """
    header, remaining_data = struct_unpack(NAME_HEADER_FORMAT, data)
    count = header["count"]
    storage_offset = header["offset"]
    name_records = []
    for _ in range(count):
        record, remaining_data = struct_unpack(
            NAME_RECORD_FORMAT, remaining_data)
        # Add the strings to the table
        offset = storage_offset + record['offset']
        end = offset + record['length']
        # Remove any null chars from the string (they can have lots)
        raw_string = data[offset:end].replace(b'\x00', b'')
        # Now make sure the string is unicode.  Encoding IDs we don't know
        # about are treated as latin-1 (same as the fallback below) instead of
        # blowing up with a KeyError.
        encoding = ENCODING_MAP.get(record['encoding'], 'latin-1')
        try:
            record['string'] = raw_string.decode(encoding)
        except UnicodeDecodeError:
            # Sometimes the listed encoding is incorrect.  Fall back to latin-1
            # (which covers the most common non-ascii characters such as the
            # copyright symbol: \xa9)
            record['string'] = raw_string.decode('latin-1')
        name_records.append(record)
    return name_records
def woff_name_data(path):
    """
    Returns the 'name' table data from the .woff font file at the given *path*.

    The result is a dict mapping each numeric name ID to its record: a dict
    with the remaining fields of the name record plus a 'string' key holding
    the decoded text (the 'name_id' key is removed from the record since it is
    used as the key of the result).

    .. note::

        Only returns the English language stuff (language ID 0).  If the font
        has no such records, the records of the first language found in the
        table are returned instead.

    Raises :exc:`BadWoff` if the file has no usable 'name' table (missing,
    empty, could not be decompressed) or is too short to be parsed.
    """
    with open(path, 'rb') as f:
        raw = f.read()
    try:
        table_data = unpack_table_data(raw)
    except struct.error:
        # Truncated header or table directory
        raise BadWoff("WOFF file is invalid")
    if b'name' not in table_data:
        raise BadWoff("WOFF file is invalid")
    name_table = table_data[b'name']
    if not name_table:
        # None (decompression failed) or empty (bad offset in the directory)
        raise BadWoff("WOFF file has an unreadable 'name' table")
    try:
        name_data = unpack_name_data(name_table)
    except struct.error:
        raise BadWoff("WOFF file has a truncated 'name' table")
    language = 0 # English
    if not any(record['language'] == language for record in name_data):
        # Fallback to using the first language we find
        if name_data:
            language = name_data[0]['language']
    name_dict = {}
    for record in name_data:
        if record['language'] == language:
            # The ID becomes the key; drop it from the record to reduce
            # redundancy
            name_dict[record.pop('name_id')] = record
    return name_dict
@memoize
def woff_info(path):
    """
    Returns a dictionary containing the English-language name (string) data
    from the WOFF file at the given *path*.

    Keys are the human-readable labels from `NAME_ID_MAP` (name IDs without a
    label are reported as ``'Unknown Name ID: <id>'``) and values are the
    decoded strings.

    Raises :exc:`BadWoff` (via :func:`woff_name_data`) if the name data cannot
    be extracted.

    .. note::

        Results are memoized so the returned dictionary may be shared between
        calls; treat it as read-only.
    """
    name_dict = woff_name_data(path)
    human_name_dict = {}
    for name_id, record in name_dict.items():
        human_name = NAME_ID_MAP.get(name_id, 'Unknown Name ID: %s' % name_id)
        human_name_dict[human_name] = record['string']
    return human_name_dict


if __name__ == "__main__":
    import json
    # NOTE: stdout is reserved for the JSON result (so other programs can
    # consume it); usage and error messages go to stderr.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <woff file>\n" % sys.argv[0])
        sys.exit(1)
    path = sys.argv[1]
    try:
        print(json.dumps(woff_info(path), indent=4, sort_keys=True))
    except BadWoff:
        sys.stderr.write(
            "Could not decode name table (metadata) from %s\n" % path)
        sys.exit(1)