# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# Copyright 2015-2020 by PyCLibrary Authors, see AUTHORS for more details.
#
# Distributed under the terms of the MIT/X11 license.
#
# The full license is in the file LICENCE, distributed with this software.
# -----------------------------------------------------------------------------
"""
Used for extracting data such as macro definitions, variables, typedefs, and
function signatures from C header files.
"""
import sys
import re
import os
import logging
from inspect import cleandoc
from traceback import format_exc
from .errors import DefinitionError
from .utils import find_header
# Import parsing elements
from pyparsing import \
(ParserElement, ParseResults, Forward, Optional, Word, WordStart,
WordEnd, Keyword, Regex, Literal, SkipTo, ZeroOrMore, OneOrMore,
Group, LineEnd, quotedString, oneOf, nestedExpr,
delimitedList, restOfLine, cStyleComment, alphas, alphanums, hexnums,
lineno, Suppress)
# Turn on pyparsing's packrat (memoizing) mode for all parser elements.
ParserElement.enablePackrat()

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Public names exported by a star-import of this module.
__all__ = ['win_defs', 'CParser']
class Type(tuple):
    """
    Representation of a C type. CParser uses this class to store the parsed
    typedefs and the types of variables/functions.

    **ATTENTION:** Due to compatibility issues with 0.1.0 this class derives
    from tuple and can be seen as the tuples from 0.1.0. In future this might
    change to a tuple-like object!!!

    Parameters
    ----------
    type_spec : str
        A string referring to the base type of this type definition. This may
        either be a fundamental type (i.e. 'int', 'enum x') or a type
        definition made by a typedef statement.

    declarators : str or list of tuple
        All following parameters derive a type from the type defined until
        now. Types can be derived by:

        - The string '*': define a pointer to the base type
          (i.e. Type('int', '*'))
        - The string '&': a reference. T.B.D.
        - A list of integers of len 1: define an array with N elements
          (N is the first and single entry in the list of integers). If N is
          -1, the array definition is seen as 'int x[]'
          (i.e. Type('int', [1]))
        - An N-tuple of 3-tuples: defines a function of N parameters. Every
          parameter is a 3-tuple of the form:
          (<parameter-name-or-None>, <param-type>, None).
          Due to compatibility reasons the return value of the function is
          stored in the Type.type_spec parameter
          (This is **not** the case for function pointers):
          (i.e. Type(Type('int', '*'), ( ('param1', Type('int'), None), ) ) )

    type_quals : dict of int to list of str (optional)
        This optional (keyword-)argument allows to add type qualifiers for
        every declarator level. The key 0 refers to the type qualifier of
        type_spec, while 1 refers to declarators[0], 2 refers to
        declarators[1] and so on. It must hold exactly
        ``1 + len(declarators)`` entries.

    Raises
    ------
    ValueError
        If the number of type qualifier entries does not match the number of
        declarators plus one.

    Notes
    -----
    To build more complex types any number of declarators can be combined,
    e.g. the C declaration::

        int * (*a[2])(char *, signed c[]);

    is represented as::

        Type('int', '*',
             ( (None, Type('char', '*'), None),
               ('c', Type('signed', [-1]), None) ),
             '*', [2])

    """
    # Cannot slot a subclass of tuple.

    def __new__(cls, type_spec, *declarators, **argv):
        # The tuple payload is (type_spec, declarator, declarator, ...);
        # keyword arguments are consumed by __init__.
        return super(Type, cls).__new__(cls, (type_spec,) + declarators)

    def __init__(self, type_spec, *declarators, **argv):
        super(Type, self).__init__()
        # Default: one empty qualifier tuple per level (type_spec + each
        # declarator).
        self.type_quals = (argv.pop('type_quals', None) or
                           ((),) * (1 + len(declarators)))
        if len(self.type_quals) != 1 + len(declarators):
            raise ValueError("wrong number of type qualifiers")
        assert len(argv) == 0, 'Invalid Parameter'

    def __eq__(self, other):
        # Two Type objects additionally have to agree on their qualifiers;
        # anything else is compared like a plain tuple.
        if isinstance(other, Type):
            if self.type_quals != other.type_quals:
                return False
        return super(Type, self).__eq__(other)

    def __ne__(self, other):
        result = self.__eq__(other)
        # tuple.__eq__ answers NotImplemented for non-tuples. Negating that
        # sentinel would wrongly report "equal", so hand it back and let
        # Python fall back to its default inequality handling.
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defining __eq__ resets __hash__ to None on Python 3. Restore the
        # tuple hash: equal Types always have equal tuple contents, so the
        # hash/eq contract holds (type_quals only ever make objects unequal).
        return super(Type, self).__hash__()

    @property
    def declarators(self):
        """Return a tuple of all declarators.

        """
        return tuple(self[1:])

    @property
    def type_spec(self):
        """Return the base type of this type.

        """
        return self[0]

    def is_fund_type(self):
        """Returns True, if this type is a fundamental type.

        Fundamental types are all types that are not defined via typedef:
        struct/union/enum references, or specs made up solely of the known
        type-name words configured at module level.

        """
        spec = self[0]
        if spec.startswith(('struct ', 'union ', 'enum ')):
            return True

        names = (num_types + nonnum_types + size_modifiers + sign_modifiers +
                 extra_type_list)
        return all(w in names for w in spec.split())

    def eval(self, type_map, used=None):
        """Resolves the type_spec of this type recursively if it is referring
        to a typedef. For resolving the type, type_map is used for lookup.

        Returns a new Type object.

        Parameters
        ----------
        type_map : dict of str to Type
            All typedefs that shall be resolved have to be stored in this
            type_map.

        used : list of str
            For internal use only to prevent circular typedefs.

        Raises
        ------
        DefinitionError
            If a typedef refers (directly or indirectly) to itself, or if a
            type name is missing from type_map.

        """
        used = used or []
        if self.is_fund_type():
            # Remove 'signed' before returning evaluated type
            return Type(re.sub(r'\bsigned\b', '', self.type_spec).strip(),
                        *self.declarators,
                        type_quals=self.type_quals)

        parent = self.type_spec
        if parent in used:
            m = 'Recursive loop while evaluating types. (typedefs are {})'
            raise DefinitionError(m.format(' -> '.join(used+[parent])))

        used.append(parent)
        if parent not in type_map:
            m = 'Unknown type "{}" (typedefs are {})'
            raise DefinitionError(m.format(parent, ' -> '.join(used)))

        pt = type_map[parent]
        # Splice the typedef in: its declarators come first, and the
        # qualifiers of its outermost level merge with the qualifiers this
        # type carries on its type_spec.
        evaled_type = Type(pt.type_spec, *(pt.declarators + self.declarators),
                           type_quals=(pt.type_quals[:-1] +
                                       (pt.type_quals[-1] +
                                        self.type_quals[0],) +
                                       self.type_quals[1:])
                           )
        return evaled_type.eval(type_map, used)

    def add_compatibility_hack(self):
        """If this Type is referring to a function (**not** a function pointer)
        a new type is returned, that matches the hack from version 0.1.0.
        This hack enforces the return value be encapsulated in a separated Type
        object:
            Type('int', '*', ())
        is converted to
            Type(Type('int', '*'), ())

        """
        # Exact type check on purpose: Type itself subclasses tuple, so an
        # isinstance() test would also fire when the last item is a Type.
        if type(self[-1]) is tuple:
            return Type(Type(*self[:-1], type_quals=self.type_quals[:-1]),
                        self[-1],
                        type_quals=((), self.type_quals[-1]))
        else:
            return self

    def remove_compatibility_hack(self):
        """Returns a Type object, where the hack from .add_compatibility_hack()
        is removed.

        The result is built from the plain tuple contents only, so it gets
        default (empty) type qualifiers.

        """
        if len(self) == 2 and isinstance(self[0], Type):
            return Type(*(self[0] + (self[1],)))
        else:
            return self

    def __repr__(self):
        # Only mention type_quals when at least one level is qualified.
        type_qual_str = ('' if not any(self.type_quals) else
                         ', type_quals='+repr(self.type_quals))
        return (type(self).__name__ + '(' +
                ', '.join(map(repr, self)) + type_qual_str + ')')

    def __getnewargs__(self):
        # Arguments handed to __new__ when the object is re-created by
        # pickle/copy.
        return (self.type_spec,) + self.declarators
class Compound(dict):
    """Dict-backed base class shared by the struct and union descriptions.

    The positional arguments are stored as a list under the ``'members'`` key
    and the optional ``pack`` keyword argument under the ``'pack'`` key
    (``None`` when not given). No other keyword argument is accepted.

    """
    __slots__ = ()

    def __init__(self, *members, **argv):
        packing = argv.pop('pack', None)
        assert len(argv) == 0
        super(Compound, self).__init__(members=list(members), pack=packing)

    @property
    def members(self):
        """The list stored under ``'members'``."""
        return self['members']

    @property
    def pack(self):
        """The value stored under ``'pack'``."""
        return self['pack']

    def __repr__(self):
        # Members first, then the packing only when one was set.
        rendered = ', '.join([repr(member) for member in self.members])
        if self.pack is not None:
            rendered += ', pack=' + repr(self.pack)
        return type(self).__name__ + '(' + rendered + ')'
class Struct(Compound):
    """Representation of a C struct. CParser uses this class to store the
    parsed structs.

    All behaviour (the ``members`` and ``pack`` entries and the repr) is
    inherited from Compound; this subclass only provides a distinct type name.

    **ATTENTION:** Due to compatibility issues with 0.1.0 this class derives
    from dict and can be seen as the dicts from 0.1.0. In future this might
    change to a dict-like object!!!

    """
    # No per-instance attributes beyond the inherited dict storage.
    __slots__ = ()
class Union(Compound):
    """Representation of a C union. CParser uses this class to store the
    parsed unions.

    All behaviour (the ``members`` and ``pack`` entries and the repr) is
    inherited from Compound; this subclass only provides a distinct type name.

    **ATTENTION:** Due to compatibility issues with 0.1.0 this class derives
    from dict and can be seen as the dicts from 0.1.0. In future this might
    change to a dict-like object!!!

    """
    # No per-instance attributes beyond the inherited dict storage.
    __slots__ = ()
class Enum(dict):
    """Representation of a C enum. CParser uses this class to store the parsed
    enums.

    The enumerators are given as keyword arguments and become the entries of
    the underlying dict (name -> value).

    **ATTENTION:** Due to compatibility issues with 0.1.0 this class derives
    from dict and can be seen as the dicts from 0.1.0. In future this might
    change to a dict-like object!!!

    """
    __slots__ = ()

    def __init__(self, **args):
        super(Enum, self).__init__(args)

    def __repr__(self):
        # Entries are listed in sorted name order so the output is stable.
        rendered = []
        for key in sorted(self):
            rendered.append(key + '=' + repr(self[key]))
        return type(self).__name__ + '(' + ', '.join(rendered) + ')'
def win_defs(version='1500'):
    """Load the selection of Windows headers shipped with PyCLibrary.

    The resulting definitions can be used directly or handed to another
    parser before it processes its own file:

        windefs = c_parser.win_defs()
        p = c_parser.CParser("headerFile.h", copy_from=windefs)

    Definitions are pulled from a selection of header files included in Visual
    Studio (possibly not legal to distribute? Who knows.), some of which have
    been abridged because they take so long to parse.

    Parameters
    ----------
    version : unicode
        Version of the MSVC to consider when parsing; it is used as the value
        of the ``_MSC_VER`` macro.

    Returns
    -------
    parser : CParser
        CParser containing all the infos from the windows headers.

    """
    header_files = ['WinNt.h', 'WinDef.h', 'WinBase.h', 'BaseTsd.h',
                    'WTypes.h', 'WinUser.h']

    # The parser class has to be initialised once before first use.
    if not CParser._init:
        logger.info('Automatic initialisation : OS is assumed to be win32')
        from .init import auto_init
        auto_init()

    predefined_macros = {'_WIN32': '', '_MSC_VER': version, 'CONST': 'const',
                         'NO_STRICT': None, 'MS_WIN32': ''}
    cache_path = os.path.join(os.path.dirname(__file__),
                              'headers', 'WinDefs.cache')

    # Processing is deferred so that it can be run against the cache file.
    parser = CParser(header_files, macros=predefined_macros,
                     process_all=False)
    parser.process_all(cache=cache_path)
    return parser
[docs]class CParser(object):
"""Class for parsing C code to extract variable, struct, enum, and function
declarations as well as preprocessor macros.
This is not a complete C parser; instead, it is meant to simplify the
process of extracting definitions from header files in the absence of a
complete build system. Many files will require some amount of manual
intervention to parse properly (see 'replace' and extra arguments)
Parameters
----------
files : str or iterable, optional
File or files which should be parsed.
copy_from : CParser or iterable of CParser, optional
CParser whose definitions should be included.
replace : dict, optional
Specify some string replacements to perform before parsing. Format is
{'searchStr': 'replaceStr', ...}
process_all : bool, optional
Flag indicating whether files should be parsed immediately. True by
default.
cache : unicode, optional
Path of the cache file from which to load definitions/to which save
definitions as parsing is an expensive operation.
check_cache_validity : bool, optional
Flag indicating whether to perform validity checking when using a cache file. This is useful
in a scenario where the python wrapper needs to be used without access to the headers
kwargs :
Extra parameters may be used to specify the starting state of the
parser. For example, one could provide a set of missing type
declarations by types={'UINT': ('unsigned int'), 'STRING': ('char', 1)}
Similarly, preprocessor macros can be specified: macros={'WINAPI': ''}
Example
-------
Create parser object, load two files
>>> p = CParser(['header1.h', 'header2.h'])
Remove comments, preprocess, and search for declarations
>>> p.process_all()
Just to see what was successfully parsed from the files
>>> p.print_all()
Access parsed declarations
>>> all_values = p.defs['values']
>>> functionSignatures = p.defs['functions']
To see what was not successfully parsed
>>> unp = p.process_all(return_unparsed=True)
>>> for s in unp:
print s
"""
#: Increment every time cache structure or parsing changes to invalidate
#: old cache files.
# 2 : add C99 integers
cache_version = 2
#: Private flag allowing to know if the parser has been initiliased.
_init = False
def __init__(self, files=None, copy_from=None, replace=None,
             process_all=True, cache=None, check_cache_validity=True, **kwargs):
    """Set up the parser state, load the files and optionally parse them.

    Parameters
    ----------
    files : str or iterable, optional
        File or files which should be loaded (looked up via find_headers).
    copy_from : CParser or list/tuple of CParser, optional
        Parser(s) whose per-file definitions are imported.
    replace : dict, optional
        Replacements handed to load_file for every loaded file.
    process_all : bool, optional
        When true, process_all() is run at the end of the initialisation.
    cache : unicode, optional
        Cache file path forwarded to process_all().
    check_cache_validity : bool, optional
        Forwarded to process_all().
    kwargs :
        Initial definitions: each key is a definition category and each value
        a dict of name -> definition registered through add_def.

    """
    if not self._init:
        logger.info('Automatic initialisation based on OS detection')
        from .init import auto_init
        auto_init()

    # All definitions / definitions grouped by originating file.
    self.defs = {}
    self.file_defs = {}

    # Struct packing rules as defined by #pragma pack.
    self.pack_list = {}

    # Options recorded for cache validation.
    options = kwargs.copy()
    options['files'] = []
    options['replace'] = {}
    self.init_opts = options

    self.data_list = ['types', 'variables', 'fnmacros', 'macros',
                      'structs', 'unions', 'enums', 'functions', 'values']

    self.file_order = []
    self.files = {}

    if files is not None:
        requested = [files] if isinstance(files, str) else files
        for header in self.find_headers(requested):
            self.load_file(header, replace)

    # One empty definition table per category.
    for category in self.data_list:
        self.defs[category] = {}

    # Translations from typedefs/structs/unions to fundamental types.
    self.compiled_types = {}

    self.current_file = None

    # Register the definitions passed as extra keyword arguments.
    for category in kwargs:
        entries = kwargs[category]
        for key in entries.keys():
            self.add_def(category, key, entries[key])

    # Pull in the definitions of other parsers.
    if copy_from is not None:
        if isinstance(copy_from, (list, tuple)):
            sources = copy_from
        else:
            sources = [copy_from]
        for other in sources:
            self.import_dict(other.file_defs)

    if process_all:
        self.process_all(cache=cache,
                         check_cache_validity=check_cache_validity)
def process_all(self, cache=None, return_unparsed=False,
                print_after_preprocess=False, check_cache_validity=True):
    """Strip comments, preprocess and parse the declarations of every file.

    Everything happens in memory; the files on disk are left untouched.

    Parameters
    ----------
    cache : unicode, optional
        Path of the cache file. It is tried first through load_cache(); when
        it cannot be used the files are parsed and the results are written
        to this path.
    return_unparsed : bool, optional
        Passed directly to parse_defs.
    print_after_preprocess : bool, optional
        If true prints the result of preprocessing each file.
    check_cache_validity : bool, optional
        Passed to load_cache() as ``check_validity``.

    Returns
    -------
    results : list or None
        List of the results from parse_defs, one per file. ``None`` when the
        definitions were loaded from the cache.

    Raises
    ------
    IOError
        If a file to process has no loaded content.

    """
    use_cache = cache is not None
    if use_cache and self.load_cache(cache,
                                     check_validity=check_cache_validity):
        logger.debug("Loaded cached definitions; will skip parsing.")
        # Everything came from the cache: nothing to parse.
        return

    parsed = []
    logger.debug(cleandoc('''Parsing C header files (no valid cache found).
                          This could take several minutes...'''))
    for fname in self.file_order:
        if self.files[fname] is None:
            # The file could not be loaded and no cache was available.
            mess = 'Could not find header file "{}" or a cache file.'
            raise IOError(mess.format(fname))

        logger.debug("Removing comments from file '{}'...".format(fname))
        self.remove_comments(fname)

        logger.debug("Preprocessing file '{}'...".format(fname))
        self.preprocess(fname)

        if print_after_preprocess:
            print("===== PREPROCSSED {} =======".format(fname))
            print(self.files[fname])

        logger.debug("Parsing definitions in file '{}'...".format(fname))
        parsed.append(self.parse_defs(fname, return_unparsed))

    if use_cache:
        logger.debug("Writing cache file '{}'".format(cache))
        self.write_cache(cache)

    return parsed
def load_cache(self, cache_file, check_validity=False):
    """Load a cache file.

    Used internally if cache is specified in process_all().

    Parameters
    ----------
    cache_file : unicode
        Path of the file from which the cache should be loaded. If it does
        not exist as given, it is looked up in the ``headers`` directory next
        to this module.
    check_validity : bool, optional
        If True, then run these checks before accepting the cache:

        - cache file must not be older than any existing source file
        - options recorded in cache must match options used to initialize
          CParser

        The cache format version is always checked.

    Returns
    -------
    result : bool
        Did the loading succeed.

    Raises
    ------
    ValueError
        If cache_file is not a str.

    """
    # Make sure cache file exists
    if not isinstance(cache_file, str):
        raise ValueError("Cache file option must be a str.")
    if not os.path.isfile(cache_file):
        # If file doesn't exist, search for it in this module's path
        d = os.path.dirname(__file__)
        cache_file = os.path.join(d, "headers", cache_file)
        if not os.path.isfile(cache_file):
            logger.debug("Can't find requested cache file.")
            return False

    # Make sure cache is newer than all input files
    if check_validity:
        mtime = os.stat(cache_file).st_mtime
        for f in self.file_order:
            # If file does not exist, then it does not count against the
            # validity of the cache.
            if os.path.isfile(f) and os.stat(f).st_mtime > mtime:
                logger.debug("Cache file is out of date.")
                return False

    try:
        # Read cache file. The context manager closes the handle even when
        # unpickling fails.
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at cache files from a trusted source.
        import pickle
        with open(cache_file, 'rb') as fd:
            cache = pickle.load(fd)

        # Make sure __init__ options match
        if check_validity:
            if cache['opts'] != self.init_opts:
                db = logger.debug
                db("Cache file is not valid")
                db("It was created using different initialization options")
                db('{}'.format(cache['opts']))
                db('{}'.format(self.init_opts))
                return False

            else:
                logger.debug("Cache init opts are OK:")
                logger.debug('{}'.format(cache['opts']))

        if cache['version'] < self.cache_version:
            mess = "Cache file is not valid--cache format has changed."
            logger.debug(mess)
            return False

        # Import all parse results
        self.import_dict(cache['file_defs'])
        return True

    except Exception:
        # Any failure while reading/validating is treated as "no cache".
        logger.exception("Warning--cache read failed:")
        return False
def import_dict(self, data):
    """Register every definition found in a per-file dictionary.

    ``data`` must be laid out like CParser.file_defs: file name -> category
    -> name -> definition. While the entries of a file are registered through
    add_def, ``current_file`` is set to that file name.

    Used internally; does not need to be called manually.

    """
    for fname in data.keys():
        self.current_file = fname
        per_file = data[fname]
        for category in self.data_list:
            entries = per_file[category]
            for name in entries:
                self.add_def(category, name, entries[name])
def write_cache(self, cache_file):
    """Pickle the parsed declarations to ``cache_file``. Used internally.

    The stored dictionary holds the initialisation options (``'opts'``), the
    per-file definitions (``'file_defs'``) and the cache format version
    (``'version'``).

    """
    import pickle
    payload = {
        'opts': self.init_opts,
        'file_defs': self.file_defs,
        'version': self.cache_version,
    }
    with open(cache_file, 'wb') as out:
        pickle.dump(payload, out)
def load_file(self, path, replace=None):
    """Read a file into memory, applying the requested replacements.

    Called by __init__, should not be called manually.

    Parameters
    ----------
    path : unicode
        Path of the file to load.
    replace : dict, optional
        Maps search patterns (handed to ``re.sub``) to the text that
        replaces them in the loaded content.

    Returns
    -------
    loaded : bool
        False when the file does not exist (its content is then recorded as
        None), True otherwise.

    """
    if not os.path.isfile(path):
        # Not fatal: a cache file may still provide the definitions.
        mess = "Warning: C header '{}' is missing, this may cause trouble."
        logger.warning(mess.format(path))
        self.files[path] = None
        return False

    with open(path, "r") as fd:
        self.files[path] = fd.read()

    if replace is not None:
        for pattern in replace:
            substituted = re.sub(pattern, replace[pattern], self.files[path])
            self.files[path] = substituted

    self.file_order.append(path)

    # Only the base name is recorded in the options, since the directory may
    # change between systems.
    base_name = os.path.basename(path)
    self.init_opts['replace'][base_name] = replace
    self.init_opts['files'].append(base_name)
    return True
def _format_parsed_file(self, filename=None):
    """Return a printable dump of the parsed definitions.

    One section per definition category is produced. Without ``filename``
    the merged definitions are dumped, otherwise only the definitions
    recorded for that file.

    """
    from pprint import pformat

    chunks = []
    for category in self.data_list:
        chunks.append("============== {} ==================\n".format(category))
        if filename is not None:
            body = pformat(self.file_defs[filename][category])
        else:
            body = pformat(self.defs[category], indent=4)
        chunks.append(body + "\n")
    return "".join(chunks)
def print_all(self, filename=None):
    """Print everything that was parsed. Useful for debugging.

    Parameters
    ----------
    filename : unicode, optional
        Name of the file whose definitions should be printed; when omitted
        the merged definitions are printed.

    """
    report = self._format_parsed_file(filename)
    print(report)
def __str__(self):
    """Return the formatted dump of the merged definitions."""
    formatted = self._format_parsed_file()
    return formatted
# =========================================================================
# --- Processing functions
# =========================================================================
# --- Pre processing
def preprocess(self, path):
    """Scan named file for preprocessor directives, removing them while
    expanding macros.

    Operates in memory, does not alter the original files: the processed
    text replaces ``self.files[path]``.

    Currently support :
    - conditionals : ifdef, ifndef, if, elif, else (defined can be used
    in a if statement).
    - definition : define, undef
    - pragmas : pragma (only ``pack`` is interpreted)

    Parameters
    ----------
    path : unicode
        Key of the already loaded file in ``self.files``.

    """
    # We need this so that eval_expr works properly
    self.build_parser()
    self.current_file = path

    # Stack for #pragma pack push/pop; entries are (packing, identifier)
    pack_stack = [(None, None)]
    # Per-file list of (line index, packing) changes, read by packing_at()
    self.pack_list[path] = [(0, None)]
    packing = None  # Current packing value

    text = self.files[path]

    # First join together lines split by \\n
    text = Literal('\\\n').suppress().transformString(text)

    # Define the structure of a macro definition
    name = Word(alphas+'_', alphanums+'_')('name')
    deli_list = Optional(lparen + delimitedList(name) + rparen)
    self.pp_define = (name.setWhitespaceChars(' \t')("macro") +
                      deli_list.setWhitespaceChars(' \t')('args') +
                      SkipTo(LineEnd())('value'))
    # Registering the macro happens as a side effect of parsing a definition
    self.pp_define.setParseAction(self.process_macro_defn)

    # Comb through lines, process all directives
    lines = text.split('\n')

    result = []

    directive = re.compile(r'\s*#\s*([a-zA-Z]+)(.*)$')
    # if_true: one entry per open conditional (plus the initial True) telling
    # whether its current branch is active.
    # if_hit: one entry per open conditional telling whether one of its
    # branches has already been taken.
    if_true = [True]
    if_hit = []
    for i, line in enumerate(lines):
        new_line = ''
        m = directive.match(line)

        # Regular code line
        if m is None:
            # Only include if we are inside the correct section of an IF
            # block
            if if_true[-1]:
                new_line = self.expand_macros(line)

        # Macro line
        else:
            d = m.groups()[0]
            rest = m.groups()[1]

            # ifdef/ifndef are rewritten as the equivalent 'if defined'
            if d == 'ifdef':
                d = 'if'
                rest = 'defined ' + rest
            elif d == 'ifndef':
                d = 'if'
                rest = '!defined ' + rest

            # Evaluate 'defined' operator before expanding macros
            if d in ['if', 'elif']:
                def pa(t):
                    # Replace 'defined X' by '1' if X is a known macro or
                    # function macro, by '0' otherwise.
                    is_macro = t['name'] in self.defs['macros']
                    is_macro_func = t['name'] in self.defs['fnmacros']
                    return ['0', '1'][is_macro or is_macro_func]

                rest = (Keyword('defined') +
                        (name | lparen + name + rparen)
                        ).setParseAction(pa).transformString(rest)

            elif d in ['define', 'undef']:
                # Split the macro name from the remainder of the line
                match = re.match(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)(.*)$', rest)
                macroName, rest = match.groups()

            # Expand macros if needed
            if rest is not None and (all(if_true) or d in ['if', 'elif']):
                rest = self.expand_macros(rest)

            if d == 'elif':
                # Not evaluated if an earlier branch was taken or an
                # enclosing conditional is inactive.
                if if_hit[-1] or not all(if_true[:-1]):
                    ev = False
                else:
                    ev = self.eval_preprocessor_expr(rest)

                logger.debug(" "*(len(if_true)-2) + line +
                             '{}, {}'.format(rest, ev))

                if_true[-1] = ev
                if_hit[-1] = if_hit[-1] or ev

            elif d == 'else':
                logger.debug(" "*(len(if_true)-2) + line +
                             '{}'.format(not if_hit[-1]))
                # Active only if no branch was taken so far and all enclosing
                # conditionals are active.
                if_true[-1] = (not if_hit[-1]) and all(if_true[:-1])
                if_hit[-1] = True

            elif d == 'endif':
                if_true.pop()
                if_hit.pop()
                logger.debug(" "*(len(if_true)-1) + line)

            elif d == 'if':
                if all(if_true):
                    ev = self.eval_preprocessor_expr(rest)
                else:
                    ev = False
                logger.debug(" "*(len(if_true)-1) + line +
                             '{}, {}'.format(rest, ev))
                if_true.append(ev)
                if_hit.append(ev)

            elif d == 'define':
                # 'continue' also skips the result.append() at the end of the
                # loop body, so no line is emitted for this directive.
                if not if_true[-1]:
                    continue
                logger.debug(" "*(len(if_true)-1) + "define: " +
                             '{}, {}'.format(macroName, rest))
                try:
                    # Macro is registered here
                    self.pp_define.parseString(macroName + ' ' + rest)
                except Exception:
                    logger.exception("Error processing macro definition:" +
                                     '{}, {}'.format(macroName, rest))

            elif d == 'undef':
                if not if_true[-1]:
                    continue
                try:
                    self.rem_def('macros', macroName.strip())
                except Exception:
                    # A KeyError is ignored silently; anything else is logged.
                    if sys.exc_info()[0] is not KeyError:
                        mess = "Error removing macro definition '{}'"
                        logger.exception(mess.format(macroName.strip()))

            # Check for changes in structure packing
            # Support only for #pragma pack (with all its variants
            # save show), None is used to signal that the default packing
            # is used.
            # Those two definition disagree :
            # https://gcc.gnu.org/onlinedocs/gcc/Structure-Packing-Pragmas.html
            # http://msdn.microsoft.com/fr-fr/library/2e70t5y1.aspx
            # The current implementation follows the MSVC doc.
            elif d == 'pragma':
                if not if_true[-1]:
                    continue
                m = re.match(r'\s+pack\s*\(([^\)]*)\)', rest)
                if not m:
                    continue
                if m.groups():
                    opts = [s.strip() for s in m.groups()[0].split(',')]

                # Classify the options: push/pop keyword, numeric packing
                # value, anything else is taken as the identifier.
                pushpop = id = val = None
                for o in opts:
                    if o in ['push', 'pop']:
                        pushpop = o
                    elif o.isdigit():
                        val = int(o)
                    else:
                        id = o

                packing = val

                if pushpop == 'push':
                    pack_stack.append((packing, id))
                elif opts[0] == 'pop':
                    if id is None:
                        pack_stack.pop()
                    else:
                        # Look for the first stack entry carrying this
                        # identifier and cut the stack just below it.
                        ind = None
                        for j, s in enumerate(pack_stack):
                            if s[1] == id:
                                ind = j
                                break
                        if ind is not None:
                            pack_stack = pack_stack[:ind]
                    # Without an explicit value the packing of the entry now
                    # on top of the stack is used.
                    if val is None:
                        packing = pack_stack[-1][0]

                mess = ">> Packing changed to {} at line {}"
                logger.debug(mess.format(str(packing), i))
                self.pack_list[path].append((i, packing))

            else:
                # Ignore any other directives
                mess = 'Ignored directive {} at line {}'
                logger.debug(mess.format(d, i))

        # Directive lines contribute an empty line, code lines their
        # macro-expanded text (or an empty line when inactive).
        result.append(new_line)

    self.files[path] = '\n'.join(result)
def eval_preprocessor_expr(self, expr):
    """Evaluate the expression of an ``#if``/``#elif`` directive.

    The C operators ``!``, ``&&`` and ``||`` are translated to their Python
    counterparts, every remaining identifier is replaced by ``0`` and the
    result is evaluated as a Python expression.

    Parameters
    ----------
    expr : str
        Expression text (macros already expanded by the caller).

    Returns
    -------
    ev : bool
        Truth value of the expression; False if it cannot be evaluated.

    """
    # Make a few alterations so the expression can be eval'd.
    # '!=' has to be tried before '!', otherwise 'a != b' is rewritten to
    # 'a  not = b', which is a syntax error and always evaluated to False.
    macro_diffs = (
        Literal('!=').setParseAction(lambda: ' != ') |
        Literal('!').setParseAction(lambda: ' not ') |
        Literal('&&').setParseAction(lambda: ' and ') |
        Literal('||').setParseAction(lambda: ' or ') |
        Word(alphas + '_', alphanums + '_').setParseAction(lambda: '0'))
    expr2 = macro_diffs.transformString(expr).strip()

    try:
        # NOTE(security): this eval()s text derived from the parsed header.
        # Only process header files from trusted sources.
        ev = bool(eval(expr2))
    except Exception:
        mess = "Error evaluating preprocessor expression: {} [{}]\n{}"
        logger.debug(mess.format(expr, repr(expr2), format_exc()))
        ev = False
    return ev
def process_macro_defn(self, t):
    """Register the macro described by the tokens of a parsed ``#define``.

    Three cases are handled: the value names an existing function macro
    (the definition is copied), the macro has no argument list (stored as a
    plain macro together with its evaluated value), or it has arguments
    (compiled and stored as a function macro).

    Returns the normalised ``#define`` line.

    """
    logger.debug("Processing MACRO: {}".format(t))
    macro_val = t.value.strip()
    macro_name = t.macro

    if macro_val in self.defs['fnmacros']:
        # Alias of an already known function macro.
        self.add_def('fnmacros', macro_name, self.defs['fnmacros'][macro_val])
        logger.debug(" Copy fn macro {} => {}".format(macro_val, macro_name))
    elif t.args == '':
        # Plain macro: keep the text and its evaluated value.
        val = self.eval_expr(macro_val)
        self.add_def('macros', macro_name, macro_val)
        self.add_def('values', macro_name, val)
        logger.debug(" Add macro: {} ({}); {}".format(
            macro_name, val, self.defs['macros'][macro_name]))
    else:
        # Function macro: compile the body against its argument names.
        compiled = self.compile_fn_macro(macro_val, list(t.args))
        self.add_def('fnmacros', macro_name, compiled)
        logger.debug(" Add fn macro: {} ({}); {}".format(
            macro_name, t.args, self.defs['fnmacros'][macro_name]))

    return "#define " + macro_name + " " + macro_val
def compile_fn_macro(self, text, args):
    """Turn a function macro spec into a compiled description.

    Parameters
    ----------
    text : str
        Body of the function macro.
    args : list of str
        Names of the macro arguments, in declaration order.

    Returns
    -------
    compiled : tuple
        ``(template, arg_order)``: ``template`` is a ``str.format`` template
        in which every use of an argument outside of a string literal is
        replaced by ``{}``; ``arg_order`` lists, for each placeholder, the
        index of the argument to insert.

    """
    def literal(chunk):
        # Braces of the macro body must survive str.format() untouched.
        return chunk.replace('{', '{{').replace('}', '}}')

    # Without argument names there is nothing to substitute (and an empty
    # alternation would match the empty string everywhere).
    if not args:
        return (literal(text), [])

    # Find all instances of each arg in text. The first alternative consumes
    # string literals so that names inside them are not substituted.
    args_str = '|'.join(args)
    arg_regex = re.compile(r'("(\\"|[^"])*")|(\b({})\b)'.format(args_str))
    start = 0
    parts = []
    arg_order = []
    # Group 4 holds the argument name; it is None for a string literal.
    for m in arg_regex.finditer(text):
        arg = m.group(4)
        if arg is not None:
            parts.append(literal(text[start:m.start(4)]) + '{}')
            start = m.end(4)
            arg_order.append(args.index(arg))
    parts.append(literal(text[start:]))
    return (''.join(parts), arg_order)
def expand_macros(self, line):
    """Expand all the macro expressions in a string.

    String literals are copied verbatim. The replacement text of a plain
    macro is inserted as is (it is not scanned again). Faulty calls to macro
    function are left untouched.

    Parameters
    ----------
    line : str
        Text in which macros should be expanded.

    Returns
    -------
    expanded : str
        The text with known macros replaced.

    """
    # Either a double-quoted string literal or a word; group 4 is the word
    # and is None when a string literal matched.
    reg = re.compile(r'("(\\"|[^"])*")|(\b(\w+)\b)')
    parts = []
    macros = self.defs['macros']
    fnmacros = self.defs['fnmacros']
    while True:
        m = reg.search(line)
        if not m:
            break
        name = m.group(4)
        if name is None:
            # String literal: skip over it using the end of the whole match
            # (the word group did not participate, so its end is -1).
            stop = m.end()
            parts.append(line[:stop])
            line = line[stop:]
        elif name in macros:
            parts.append(line[:m.start(4)])
            line = line[m.end(4):]
            parts.append(macros[name])
        elif name in fnmacros:
            remainder = line[m.end(4):]
            # If function macro expansion fails, just ignore it.
            try:
                exp, end = self.expand_fn_macro(name, remainder)
            except Exception:
                exp = name
                end = remainder
                mess = "Function macro expansion failed: {}, {}\n {}"
                logger.error(mess.format(name, remainder, format_exc()))

            parts.append(line[:m.start(4)])
            line = end
            parts.append(exp)
        else:
            # Not a macro: keep the text up to the end of the word.
            stop = m.end(4)
            parts.append(line[:stop])
            line = line[stop:]
    parts.append(line)
    return ''.join(parts)
def expand_fn_macro(self, name, text):
    """Expand one call of the function macro ``name``.

    Parameters
    ----------
    name : str
        Name of a macro stored in ``self.defs['fnmacros']``.
    text : str
        Text following the macro name; the call arguments are taken from
        between the first ``(`` and the first ``)``.

    Returns
    -------
    result : tuple
        ``(expansion, remainder)`` where ``remainder`` is the text following
        the first closing parenthesis.

    Raises
    ------
    DefinitionError
        If the argument list cannot be extracted from ``text``.

    """
    # The compiled definition looks like ('%s + %s / %s', (0, 0, 1))
    compiled = self.defs['fnmacros'][name]

    try:
        head, remainder = text.split(')', 1)
        _, raw_args = head.split('(', 1)
        call_args = [piece.strip() for piece in raw_args.split(',')]
    except Exception:
        mess = "Function macro {} argument analysis failed :\n{}"
        raise DefinitionError(0, mess.format(name, format_exc()))

    # The arguments may themselves contain macros.
    expanded = [self.expand_macros(a) for a in call_args]
    ordered = [expanded[i] for i in compiled[1]]
    return (compiled[0].format(*ordered), remainder)
# --- Compilation functions
def parse_defs(self, path, return_unparsed=False):
    """Run the declaration parser over the (preprocessed) text of a file.

    Parameters
    ----------
    path : unicode
        Path of the file to parse for definitions.
    return_unparsed : bool, optional
        If true, return a string of all the text that failed to match (for
        debugging purposes), with runs of blank lines collapsed.

    Returns
    -------
    tokens : list or str
        The tokens of every successfully parsed declaration, or the unparsed
        text when ``return_unparsed`` is true.

    """
    self.current_file = path
    parser = self.build_parser()
    source = self.files[path]

    if not return_unparsed:
        return [found[0] for found in parser.scanString(source)]

    # Drop everything the parser recognises and keep what is left.
    leftover = parser.suppress().transformString(source)
    return re.sub(r'\n\s*\n', '\n', leftover)
def build_parser(self):
    """Builds the entire tree of parser elements for the C language (the
    bits we support, anyway).

    The grammar is built only once per instance: it is stored in
    ``self.parser`` and returned directly on later calls. The intermediate
    elements are kept as attributes as well (``type_spec``, ``declarator``,
    ``abstract_declarator``, ``type_decl``, ``variable_decl``,
    ``function_decl``, ``struct_type``, ``enum_type``, ...).

    Returns
    -------
    parser : pyparsing parser element
        Matches a typedef, a variable declaration or a function definition.

    """
    # Reuse the grammar if it was already built for this instance.
    if hasattr(self, 'parser'):
        return self.parser

    # Struct and enum types can appear inside type specifications, which are
    # themselves used inside struct definitions, hence the forward
    # declarations (filled in further down).
    self.struct_type = Forward()
    self.enum_type = Forward()
    type_ = (fund_type |
             Optional(kwl(size_modifiers + sign_modifiers)) + ident |
             self.struct_type |
             self.enum_type)
    if extra_modifier is not None:
        type_ += extra_modifier
    type_.setParseAction(recombine)
    # A type specification: leading qualifiers ('pre_qual') + type 'name'.
    self.type_spec = Group(type_qualifier('pre_qual') + type_("name"))

    # --- Abstract declarators for use in function pointer arguments
    #   Thus begins the extremely hairy business of parsing C declarators.
    #   The following parsers combined with the process_declarator function
    #   allow us to turn a nest of type modifiers into a correctly
    #   ordered list of modifiers.

    self.declarator = Forward()
    self.abstract_declarator = Forward()

    # Abstract declarators look like:
    #     <empty string>
    #     *
    #     **[num]
    #     (*)(int, int)
    #     *( )(int, int)[10]
    #     ...etc...
    self.abstract_declarator << Group(
        type_qualifier('first_typequal') +
        Group(ZeroOrMore(Group(Suppress('*') + type_qualifier)))('ptrs') +
        ((Optional('&')('ref')) |
         (lparen + self.abstract_declarator + rparen)('center')) +
        Optional(lparen +
                 Optional(delimitedList(Group(
                          self.type_spec('type') +
                          self.abstract_declarator('decl') +
                          Optional(Literal('=').suppress() + expression,
                                   default=None)('val')
                          )), default=None) +
                 rparen)('args') +
        Group(ZeroOrMore(lbrack + Optional(expression, default='-1') +
              rbrack))('arrays')
    )

    # Declarators look like:
    #     varName
    #     *varName
    #     **varName[num]
    #     (*fnName)(int, int)
    #     * fnName(int arg1=0)[10]
    #     ...etc...
    self.declarator << Group(
        type_qualifier('first_typequal') + call_conv +
        Group(ZeroOrMore(Group(Suppress('*') + type_qualifier)))('ptrs') +
        ((Optional('&')('ref') + ident('name')) |
         (lparen + self.declarator + rparen)('center')) +
        Optional(lparen +
                 Optional(delimitedList(
                     Group(self.type_spec('type') +
                           (self.declarator |
                            self.abstract_declarator)('decl') +
                           Optional(Literal('=').suppress() +
                                    expression, default=None)('val')
                           )),
                          default=None) +
                 rparen)('args') +
        Group(ZeroOrMore(lbrack + Optional(expression, default='-1') +
              rbrack))('arrays')
    )
    self.declarator_list = Group(delimitedList(self.declarator))

    # Typedef
    self.type_decl = (Keyword('typedef') + self.type_spec('type') +
                      self.declarator_list('decl_list') + semi)
    self.type_decl.setParseAction(self.process_typedef)

    # Variable declaration, optionally initialised with a single expression
    # ('value') or a brace-enclosed list of expressions ('array_values').
    self.variable_decl = (
        Group(storage_class_spec +
              self.type_spec('type') +
              Optional(self.declarator_list('decl_list')) +
              Optional(Literal('=').suppress() +
                       (expression('value') |
                        (lbrace +
                         Group(delimitedList(expression))('array_values') +
                         rbrace
                         )
                        )
                       )
              ) +
        semi)
    self.variable_decl.setParseAction(self.process_variable)

    # Function definition; the body ({...}) is matched but discarded.
    self.typeless_function_decl = (self.declarator('decl') +
                                   nestedExpr('{', '}').suppress())
    self.function_decl = (storage_class_spec +
                          self.type_spec('type') +
                          self.declarator('decl') +
                          nestedExpr('{', '}').suppress())
    self.function_decl.setParseAction(self.process_function)

    # Struct definition
    self.struct_decl = Forward()
    struct_kw = (Keyword('struct') | Keyword('union'))
    self.struct_member = (
        # A copy of the variable declaration whose parse action is replaced,
        # so that process_variable is not run for members.
        Group(self.variable_decl.copy().setParseAction(lambda: None)) |
        # Hack to handle bit width specification.
        Group(Group(self.type_spec('type') +
                    Optional(self.declarator_list('decl_list')) +
                    colon + integer('bit') + semi)) |
        # Function definitions inside the braces are matched and dropped.
        (self.type_spec + self.declarator +
         nestedExpr('{', '}')).suppress() |
        (self.declarator + nestedExpr('{', '}')).suppress()
        )

    self.decl_list = (lbrace +
                      Group(OneOrMore(self.struct_member))('members') +
                      rbrace)
    # Either a (possibly anonymous) definition with a member list, or a
    # plain reference by name.
    self.struct_type << (struct_kw('struct_type') +
                         ((Optional(ident)('name') +
                           self.decl_list) | ident('name'))
                         )
    self.struct_type.setParseAction(self.process_struct)

    self.struct_decl = self.struct_type + semi

    # Enum definition
    enum_var_decl = Group(ident('name') +
                          Optional(Literal('=').suppress() +
                          expression('value')))

    # Either a (possibly anonymous) definition with its enumerators (a
    # trailing comma is accepted), or a plain reference by name.
    self.enum_type << (Keyword('enum') +
                       (Optional(ident)('name') +
                        lbrace +
                        Group(delimitedList(enum_var_decl))('members') +
                        Optional(comma) + rbrace | ident('name'))
                       )
    self.enum_type.setParseAction(self.process_enum)
    self.enum_decl = self.enum_type + semi

    # Top-level grammar: alternatives are tried in this order.
    self.parser = (self.type_decl | self.variable_decl |
                   self.function_decl)
    return self.parser
def process_declarator(self, decl):
    """Process a declarator (without base type) and return a tuple
    (name, [modifiers], (qualifiers, ...)).

    - name is the declared name, or None if the declarator has none
    - modifiers is the list of declarator tokens in the order they are
      appended below: calling convention, pointers, arrays, function
      arguments, reference, followed by the modifiers of a parenthesised
      inner declarator
    - the last item is a tuple holding the type qualifiers collected for
      the levels

    See process_type(...) for more information.

    """
    toks = []
    # Entry 0 holds the qualifiers preceding the declarator; it is later
    # merged with the qualifiers of the base type by process_type.
    quals = [tuple(decl.get('first_typequal', []))]
    name = None
    logger.debug("DECL: {}".format(decl))

    if 'call_conv' in decl and len(decl['call_conv']) > 0:
        toks.append(decl['call_conv'])
        quals.append(None)
    if 'ptrs' in decl and len(decl['ptrs']) > 0:
        # One '*' token per pointer level, each with its own qualifiers.
        toks += ('*',) * len(decl['ptrs'])
        quals += map(tuple, decl['ptrs'])
    if 'arrays' in decl and len(decl['arrays']) > 0:
        # Each array dimension becomes a one-element list with its size.
        toks.extend([self.eval_expr(x)] for x in decl['arrays'])
        quals += [()] * len(decl['arrays'])
    if 'args' in decl and len(decl['args']) > 0:
        # A leading None marks an empty argument list.
        if decl['args'][0] is None:
            toks.append(())
        else:
            # Default value of an argument as a 1-tuple, (None,) if absent.
            ex = lambda x: (x[0],) if len(x)!=0 else (None,)
            # Each argument becomes (name, type, default value).
            toks.append(tuple([self.process_type(a['type'],
                                                 a['decl'][0]) +
                               ex(a['val']) for a in decl['args']]
                              )
                        )
        quals.append(())
    if 'ref' in decl:
        toks.append('&')
        quals.append(())
    if 'center' in decl:
        # Parenthesised inner declarator: its modifiers are appended after
        # the ones collected so far, and its leading qualifiers are merged
        # into the last qualifier entry.
        (n, t, q) = self.process_declarator(decl['center'][0])
        if n is not None:
            name = n
        toks.extend(t)
        quals = quals[:-1] + [quals[-1] + q[0]] + list(q[1:])
    if 'name' in decl:
        name = decl['name']
    return (name, toks, tuple(quals))
def process_type(self, typ, decl):
    """Combine a base type and a declarator into a ``(name, Type)`` pair.

    - name is the string name of the declarator or None for an abstract
      declarator
    - the Type is built from the base type name followed by the modifiers
      returned by process_declarator, which can be:

        - `*`    : pointer (multiple pointers `***` allowed)
        - `&`    : reference
        - `__X`  : calling convention (windows only). X can be `cdecl` or
          `stdcall`
        - list   : array. Value(s) indicate the length of each array, -1
          for incomplete type.
        - tuple  : function, items are the output of process_type for each
          function argument.

    The qualifiers in front of the base type are merged with the first
    qualifier entry of the declarator.

    """
    logger.debug("PROCESS TYPE/DECL: {}/{}".format(typ['name'], decl))
    name, modifiers, quals = self.process_declarator(decl)
    leading_quals = tuple(typ.get('pre_qual', []))
    all_quals = (leading_quals + quals[0],) + quals[1:]
    resolved = Type(typ['name'], *modifiers, type_quals=all_quals)
    return (name, resolved)
def process_enum(self, s, l, t):
    """Parse action registering an enum and its members.

    An enum without a name is stored under the first unused
    'anon_enum<N>' name. If the name is not yet known, every member is
    added to the 'values' definitions and the enum itself to the 'enums'
    and 'types' definitions.

    Parameters
    ----------
    s, l :
        Parse action arguments, unused here.
    t :
        Parsed tokens; `name` and `members` are read. Each member provides
        `name` and `value`.

    Returns
    -------
    str or None
        'enum <name>', or None if an error occurred (the error is logged).

    """
    try:
        logger.debug("ENUM: {}".format(t))
        if t.name == '':
            n = 0
            while True:
                name = 'anon_enum{}'.format(n)
                if name not in self.defs['enums']:
                    break
                n += 1
        else:
            name = t.name[0]

        logger.debug(" name: {}".format(name))

        if name not in self.defs['enums']:
            i = 0
            enum = {}
            for v in t.members:
                if v.value != '':
                    try:
                        evaluated = self.eval_expr(v.value)
                    except Exception:
                        evaluated = None
                    # eval_expr reports failure by returning None. Keep
                    # the running counter in that case instead of letting
                    # `i += 1` fail and drop the whole enum.
                    if evaluated is None:
                        logger.debug(" could not evaluate {!r} for {}, "
                                     "using {}".format(v.value, v.name, i))
                    else:
                        i = evaluated
                enum[v.name] = i
                self.add_def('values', v.name, i)
                i += 1

            logger.debug(" members: {}".format(enum))
            self.add_def('enums', name, enum)
            self.add_def('types', 'enum '+name, Type('enum', name))

        return ('enum ' + name)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        logger.exception("Error processing enum: {}".format(t))
def process_function(self, s, l, t):
    """Parse action storing a function definition.

    The declarator of `t` is combined with its type through process_type.
    The resulting type must end in a tuple (the argument list); otherwise
    a DefinitionError is raised, which is logged like any other error
    raised while processing.

    Parameters
    ----------
    s, l :
        Parse action arguments, unused here.
    t :
        Parsed tokens; `type` and `decl` are read.

    """
    logger.debug("FUNCTION {} : {}".format(t, t.keys()))
    try:
        func_name, signature = self.process_type(t.type, t.decl[0])
        has_arg_list = len(signature) != 0 and type(signature[-1]) is tuple
        if not has_arg_list:
            logger.error('{}'.format(t))
            raise DefinitionError(
                "Incorrect declarator type for function definition.")
        logger.debug(" name: {}".format(func_name))
        logger.debug(" sig: {}".format(signature))
        self.add_def('functions', func_name,
                     signature.add_compatibility_hack())
    except Exception:
        logger.exception("Error processing function: {}".format(t))
def packing_at(self, line):
    """Return the structure packing value in effect at a line number.

    The pack list of the current file is walked in order; the value of
    the last entry whose line is not greater than `line` is returned. The
    walk stops at the first entry located after `line`.

    Parameters
    ----------
    line : int
        Line number to look up.

    Returns
    -------
    object or None
        The packing value, or None if no entry precedes `line`.

    """
    active = None
    for entry in self.pack_list[self.current_file]:
        if not entry[0] <= line:
            break
        active = entry[1]
    return active
def process_struct(self, s, l, t):
    """Parse action registering a struct or union.

    Unnamed structs/unions are stored under the first unused
    'anon_<kind><N>' name. The definition is (re)built when members are
    present, when the name is unknown, or when the stored definition
    compares equal to an empty dict.

    Parameters
    ----------
    s : str
        Parsed string, used with `l` to find the current line number.
    l : int
        Location of the match in `s`.
    t :
        Parsed tokens; `struct_type`, `name` and `members` are read.

    Returns
    -------
    str or None
        '<kind> <name>', or None if an error occurred (the error is
        logged).

    """
    try:
        kind = t.struct_type  # struct or union
        table = kind + 's'

        # Check for extra packing rules
        packing = self.packing_at(lineno(l, s))

        logger.debug('{} {} {}'.format(kind.upper(), t.name, t))

        if t.name == '':
            # Pick the first anonymous name that is still free.
            n = 0
            sname = 'anon_{}{}'.format(kind, n)
            while sname in self.defs[table]:
                n += 1
                sname = 'anon_{}{}'.format(kind, n)
        elif isinstance(t.name, str):
            sname = t.name
        else:
            sname = t.name[0]

        logger.debug(" NAME: {}".format(sname))

        needs_definition = (len(t.members) > 0 or
                            sname not in self.defs[table] or
                            self.defs[table][sname] == {})
        if needs_definition:
            logger.debug(" NEW " + kind.upper())
            fields = []
            for m in t.members:
                entry = m[0]
                typ = entry.type
                val = self.eval_expr(entry.value)
                logger.debug(" member: {}, {}, {}".format(
                    m, entry.keys(), entry.decl_list))

                if len(entry.decl_list) == 0:  # anonymous member
                    member = [None, Type(typ[0]), None]
                    if entry.bit:
                        member.append(int(entry.bit))
                    fields.append(tuple(member))

                for d in entry.decl_list:
                    (name, decl) = self.process_type(typ, d)
                    member = [name, decl, val]
                    if entry.bit:
                        # Bit-field width is stored as a fourth item.
                        member.append(int(entry.bit))
                    fields.append(tuple(member))
                    logger.debug(" {} {} {} {}".format(name, decl,
                                                       val, entry.bit))

            container = Struct if kind == 'struct' else Union
            self.add_def(table, sname, container(*fields, pack=packing))
            self.add_def('types', kind+' '+sname, Type(kind, sname))

        return kind + ' ' + sname
    except Exception:
        logger.exception('Error processing struct: {}'.format(t))
def process_variable(self, s, l, t):
    """Parse action registering variables and function prototypes.

    Every declarator of the first token is combined with its type. A
    resulting type that ends in a tuple is stored as a function
    prototype; anything else is stored as a variable together with its
    evaluated value. Errors are logged, not raised.

    Parameters
    ----------
    s, l :
        Parse action arguments, unused here.
    t :
        Parsed tokens; only `t[0]` is read.

    """
    logger.debug("VARIABLE: {}".format(t))
    try:
        declaration = t[0]
        val = self.eval_expr(declaration)
        for d in declaration.decl_list:
            name, typ = self.process_type(declaration.type, d)

            if type(typ[-1]) is not tuple:
                # This is a variable
                logger.debug(" Add variable: {} {} {}".format(name,
                                                              typ, val))
                self.add_def('variables', name, (val, typ))
                self.add_def('values', name, val)
                continue

            # This is a function prototype
            logger.debug(" Add function prototype: {} {} {}".format(
                name, typ, val))
            self.add_def('functions', name,
                         typ.add_compatibility_hack())
    except Exception:
        logger.exception('Error processing variable: {}'.format(t))
def process_typedef(self, s, l, t):
    """Parse action registering every name introduced by a typedef.

    Parameters
    ----------
    s, l :
        Parse action arguments, unused here.
    t :
        Parsed tokens; `type` and `decl_list` are read.

    """
    logger.debug("TYPE: {}".format(t))
    base = t.type
    for declarator in t.decl_list:
        alias, described = self.process_type(base, declarator)
        logger.debug(" {} {}".format(alias, described))
        self.add_def('types', alias, described)
# --- Utility methods
def eval_expr(self, toks):
    """Evaluate an expression, returning None when that is not possible.

    Currently only works for expressions that also happen to be valid
    python expressions.

    Parameters
    ----------
    toks : str or parse result
        Either the expression text itself, or an object providing
        `array_values` and `value`.

    Returns
    -------
    object or None
        The evaluated value, a list of values for `array_values`, or
        None if there is nothing to evaluate or the evaluation failed.

    """
    logger.debug("Eval: {}".format(toks))

    def evaluate(text):
        # Known values are offered as local names to the evaluation.
        return self.eval(text, None, self.defs['values'])

    try:
        if isinstance(toks, str):
            return evaluate(toks)
        if toks.array_values != '':
            return [evaluate(item) for item in toks.array_values]
        if toks.value != '':
            return evaluate(toks.value)
        return None
    except Exception:
        logger.debug(" failed eval {} : {}".format(toks, format_exc()))
        return None
def eval(self, expr, *args):
    """Just eval with a little extra robustness.

    The expression is stripped and rewritten with a transform that
    suppresses parenthesised casts (type spec + abstract declarator).
    Quoted strings and numbers are alternatives listed before the cast
    in that transform.

    Parameters
    ----------
    expr : str
        Expression text.
    *args :
        Passed on to the builtin `eval` (globals, locals).

    Returns
    -------
    object or None
        The evaluated value, or None if nothing is left to evaluate.

    """
    stripped = expr.strip()
    cast = (lparen + self.type_spec + self.abstract_declarator +
            rparen).suppress()
    scrubber = quotedString | number | cast
    cleaned = scrubber.transformString(stripped)
    if cleaned == '':
        return None
    # NOTE(review): this runs the builtin eval on text that presumably
    # originates from the parsed header - only feed trusted headers.
    return eval(cleaned, *args)
def add_def(self, typ, name, val):
    """Store a definition globally and for the current file.

    Per-file definitions are keyed by the base name of the current file
    (None when there is no current file). The per-file table is created
    on first use with one empty dict per entry of `data_list`.

    Parameters
    ----------
    typ : str
        Definition category.
    name : str
        Name of the definition.
    val :
        Value to store.

    """
    self.defs[typ][name] = val

    current = self.current_file
    base_name = None if current is None else os.path.basename(current)

    if base_name not in self.file_defs:
        self.file_defs[base_name] = {k: {} for k in self.data_list}
    self.file_defs[base_name][typ][name] = val
def rem_def(self, typ, name):
    """Remove a definition from the global and the current file's set.

    Parameters
    ----------
    typ : str
        Definition category.
    name : str
        Name of the definition to delete.

    Raises
    ------
    KeyError
        If the definition is missing from either set.

    """
    current = self.current_file
    base_name = os.path.basename(current) if current is not None else None
    del self.defs[typ][name]
    del self.file_defs[base_name][typ][name]
def is_fund_type(self, typ):
    """Return True if this type is a fundamental C type, struct, or
    union.

    **ATTENTION: This function is legacy and should be replaced by
    Type.is_fund_type()**

    """
    wrapped = Type(typ)
    return wrapped.is_fund_type()
def eval_type(self, typ):
    """Evaluate a named type into its fundamental type.

    Anything that is not already a `Type` is unpacked into one first.

    **ATTENTION: This function is legacy and should be replaced by
    Type.eval()**

    """
    as_type = typ if isinstance(typ, Type) else Type(*typ)
    return as_type.eval(self.defs['types'])
def find(self, name):
    """Search all per-file definitions for the given name.

    Parameters
    ----------
    name : str or pattern
        A string is compared for equality; anything else is handed to
        `re.match` as the pattern.

    Returns
    -------
    list of tuple
        (file, category) for each exact match of a string, or
        (file, category, key) for each pattern match.

    """
    exact = isinstance(name, str)
    hits = []
    for fname, categories in self.file_defs.items():
        for category, entries in categories.items():
            for key in entries:
                if exact:
                    if key == name:
                        hits.append((fname, category))
                elif re.match(name, key):
                    hits.append((fname, category, key))
    return hits
def find_text(self, text):
    """Search all file strings for text, return matching lines.

    Parameters
    ----------
    text : str
        Substring to look for.

    Returns
    -------
    list of tuple
        (file, zero-based line index, line) for every line containing
        `text`.

    """
    hits = []
    for fname in self.files:
        rows = self.files[fname].split('\n')
        hits.extend((fname, idx, row)
                    for idx, row in enumerate(rows)
                    if text in row)
    return hits
# --- Basic parsing elements.
def kwl(strs):
    """Build a match-first keyword matcher from a list of strings.

    The strings are joined into one regular expression alternation that
    is enclosed in word boundaries; they are inserted as given.

    """
    alternation = '|'.join(strs)
    pattern = r'\b({})\b'.format(alternation)
    return Regex(pattern)
def flatten(lst):
    """Flatten nested lists/tuples into one flat list of strings.

    Every leaf is converted with `str`; order is preserved.

    """
    flat = []
    for item in lst:
        if not isinstance(item, (list, tuple)):
            flat.append(str(item))
            continue
        flat += flatten(item)
    return flat
def recombine(tok):
    """Flatten a tree of tokens and join it into one big string.

    The pieces are separated by single spaces.

    """
    pieces = flatten(tok.asList())
    return " ".join(pieces)
def print_parse_results(pr, depth=0, name=''):
    """For debugging; pretty-prints parse result objects.

    Parameters
    ----------
    pr :
        A ParseResults object, printed recursively, or any other value,
        printed via `str`.
    depth : int
        Nesting level; rendered as repeated '..'.
    name : str
        Label printed in front, padded to 20 characters.

    """
    prefix = name.ljust(20) + ':' + '..' * depth
    if not isinstance(pr, ParseResults):
        print(prefix + str(pr))
        return

    print(prefix)
    for child in pr:
        # Label the child with the first results name bound to this very
        # object, if there is one.
        label = next((key for key in pr.keys() if pr[key] is child), '')
        print_parse_results(child, depth + 1, label)
# Syntactic delimiters
def _delimiter(char):
    """Return a suppressed literal for `char` that ignores quoted strings."""
    return Literal(char).ignore(quotedString).suppress()


comma = _delimiter(",")
colon = _delimiter(":")
semi = _delimiter(";")
lbrace = _delimiter("{")
rbrace = _delimiter("}")
lbrack = _delimiter("[")
rbrack = _delimiter("]")
lparen = _delimiter("(")
rparen = _delimiter(")")
# Numbers
def int_strip(t):
    """Parse action removing trailing 'U'/'L' suffixes from an integer."""
    return t[0].rstrip('UL')


# Raw strings keep `\s` a regex escape instead of an invalid string escape
# (which Python reports as a DeprecationWarning/SyntaxWarning).
hexint = Regex(
    r'[+-]?\s*0[xX][{}]+[UL]*'.format(hexnums)).setParseAction(int_strip)
decint = Regex(r'[+-]?\s*[0-9]+[UL]*').setParseAction(int_strip)
integer = (hexint | decint)
# The floating regex is ugly but it is because we do not want to match
# integer to it.
floating = Regex(
    r'[+-]?\s*((((\d(\.\d*)?)|(\.\d+))[eE][+-]?\d+)|((\d\.\d*)|(\.\d+)))')
number = (floating | integer)
# Miscellaneous
# Operator tokens used when assembling `expression` in _init_cparser.
bi_operator = oneOf("+ - / * | & || && ! ~ ^ % == != > < >= <= -> . :: << >> = ? :")
uni_right_operator = oneOf("++ --")
uni_left_operator = oneOf("++ -- - + * sizeof new")
# Characters regarded as part of a word when checking word boundaries.
wordchars = alphanums+'_$'
# A name: starts with a letter or '_' and must sit on word boundaries.
name = (WordStart(wordchars) + Word(alphas+"_", alphanums+"_$") +
        WordEnd(wordchars))
size_modifiers = ['short', 'long']
sign_modifiers = ['signed', 'unsigned']
# Syntax elements defined by _init_cparser.
expression = Forward()
array_op = lbrack + expression + rbrack
# The following names are placeholders; _init_cparser assigns them.
base_types = None
ident = None
call_conv = None
type_qualifier = None
storage_class_spec = None
extra_modifier = None
fund_type = None
extra_type_list = []
# Type names that are always known; _init_cparser combines them with the
# extra types it is given to form `base_types`.
c99_int_types = ['int8_t', 'uint8_t', 'int16_t', 'uint16_t',
                 'int32_t', 'uint32_t', 'int64_t', 'uint64_t']
num_types = ['int', 'float', 'double'] + c99_int_types
nonnum_types = ['char', 'bool', 'void']
# Define some common language elements when initialising.
def _init_cparser(extra_types=None, extra_modifiers=None):
    """Build the module-level parsing elements.

    Assigns the globals `extra_type_list`, `base_types`, `ident`,
    `call_conv`, `type_qualifier`, `storage_class_spec`, `extra_modifier`
    and `fund_type`, and fills the `expression` forward declaration in
    place.

    Parameters
    ----------
    extra_types : iterable of str, optional
        Additional type names appended to the built-in base types.
    extra_modifiers : list of str, optional
        Additional keywords that are matched (each optionally followed by
        a nested expression) and suppressed. When empty or None,
        `extra_modifier` is set to None.

    """
    global expression
    global call_conv, ident
    global base_types
    global type_qualifier, storage_class_spec, extra_modifier
    global fund_type
    global extra_type_list
    # Some basic definitions
    extra_type_list = [] if extra_types is None else list(extra_types)
    base_types = nonnum_types + num_types + extra_type_list
    storage_classes = ['inline', 'static', 'extern']
    qualifiers = ['const', 'volatile', 'restrict', 'near', 'far']
    keywords = (['struct', 'enum', 'union', '__stdcall', '__cdecl'] +
                qualifiers + base_types + size_modifiers + sign_modifiers)
    keyword = kwl(keywords)
    wordchars = alphanums+'_$'
    # An identifier is a word that is not one of the keywords above.
    ident = (WordStart(wordchars) + ~keyword +
             Word(alphas + "_", alphanums + "_$") +
             WordEnd(wordchars)).setParseAction(lambda t: t[0])
    call_conv = Optional(Keyword('__cdecl') |
                         Keyword('__stdcall'))('call_conv')
    # Removes '__name' from all type specs. may cause trouble.
    underscore_2_ident = (WordStart(wordchars) + ~keyword + '__' +
                          Word(alphanums, alphanums+"_$") +
                          WordEnd(wordchars)).setParseAction(lambda t: t[0])
    type_qualifier = ZeroOrMore((underscore_2_ident + Optional(nestedExpr())) |
                                kwl(qualifiers))
    storage_class_spec = Optional(kwl(storage_classes))
    if extra_modifiers:
        extra_modifier = ZeroOrMore(kwl(extra_modifiers) +
                                    Optional(nestedExpr())).suppress()
    else:
        extra_modifier = None
    # Language elements
    # Consecutive sign/size/base type keywords are joined with single
    # spaces into one type name.
    fund_type = OneOrMore(kwl(sign_modifiers + size_modifiers +
                              base_types)).setParseAction(lambda t: ' '.join(t))
    # Is there a better way to process expressions with cast operators??
    # cast_atom differs from uncast_atom only by the optional, suppressed
    # '(' ident ')' in front of the operand.
    cast_atom = (
        ZeroOrMore(uni_left_operator) + Optional('('+ident+')').suppress() +
        ((ident + '(' + Optional(delimitedList(expression)) + ')' |
          ident + OneOrMore('[' + expression + ']') |
          ident | number | quotedString
          ) |
         ('(' + expression + ')')) +
        ZeroOrMore(uni_right_operator)
    )
    uncast_atom = (
        ZeroOrMore(uni_left_operator) +
        ((ident + '(' + Optional(delimitedList(expression)) + ')' |
          ident + OneOrMore('[' + expression + ']') |
          ident | number | quotedString
          ) |
         ('(' + expression + ')')) +
        ZeroOrMore(uni_right_operator)
    )
    atom = cast_atom | uncast_atom
    # `expression` is filled in place; it is already referenced by the
    # atoms above and by the module-level `array_op`.
    expression << Group(atom + ZeroOrMore(bi_operator + atom))
    # Matched expressions are collapsed into a single string.
    expression.setParseAction(recombine)