diff --git a/astnodes.py b/astnodes.py index a2eaa1e1375b5cc73bc02ff4140d818754db2e16..2ea3de40a0d35dd479cd1c61b52dd398706e55c6 100644 --- a/astnodes.py +++ b/astnodes.py @@ -1,6 +1,7 @@ import sympy as sp +from sympy.tensor import IndexedBase from pystencils.field import Field -from pystencils.types import TypedSymbol, DataType, get_type_from_sympy, _c_dtype_dict +from pystencils.types import TypedSymbol, createType, get_type_from_sympy class Node(object): @@ -261,7 +262,7 @@ class LoopOverCoordinate(Node): @staticmethod def getLoopCounterSymbol(coordinateToLoopOver): - return TypedSymbol(LoopOverCoordinate.getLoopCounterName(coordinateToLoopOver), DataType('int')) + return TypedSymbol(LoopOverCoordinate.getLoopCounterName(coordinateToLoopOver), 'int') @property def loopCounterSymbol(self): @@ -479,8 +480,8 @@ class Indexed(Expr): def __init__(self, args, base, parent=None): super(Indexed, self).__init__(args, parent) self.base = base - #Get dtype from label, and unpointer it - self.dtype = DataType(base.label.dtype.dtype) + # Get dtype from label, and unpointer it + self.dtype = createType(base.label.dtype.baseType) def __repr__(self): return '%s[%s]' % (self.args[0], self.args[1]) diff --git a/backends/__init__.py b/backends/__init__.py index fe72a215430a981ab1e29a05551e23770914d67c..0944afa4aca4b70e97f8b13689f3f9bfb59ec299 100644 --- a/backends/__init__.py +++ b/backends/__init__.py @@ -1,3 +1,7 @@ -from .llvm import generateLLVM -from .cbackend import generateC, generateCUDA +try: + from .llvm import generateLLVM +except ImportError: + pass + +from .cbackend import generateC from .dot import dotprint diff --git a/backends/cbackend.py b/backends/cbackend.py index dadb807c8e45e58ac2a15a68da3793d1fda85f63..63172de8a32a194f478c2101132276776e3e71bf 100644 --- a/backends/cbackend.py +++ b/backends/cbackend.py @@ -1,5 +1,6 @@ from sympy.utilities.codegen import CCodePrinter from pystencils.astnodes import Node +from pystencils.types import createType def generateC(astNode): @@ -7,17 +8,11 @@ def generateC(astNode): Prints the abstract syntax tree as C function """ fieldTypes = set([f.dtype for f in astNode.fieldsAccessed]) - useFloatConstants = "double" not in fieldTypes - printer = CBackend(cuda=False, constantsAsFloats=useFloatConstants) + useFloatConstants = createType("double") not in fieldTypes + printer = CBackend(constantsAsFloats=useFloatConstants) return printer(astNode) -def generateCUDA(astNode): - fieldTypes = set([f.dtype for f in astNode.fieldsAccessed]) - useFloatConstants = "double" not in fieldTypes - printer = CBackend(cuda=True, constantsAsFloats=useFloatConstants) - return printer(astNode) - # --------------------------------------- Backend Specific Nodes ------------------------------------------------------- @@ -55,8 +50,7 @@ class PrintNode(CustomCppCode): class CBackend(object): - def __init__(self, cuda=False, constantsAsFloats=False, sympyPrinter=None): - self.cuda = cuda + def __init__(self, constantsAsFloats=False, sympyPrinter=None): if sympyPrinter is None: self.sympyPrinter = CustomSympyPrinter(constantsAsFloats) else: @@ -76,8 +70,7 @@ class CBackend(object): def _print_KernelFunction(self, node): functionArguments = ["%s %s" % (str(s.dtype), s.name) for s in node.parameters] - prefix = "__global__ void" if self.cuda else "void" - funcDeclaration = "%s %s(%s)" % (prefix, node.functionName, ", ".join(functionArguments)) + funcDeclaration = "FUNC_PREFIX void %s(%s)" % (node.functionName, ", ".join(functionArguments)) body = self._print(node.body) return funcDeclaration + "\n" + body diff --git a/backends/llvm.py b/backends/llvm.py index 330dbb0322f9499bd43eb9583fc7f1f983b649cc..144f7a23ef550da28fc488ef3f14ce24ba60e6f0 100644 --- a/backends/llvm.py +++ b/backends/llvm.py @@ -6,7 +6,7 @@ from sympy import S # S is numbers? from pystencils.llvm.control_flow import Loop -from ..types import DataType +from ..types import createType, to_llvmlite_type from ..astnodes import Indexed @@ -38,9 +38,9 @@ class LLVMPrinter(Printer): self.tmp_var[name] = value def _print_Number(self, n): - if n.dtype == DataType("int"): + if n.dtype == createType("int"): return ir.Constant(self.integer, int(n)) - elif n.dtype == DataType("double"): + elif n.dtype == createType("double"): return ir.Constant(self.fp_type, float(n)) else: raise NotImplementedError("Numbers can only have int and double", n) @@ -88,7 +88,7 @@ class LLVMPrinter(Printer): def _print_Mul(self, expr): nodes = [self._print(a) for a in expr.args] e = nodes[0] - if expr.dtype == DataType('double'): + if expr.dtype == createType('double'): mul = self.builder.fmul else: # int TODO others? mul = self.builder.mul @@ -99,7 +99,7 @@ class LLVMPrinter(Printer): def _print_Add(self, expr): nodes = [self._print(a) for a in expr.args] e = nodes[0] - if expr.dtype == DataType('double'): + if expr.dtype == createType('double'): add = self.builder.fadd else: # int TODO others? add = self.builder.add @@ -164,17 +164,22 @@ class LLVMPrinter(Printer): from_dtype = conversion.args[0].dtype # (From, to) decision = { - (DataType("int"), DataType("double")): functools.partial(self.builder.sitofp, node, self.fp_type), - (DataType("double"), DataType("int")): functools.partial(self.builder.fptosi, node, self.integer), - (DataType("double *"), DataType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), - (DataType("int"), DataType("double *")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), - (DataType("double * __restrict__"), DataType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), - (DataType("int"), DataType("double * __restrict__")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), - (DataType("const double * __restrict__"), DataType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), - (DataType("int"), DataType("const double * __restrict__")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), - } + (createType("int"), createType("double")): functools.partial(self.builder.sitofp, node, self.fp_type), + (createType("double"), createType("int")): functools.partial(self.builder.fptosi, node, self.integer), + (createType("double *"), createType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), + (createType("int"), createType("double *")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), + (createType("double * restrict"), createType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), + (createType("int"), createType("double * restrict")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), + (createType("double * restrict const"), createType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), + (createType("int"), createType("double * restrict const")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), + } # TODO float, const, restrict # TODO bitcast, addrspacecast + # print([x for x in decision.keys()]) + # print("Types:") + # print([(type(x), type(y)) for (x, y) in decision.keys()]) + # print("Cast:") + # print((from_dtype, to_dtype)) return decision[(from_dtype, to_dtype)]() def _print_Indexed(self, indexed): diff --git a/cpu/cpujit.py b/cpu/cpujit.py index 20d414aeb3979c8461b176356d708d7a07ff5111..365b0462c5704ed8d8e431ea0ea3da5466766bd6 100644 --- a/cpu/cpujit.py +++ b/cpu/cpujit.py @@ -1,16 +1,78 @@ +""" + +*pystencils* looks for a configuration file in JSON format at the following locations in the listed order. + +1. at the path specified in the environment variable ``PYSTENCILS_CONFIG`` +2. in the current working direction for a file named ``pystencils.json`` +3. or in your home directory at ``~/.config/pystencils/config.json`` (Linux) or + ``%HOMEPATH%\.pystencils\config.json`` (Windows) + +If no configuration file is found, a default configuration is created at the above mentioned location in your home. +So run *pystencils* once, then edit the created configuration file. + + +Compiler Config (Linux) +----------------------- + +- **'os'**: should be detected automatically as 'linux' +- **'command'**: path to C++ compiler (defaults to 'g++') +- **'flags'**: space separated list of compiler flags. Make sure to activate OpenMP in your compiler +- **'restrictQualifier'**: the restrict qualifier is not standardized accross compilers. + For most Linux compilers the qualifier is ``__restrict__`` + + +Compiler Config (Windows) +------------------------- + +*pystencils* uses the mechanism of *setuptools.msvc* to search for a compilation environment. +Then 'cl.exe' is used to compile. + +- **'os'**: should be detected automatically as 'windows' +- **'msvcVersion'**: either a version number, year number, 'auto' or 'latest' for automatic detection of latest + installed version or 'setuptools' for setuptools-based detection +- **'arch'**: 'x86' or 'x64' +- **'flags'**: flags passed to 'cl.exe', make sure OpenMP is activated +- **'restrictQualifier'**: the restrict qualifier is not standardized accross compilers. + For Windows compilers the qualifier should be ``__restrict`` + + +Cache Config +------------ + +*pystencils* uses a directory to store intermediate files like the generated C++ files, compiled object files and +the shared libraries which are then loaded from Python using ctypes. The file names are SHA hashes of the +generated code. If the same kernel was already compiled, the existing object file is used - no recompilation is done. + +If 'sharedLibrary' is specified, all kernels that are currently in the cache are compiled into a single shared library. +This mechanism can be used to run *pystencils* on systems where compilation is not possible, e.g. on clusters where +compilation on the compute nodes is not possible. +First the script is run on a system where compilation is possible (e.g. the login node) with +'readFromSharedLibrary=False' and with 'sharedLibrary' set a valid path. +All kernels generated during the run are put into the cache and at the end +compiled into the shared library. Then, the same script can be run from the compute nodes, with +'readFromSharedLibrary=True', such that kernels are taken from the library instead of compiling them. + + +- **'readFromSharedLibrary'**: if true kernels are not compiled but assumed to be in the shared library +- **'objectCache'**: path to a folder where intermediate files are stored +- **'clearCacheOnStart'**: when true the cache is cleared on each start of a *pystencils* script +- **'sharedLibrary'**: path to a shared library file, which is created if `readFromSharedLibrary=false` + +""" from __future__ import print_function import os import subprocess -from ctypes import cdll, c_double, c_float, sizeof -import tempfile -import shutil -from pystencils.backends.cbackend import generateC -import numpy as np -import pickle import hashlib import json -from collections import OrderedDict +import platform +import glob +import atexit +import shutil +from ctypes import cdll, sizeof +from pystencils.backends.cbackend import generateC +from collections import OrderedDict, Mapping from pystencils.transformations import symbolNameToVariableName +from pystencils.types import toCtypes, getBaseType, createType def makePythonFunction(kernelFunctionNode, argumentDict={}): @@ -32,7 +94,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): except KeyError: # not all parameters specified yet return makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict) - func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName] + func = compileAndLoad(kernelFunctionNode) func.restype = None return lambda: func(*args) @@ -53,28 +115,43 @@ def setCompilerConfig(config): An example JSON file with all possible keys. If not all keys are specified, default values are used `` { - "compiler": "/software/intel/2017/bin/icpc", - "flags": "-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp", - "env": { - "LM_PROJECT": "iwia", + 'compiler' : + { + "command": "/software/intel/2017/bin/icpc", + "flags": "-Ofast -DNDEBUG -fPIC -march=native -fopenmp", + "env": { + "LM_PROJECT": "iwia", + } } } `` """ - global _compilerConfig - _compilerConfig = config.copy() + global _config + _config = config.copy() + + +def _recursiveDictUpdate(d, u): + for k, v in u.items(): + if isinstance(v, Mapping): + r = _recursiveDictUpdate(d.get(k, {}), v) + d[k] = r + else: + d[k] = u[k] + return d def getConfigurationFilePath(): - configFileName = ".pystencils.json" - configPathInHome = os.path.expanduser(os.path.join("~", configFileName)) + if platform.system().lower() == 'linux': + configPathInHome = os.path.expanduser(os.path.join("~", '.config', 'pystencils', 'config.json')) + else: + configPathInHome = os.path.expanduser(os.path.join("~", '.pystencils', 'config.json')) # 1) Read path from environment variable if found if 'PYSTENCILS_CONFIG' in os.environ: return os.environ['PYSTENCILS_CONFIG'], True - # 2) Look in current directory for .pystencils.json - elif os.path.exists(configFileName): - return configFileName, True + # 2) Look in current directory for pystencils.json + elif os.path.exists("pystencils.json"): + return "pystencils.json", True # 3) Try ~/.pystencils.json elif os.path.exists(configPathInHome): return configPathInHome, True @@ -82,99 +159,204 @@ def getConfigurationFilePath(): return configPathInHome, False -def readCompilerConfig(): - defaultConfig = OrderedDict([ - ('compiler', 'g++'), - ('flags', '-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp'), - ]) +def createFolder(path, isFile): + if isFile: + path = os.path.split(path)[0] + try: + os.makedirs(path) + except os.error: + pass + + +def readConfig(): + if platform.system().lower() == 'linux': + defaultCompilerConfig = OrderedDict([ + ('os', 'linux'), + ('command', 'g++'), + ('flags', '-Ofast -DNDEBUG -fPIC -march=native -fopenmp -std=c++11'), + ('restrictQualifier', '__restrict__') + ]) + defaultCacheConfig = OrderedDict([ + ('readFromSharedLibrary', False), + ('objectCache', '/tmp/pystencils/objectcache'), + ('clearCacheOnStart', False), + ('sharedLibrary', '/tmp/pystencils/cache.so'), + ]) + elif platform.system().lower() == 'windows': + defaultCompilerConfig = OrderedDict([ + ('os', 'windows'), + ('msvcVersion', 'latest'), + ('arch', 'x64'), + ('flags', '/Ox /fp:fast /openmp /arch:avx'), + ('restrictQualifier', '__restrict') + ]) + defaultCacheConfig = OrderedDict([ + ('readFromSharedLibrary', False), + ('objectCache', os.path.join('~', '.pystencils', 'objectcache')), + ('clearCacheOnStart', False), + ('sharedLibrary', os.path.join('~', '.pystencils', 'cache.dll')), + ]) + + defaultConfig = OrderedDict([('compiler', defaultCompilerConfig), + ('cache', defaultCacheConfig)]) + configPath, configExists = getConfigurationFilePath() config = defaultConfig.copy() if configExists: - config.update(json.load(open(configPath, 'r'))) - json.dump(config, open(configPath, 'w'), indent=4) + loadedConfig = json.load(open(configPath, 'r')) + config = _recursiveDictUpdate(config, loadedConfig) + else: + createFolder(configPath, True) + json.dump(config, open(configPath, 'w'), indent=4) + + config['cache']['sharedLibrary'] = os.path.expanduser(config['cache']['sharedLibrary']) + config['cache']['objectCache'] = os.path.expanduser(config['cache']['objectCache']) + + if config['cache']['clearCacheOnStart']: + shutil.rmtree(config['cache']['objectCache'], ignore_errors=True) + + createFolder(config['cache']['objectCache'], False) + createFolder(config['cache']['sharedLibrary'], True) + + if 'env' not in config['compiler']: + config['compiler']['env'] = {} + + if config['compiler']['os'] == 'windows': + from pystencils.cpu.msvc_detection import getEnvironment + msvcEnv = getEnvironment(config['compiler']['msvcVersion'], config['compiler']['arch']) + config['compiler']['env'].update(msvcEnv) + return config -_compilerConfig = readCompilerConfig() +_config = readConfig() def getCompilerConfig(): - return _compilerConfig + return _config['compiler'] -def ctypeFromString(typename, includePointers=True): - import ctypes as ct +def getCacheConfig(): + return _config['cache'] - typename = str(typename).replace("*", " * ") - typeComponents = typename.split() - basicTypeMap = { - 'double': ct.c_double, - 'float': ct.c_float, - 'int': ct.c_int, - 'long': ct.c_long, - } +def hashToFunctionName(h): + res = "func_%s" % (h,) + return res.replace('-', 'm') - resultType = None - for typeComponent in typeComponents: - typeComponent = typeComponent.strip() - if typeComponent == "const" or typeComponent == "restrict" or typeComponent == "volatile": - continue - if typeComponent in basicTypeMap: - resultType = basicTypeMap[typeComponent] - elif typeComponent == "*" and includePointers: - assert resultType is not None - resultType = ct.POINTER(resultType) - return resultType +def compileObjectCacheToSharedLibrary(): + compilerConfig = getCompilerConfig() + cacheConfig = getCacheConfig() + sharedLibrary = cacheConfig['sharedLibrary'] + if len(sharedLibrary) == 0 or cacheConfig['readFromSharedLibrary']: + return -def ctypeFromNumpyType(numpyType): - typeMap = { - np.dtype('float64'): c_double, - np.dtype('float32'): c_float, - } - return typeMap[numpyType] + configEnv = compilerConfig['env'] if 'env' in compilerConfig else {} + compileEnvironment = os.environ.copy() + compileEnvironment.update(configEnv) + + try: + if compilerConfig['os'] == 'windows': + allObjectFiles = glob.glob(os.path.join(cacheConfig['objectCache'], '*.obj')) + linkCmd = ['link.exe', '/DLL', '/out:' + sharedLibrary] + else: + allObjectFiles = glob.glob(os.path.join(cacheConfig['objectCache'], '*.o')) + linkCmd = [compilerConfig['command'], '-shared', '-o', sharedLibrary] + + linkCmd += allObjectFiles + if len(allObjectFiles) > 0: + runCompileStep(linkCmd) + except subprocess.CalledProcessError as e: + print(e.output) + raise e + +atexit.register(compileObjectCacheToSharedLibrary) -def compile(code, tmpDir, libFile, createAssemblyCode=False): - srcFile = os.path.join(tmpDir, 'source.cpp') - with open(srcFile, 'w') as sourceFile: - print('#include <iostream>', file=sourceFile) - print("#include <cmath>", file=sourceFile) +def generateCode(ast, includes, restrictQualifier, functionPrefix, targetFile): + with open(targetFile, 'w') as sourceFile: + code = generateC(ast) + includes = "\n".join(["#include <%s>" % (includeFile,) for includeFile in includes]) + print(includes, file=sourceFile) + print("#define RESTRICT %s" % (restrictQualifier,), file=sourceFile) + print("#define FUNC_PREFIX %s" % (functionPrefix,), file=sourceFile) print('extern "C" { ', file=sourceFile) print(code, file=sourceFile) print('}', file=sourceFile) - config = getCompilerConfig() - compilerCmd = [config['compiler']] + config['flags'].split() - compilerCmd += [srcFile, '-o', libFile] - configEnv = config['env'] if 'env' in config else {} - env = os.environ.copy() - env.update(configEnv) + +def runCompileStep(command): + compilerConfig = getCompilerConfig() + configEnv = compilerConfig['env'] if 'env' in compilerConfig else {} + compileEnvironment = os.environ.copy() + compileEnvironment.update(configEnv) + try: - subprocess.check_output(compilerCmd, env=env, stderr=subprocess.STDOUT) + shell = True if compilerConfig['os'].lower() == 'windows' else False + subprocess.check_output(command, env=compileEnvironment, stderr=subprocess.STDOUT, shell=shell) except subprocess.CalledProcessError as e: + print(" ".join(command)) print(e.output) raise e - assembly = None - if createAssemblyCode: - assemblyFile = os.path.join(tmpDir, "assembly.s") - compilerCmd = [config['compiler'], '-S', '-o', assemblyFile, srcFile] + config['flags'].split() - subprocess.call(compilerCmd, env=env) - assembly = open(assemblyFile, 'r').read() - return assembly +def compileLinux(ast, codeHashStr, srcFile, libFile): + cacheConfig = getCacheConfig() + compilerConfig = getCompilerConfig() + + objectFile = os.path.join(cacheConfig['objectCache'], codeHashStr + '.o') + # Compilation + if not os.path.exists(objectFile): + generateCode(ast, ['iostream', 'cmath', 'cstdint'], compilerConfig['restrictQualifier'], '', srcFile) + compileCmd = [compilerConfig['command'], '-c'] + compilerConfig['flags'].split() + compileCmd += ['-o', objectFile, srcFile] + runCompileStep(compileCmd) + + # Linking + runCompileStep([compilerConfig['command'], '-shared', objectFile, '-o', libFile] + compilerConfig['flags'].split()) + + +def compileWindows(ast, codeHashStr, srcFile, libFile): + cacheConfig = getCacheConfig() + compilerConfig = getCompilerConfig() + + objectFile = os.path.join(cacheConfig['objectCache'], codeHashStr + '.obj') + # Compilation + if not os.path.exists(objectFile): + generateCode(ast, ['iostream', 'cmath', 'cstdint'], compilerConfig['restrictQualifier'], + '__declspec(dllexport)', srcFile) -def compileAndLoad(kernelFunctionNode): - tmpDir = tempfile.mkdtemp() - libFile = os.path.join(tmpDir, "jit.so") - compile(generateC(kernelFunctionNode), tmpDir, libFile) - loadedJitLib = cdll.LoadLibrary(libFile) - shutil.rmtree(tmpDir) + # /c compiles only, /EHsc turns of exception handling in c code + compileCmd = ['cl.exe', '/c', '/EHsc'] + compilerConfig['flags'].split() + compileCmd += [srcFile, '/Fo' + objectFile] + runCompileStep(compileCmd) - return loadedJitLib + # Linking + runCompileStep(['link.exe', '/DLL', '/out:' + libFile, objectFile]) + + +def compileAndLoad(ast): + cacheConfig = getCacheConfig() + + codeHashStr = hashlib.sha256(generateC(ast).encode()).hexdigest() + ast.functionName = hashToFunctionName(codeHashStr) + + srcFile = os.path.join(cacheConfig['objectCache'], codeHashStr + ".cpp") + + if cacheConfig['readFromSharedLibrary']: + return cdll.LoadLibrary(cacheConfig['sharedLibrary'])[ast.functionName] + else: + if getCompilerConfig()['os'].lower() == 'windows': + libFile = os.path.join(cacheConfig['objectCache'], codeHashStr + ".dll") + if not os.path.exists(libFile): + compileWindows(ast, codeHashStr, srcFile, libFile) + else: + libFile = os.path.join(cacheConfig['objectCache'], codeHashStr + ".so") + if not os.path.exists(libFile): + compileLinux(ast, codeHashStr, srcFile, libFile) + return cdll.LoadLibrary(libFile)[ast.functionName] def buildCTypeArgumentList(parameterSpecification, argumentDict): @@ -183,10 +365,14 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict): arrayShapes = set() for arg in parameterSpecification: if arg.isFieldArgument: - field = argumentDict[arg.fieldName] + try: + field = argumentDict[arg.fieldName] + except KeyError: + raise KeyError("Missing field parameter for kernel call " + arg.fieldName) + symbolicField = arg.field if arg.isFieldPtrArgument: - ctArguments.append(field.ctypes.data_as(ctypeFromString(arg.dtype))) + ctArguments.append(field.ctypes.data_as(toCtypes(arg.dtype))) if symbolicField.hasFixedShape: if tuple(int(i) for i in symbolicField.shape) != field.shape: raise ValueError("Passed array '%s' has shape %s which does not match expected shape %s" % @@ -199,11 +385,11 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict): if not symbolicField.isIndexField: arrayShapes.add(field.shape[:symbolicField.spatialDimensions]) elif arg.isFieldShapeArgument: - dataType = ctypeFromString(arg.dtype, includePointers=False) + dataType = toCtypes(getBaseType(arg.dtype)) ctArguments.append(field.ctypes.shape_as(dataType)) elif arg.isFieldStrideArgument: - dataType = ctypeFromString(arg.dtype, includePointers=False) - baseFieldType = ctypeFromNumpyType(field.dtype) + dataType = toCtypes(getBaseType(arg.dtype)) + baseFieldType = toCtypes(createType(field.dtype)) strides = field.ctypes.strides_as(dataType) for i in range(len(field.shape)): assert strides[i] % sizeof(baseFieldType) == 0 @@ -212,8 +398,11 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict): else: assert False else: - param = argumentDict[arg.name] - expectedType = ctypeFromString(arg.dtype) + try: + param = argumentDict[arg.name] + except KeyError: + raise KeyError("Missing parameter for kernel call " + arg.name) + expectedType = toCtypes(arg.dtype) ctArguments.append(expectedType(param)) if len(arrayShapes) > 1: @@ -222,7 +411,7 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict): def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict): - func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName] + func = compileAndLoad(kernelFunctionNode) func.restype = None parameters = kernelFunctionNode.parameters @@ -235,70 +424,3 @@ def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict): return wrapper -class CachedKernel(object): - def __init__(self, configDict, ast, parameterValues): - self.configDict = configDict - self.ast = ast - self.parameterValues = parameterValues - self.funcPtr = None - - def __compile(self): - self.funcPtr = makePythonFunction(self.ast, self.parameterValues) - - def __call__(self, *args, **kwargs): - if self.funcPtr is None: - self.__compile() - self.funcPtr(*args, **kwargs) - - -def hashToFunctionName(h): - res = "func_%s" % (h,) - return res.replace('-', 'm') - - -def createLibrary(cachedKernels, libraryFile): - libraryInfoFile = libraryFile + ".info" - - tmpDir = tempfile.mkdtemp() - code = "" - infoDict = {} - for cachedKernel in cachedKernels: - s = repr(sorted(cachedKernel.configDict.items())) - configHash = hashlib.sha1(s.encode()).hexdigest() - cachedKernel.ast.functionName = hashToFunctionName(configHash) - kernelCode = generateC(cachedKernel.ast) - code += kernelCode + "\n" - infoDict[configHash] = {'code': kernelCode, - 'parameterValues': cachedKernel.parameterValues, - 'configDict': cachedKernel.configDict, - 'parameterSpecification': cachedKernel.ast.parameters} - - compile(code, tmpDir, libraryFile) - pickle.dump(infoDict, open(libraryInfoFile, "wb")) - shutil.rmtree(tmpDir) - - -def loadLibrary(libraryFile): - libraryInfoFile = libraryFile + ".info" - - libraryFile = cdll.LoadLibrary(libraryFile) - libraryInfo = pickle.load(open(libraryInfoFile, 'rb')) - - def getKernel(**kwargs): - s = repr(sorted(kwargs.items())) - configHash = hashlib.sha1(s.encode()).hexdigest() - if configHash not in libraryInfo: - raise ValueError("No such kernel in library") - func = libraryFile[hashToFunctionName(configHash)] - func.restype = None - - def wrapper(**kwargs): - from copy import copy - fullArguments = copy(libraryInfo[configHash]['parameterValues']) - fullArguments.update(kwargs) - args = buildCTypeArgumentList(libraryInfo[configHash]['parameterSpecification'], fullArguments) - func(*args) - wrapper.configDict = libraryInfo[configHash]['configDict'] - return wrapper - - return getKernel diff --git a/cpu/kernelcreation.py b/cpu/kernelcreation.py index f5fb46569331c68256e017c4e4f30ffaed55c08a..f4e306a35a348bc8445b3b68f2c9adea6a311a63 100644 --- a/cpu/kernelcreation.py +++ b/cpu/kernelcreation.py @@ -1,7 +1,7 @@ import sympy as sp from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, typingFromSympyInspection, \ typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop -from pystencils.types import TypedSymbol, DataType +from pystencils.types import TypedSymbol from pystencils.field import Field import pystencils.astnodes as ast @@ -37,7 +37,7 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl if isinstance(term, Field.Access) or isinstance(term, TypedSymbol): return term elif isinstance(term, sp.Symbol): - return TypedSymbol(term.name, DataType(typeForSymbol[term.name])) + return TypedSymbol(term.name, typeForSymbol[term.name]) else: raise ValueError("Term has to be field access or symbol") diff --git a/cpu/msvc_detection.py b/cpu/msvc_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..61cfe4aed5670eab6dc18996efe72c9e6ba74b7d --- /dev/null +++ b/cpu/msvc_detection.py @@ -0,0 +1,81 @@ +import subprocess +import os + + +def getEnvironment(versionSpecifier, arch='x64'): + """ + Returns an environment dictionary, for activating the Visual Studio compiler + :param versionSpecifier: either a version number, year number, 'auto' or 'latest' for automatic detection of latest + installed version or 'setuptools' for setuptools-based detection + :param arch: x86 or x64 + """ + if versionSpecifier == 'setuptools': + return getEnvironmentFromSetupTools(arch) + else: + if versionSpecifier in ('auto', 'latest'): + versionNr = findLatestMsvcVersionUsingEnvironmentVariables() + else: + versionNr = normalizeMsvcVersion(versionSpecifier) + vcVarsPath = getVcVarsPath(versionNr) + return getEnvironmentFromVcVarsFile(vcVarsPath, arch) + + +def findLatestMsvcVersionUsingEnvironmentVariables(): + import re + regex = re.compile('VS(\d\d)\dCOMNTOOLS') + versions = [] + for key, value in os.environ.items(): + match = regex.match(key) + if match: + versions.append(int(match.group(1))) + if len(versions) == 0: + raise ValueError("Visual Studio not found.") + versions.sort() + return versions[-1] + + +def normalizeMsvcVersion(version): + """ + Takes version specifiers in the following form: + - year: 2012, 2013, 2015, either as int or string + - version numbers with or without dot i.e. 11.0 or 11 + :return: integer version number + """ + if isinstance(version, str) and '.' in version: + version = version.split('.')[0] + + version = int(version) + mapping = { + 2015: 14, + 2013: 12, + 2012: 11 + } + if version in mapping: + return mapping[version] + else: + return version + + +def getEnvironmentFromVcVarsFile(vcVarsFile, arch): + out = subprocess.check_output( + 'cmd /u /c "{}" {} && set'.format(vcVarsFile, arch), + stderr=subprocess.STDOUT, + ).decode('utf-16le', errors='replace') + + env = {key.upper(): value for key, _, value in (line.partition('=') for line in out.splitlines()) if key and value} + return env + + +def getVcVarsPath(versionNr): + environmentVarName = 'VS%d0COMNTOOLS' % (versionNr,) + vcPath = os.environ[environmentVarName] + path = os.path.join(vcPath, '..', '..', 'VC', 'vcvarsall.bat') + return os.path.abspath(path) + + +def getEnvironmentFromSetupTools(arch): + from setuptools.msvc import msvc14_get_vc_env + msvcEnv = msvc14_get_vc_env(arch) + return {k.upper(): v for k, v in msvcEnv.items()} + + diff --git a/equationcollection/equationcollection.py b/equationcollection/equationcollection.py index 05787b5f3e6766b8415136fe463118d352d654ee..86fb5700cab04af7e1cf7a2c022dff05be2160d3 100644 --- a/equationcollection/equationcollection.py +++ b/equationcollection/equationcollection.py @@ -66,8 +66,8 @@ class EquationCollection(object): newEquations = [fastSubs(eq, substitutionDict) for eq in self.mainEquations] if addSubstitutionsAsSubexpressions: newSubexpressions = [sp.Eq(b, a) for a, b in substitutionDict.items()] + newSubexpressions - - return self.copy(newEquations, sortEquationsTopologically(newSubexpressions)) + newSubexpressions = sortEquationsTopologically(newSubexpressions) + return self.copy(newEquations, newSubexpressions) def addSimplificationHint(self, key, value): """ diff --git a/field.py b/field.py index acb31391f3eb7aeb78f65fe7c60eb975a9fb2c78..546e049d2b2ec732a4d9dcfc9354877a86c7dc0d 100644 --- a/field.py +++ b/field.py @@ -3,7 +3,7 @@ import numpy as np import sympy as sp from sympy.core.cache import cacheit from sympy.tensor import IndexedBase -from pystencils.types import TypedSymbol +from pystencils.types import TypedSymbol, createType class Field(object): @@ -122,7 +122,7 @@ class Field(object): def __init__(self, fieldName, dtype, layout, shape, strides): """Do not use directly. Use static create* methods""" self._fieldName = fieldName - self._dtype = numpyDataTypeToC(dtype) + self._dtype = createType(dtype) self._layout = normalizeLayout(layout) self.shape = shape self.strides = strides @@ -372,17 +372,6 @@ def computeStrides(shape, layout): return tuple(strides) -def numpyDataTypeToC(dtype): - """Mapping numpy data types to C data types""" - if dtype == np.float64: - return "double" - elif dtype == np.float32: - return "float" - elif dtype == np.int32: - return "int" - raise NotImplementedError("Cannot convert type " + str(dtype)) - - def offsetComponentToDirectionString(coordinateId, value): """ Translates numerical offset to string notation. diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py index fb98db294e71205b7a3dc02bf8a9381c42d08a7b..d211c9eb4dcdecc95a6ba72eb543dff673b34577 100644 --- a/gpucuda/__init__.py +++ b/gpucuda/__init__.py @@ -1,3 +1,2 @@ from pystencils.gpucuda.kernelcreation import createCUDAKernel from pystencils.gpucuda.cudajit import makePythonFunction -from pystencils.backends.cbackend import generateCUDA \ No newline at end of file diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index d7385eb1300ead22a18fbbca40d0fd72abaa6ba1..57fb3da588b085cae372b025d5ae243accbdf7f3 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -3,7 +3,7 @@ import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule from pycuda.gpuarray import GPUArray -from pystencils.backends.cbackend import generateCUDA +from pystencils.backends.cbackend import generateC from pystencils.transformations import symbolNameToVariableName @@ -58,7 +58,11 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict): def makePythonFunction(kernelFunctionNode, argumentDict={}): - mod = SourceModule(str(generateCUDA(kernelFunctionNode)), options=["-w"]) + code = "#define FUNC_PREFIX __global__\n" + code += "#define RESTRICT __restrict__\n\n" + code += str(generateC(kernelFunctionNode)) + + mod = SourceModule(code, options=["-w"]) func = mod.get_function(kernelFunctionNode.functionName) def wrapper(**kwargs): diff --git a/llvm/jit.py b/llvm/jit.py index 345161811233c029c954b7623713f759de833de2..8cd67cd06e8029cdff5524291dab9783959bcfd7 100644 --- a/llvm/jit.py +++ b/llvm/jit.py @@ -1,6 +1,8 @@ -import llvmlite.binding as llvm import llvmlite.ir as ir +import llvmlite.binding as llvm import logging.config +from ..types import toCtypes, createType + import ctypes as ct @@ -69,21 +71,19 @@ class Eval(object): f.write(target_machine.emit_object(llvmmod)) fptr = {} - # TODO cpujit has its own string version - types = {str(ir.DoubleType()): ct.c_double, - str(ir.IntType(64)): ct.c_int64, # TODO int width? - str(ir.FloatType()): ct.c_float, - str(ir.VoidType()): None, # TODO thats a void pointer use None??? - str(ir.DoubleType().as_pointer()): ct.POINTER(ct.c_double), - str(ir.IntType(64).as_pointer()): ct.POINTER(ct.c_int), # TODO int width? - str(ir.FloatType().as_pointer()): ct.POINTER(ct.c_float), - #ir.VoidType().as_pointer(): ct.c_void_p, # TODO there is no void pointer in llvmlite? - } # TODO Aggregate types for function in module.functions: if not function.is_declaration: + print(function.name) print(type(function)) - fptr[function.name] = ct.CFUNCTYPE(types[str(function.ftype.return_type)], *[types[str(arg)] for arg in function.ftype.args])(ee.get_function_address(function.name)) + print(function.ftype.return_type) + print(type(function.ftype.return_type)) + return_type = None + if function.ftype.return_type != ir.VoidType(): + return_type = toCtypes(createType(str(function.ftype.return_type))) + args = [toCtypes(createType(str(arg))) for arg in function.ftype.args] + function_address = ee.get_function_address(function.name) + fptr[function.name] = ct.CFUNCTYPE(return_type, *args)(function_address) # result = fptr(2, 3) # print(result) return fptr diff --git a/llvm/kernelcreation.py b/llvm/kernelcreation.py index b07a5fb8feb760da988a47128e7942fa4ebe0e74..8c520cff33199184458de651150c9b86c3287a52 100644 --- a/llvm/kernelcreation.py +++ b/llvm/kernelcreation.py @@ -2,7 +2,7 @@ import sympy as sp from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, typingFromSympyInspection, \ typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop, \ desympy_ast, insert_casts -from pystencils.types import TypedSymbol, DataType +from pystencils.types import TypedSymbol from pystencils.field import Field import pystencils.astnodes as ast @@ -36,7 +36,7 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl if isinstance(term, Field.Access) or isinstance(term, TypedSymbol): return term elif isinstance(term, sp.Symbol): - return TypedSymbol(term.name, DataType(typeForSymbol[term.name])) + return TypedSymbol(term.name, typeForSymbol[term.name]) else: raise ValueError("Term has to be field access or symbol") diff --git a/slicing.py b/slicing.py index 7eebb38eaa3709a5f01b68e48e4d8129f5de881f..cf3a53cb3e72c42eee4e35f4bb54933dfd0e776e 100644 --- a/slicing.py +++ b/slicing.py @@ -47,6 +47,10 @@ def normalizeSlice(slices, sizes): return tuple(result) +def shiftSlice(slices, offset): + return [slice(k.start+offset, k.stop + offset, k.step) for k in slices] + + def sliceFromDirection(directionName, dim, normalOffset=0, tangentialOffset=0): """ Create a slice from a direction named by compass scheme: @@ -98,3 +102,95 @@ def addGhostLayers(arr, indexDimensions=0, ghostLayers=1): indexing += [slice(None, None, None)] * indexDimensions result[indexing] = arr return result + + +def getSliceBeforeGhostLayer(direction, ghostLayers=1, thickness=None, fullSlice=False): + """ + Returns slicing expression for region before ghost layer + :param direction: tuple specifying direction of slice + :param ghostLayers: number of ghost layers + :param thickness: thickness of the slice, defaults to number of ghost layers + :param fullSlice: if true also the ghost cells in directions orthogonal to direction are contained in the + returned slice. Example (d=W ): if fullSlice then also the ghost layer in N-S and T-B + are included, otherwise only inner cells are returned + """ + if not thickness: + thickness = ghostLayers + fullSliceInc = ghostLayers if not fullSlice else 0 + slices = [] + for dirComponent in direction: + if dirComponent == -1: + s = slice(ghostLayers, thickness + ghostLayers) + elif dirComponent == 0: + end = -fullSliceInc + s = slice(fullSliceInc, end if end != 0 else None) + elif dirComponent == 1: + start = -thickness - ghostLayers + end = -ghostLayers + s = slice(start if start != 0 else None, end if end != 0 else None) + else: + raise ValueError("Invalid direction: only -1, 0, 1 components are allowed") + slices.append(s) + return tuple(slices) + + +def getGhostRegionSlice(direction, ghostLayers=1, thickness=None, fullSlice=False): + """ + Returns slice of ghost region. For parameters see :func:`getSliceBeforeGhostLayer` + """ + if not thickness: + thickness = ghostLayers + assert thickness > 0 + assert thickness <= ghostLayers + fullSliceInc = ghostLayers if not fullSlice else 0 + slices = [] + for dirComponent in direction: + if dirComponent == -1: + s = slice(ghostLayers - thickness, ghostLayers) + elif dirComponent == 0: + end = -fullSliceInc + s = slice(fullSliceInc, end if end != 0 else None) + elif dirComponent == 1: + start = -ghostLayers + end = - ghostLayers + thickness + s = slice(start if start != 0 else None, end if end != 0 else None) + else: + raise ValueError("Invalid direction: only -1, 0, 1 components are allowed") + slices.append(s) + return tuple(slices) + + +def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None): + """ + Returns a function that applies periodic boundary conditions + :param stencil: sequence of directions e.g. ( [0,1], [0,-1] ) for y periodicity + :param ghostLayers: how many ghost layers the array has + :param thickness: how many of the ghost layers to copy, None means 'all' + :return: function that takes a single array and applies the periodic copy operation + """ + srcDstSliceTuples = [] + + for d in stencil: + if sum([abs(e) for e in d]) == 0: + continue + invDir = (-e for e in d) + src = getSliceBeforeGhostLayer(invDir, ghostLayers, thickness=thickness, fullSlice=False) + dst = getGhostRegionSlice(d, ghostLayers, thickness=thickness, fullSlice=False) + print(d, ", ", src, "->", dst) + srcDstSliceTuples.append((src, dst)) + + def functor(field): + for srcSlice, dstSlice in srcDstSliceTuples: + field[dstSlice] = field[srcSlice] + + return functor + + +if __name__ == '__main__': + import numpy as np + f = np.arange(0, 8*8).reshape(8, 8) + from lbmpy.stencils import getStencil + res = getPeriodicBoundaryFunctor(getStencil("D2Q9"), ghostLayers=2, thickness=1) + print(f) + res(f) + print(f) diff --git a/sympyextensions.py b/sympyextensions.py index 687f97d79b7fbefdbdf7df471df054d966124eda..73dc9e2faa4751b79366dffc08e617ec2eac6963 100644 --- a/sympyextensions.py +++ b/sympyextensions.py @@ -129,7 +129,7 @@ def replaceSecondOrderProducts(expr, searchSymbols, positive=None, replaceMixed= else: otherFactors *= t if len(distinctVelTerms) == 2 and nrOfVelTerms == 2: - u, v = list(distinctVelTerms) + u, v = sorted(list(distinctVelTerms), key=lambda symbol: symbol.name) if positive is None: otherFactorsWithoutSymbols = otherFactors for s in otherFactors.atoms(sp.Symbol): diff --git a/transformations.py b/transformations.py index dc709985f6d9757a5a4052f9b00fe1df049dde8e..3c94329520c8699e2befe35c483bfede2c2352b1 100644 --- a/transformations.py +++ b/transformations.py @@ -1,4 +1,4 @@ -from collections import defaultdict +from collections import defaultdict, OrderedDict from operator import attrgetter import sympy as sp @@ -6,7 +6,7 @@ from sympy.logic.boolalg import Boolean from sympy.tensor import IndexedBase from pystencils.field import Field, offsetComponentToDirectionString -from pystencils.types import TypedSymbol, DataType +from pystencils.types import TypedSymbol, createType, PointerType from pystencils.slicing import normalizeSlice import pystencils.astnodes as ast @@ -62,7 +62,7 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None if len(shapeSet) != 1: raise ValueError("Differently sized field accesses in loop body: " + str(shapeSet)) - shape = list(shapeSet)[0] + shape = list(sorted(shapeSet, key=lambda e: str(e[0])))[0] if iterationSlice is not None: iterationSlice = normalizeSlice(iterationSlice, shape) @@ -109,7 +109,7 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): Example: >>> field = Field.createGeneric('myfield', spatialDimensions=2, indexDimensions=1) >>> x, y = sp.symbols("x y") - >>> prevPointer = TypedSymbol("ptr", DataType("double")) + >>> prevPointer = TypedSymbol("ptr", "double") >>> createIntermediateBasePointer(field[1,-2](5), {0: x}, prevPointer) (ptr_E, x*fstride_myfield[0] + fstride_myfield[0]) >>> createIntermediateBasePointer(field[1,-2](5), {0: x, 1 : y }, prevPointer) @@ -140,7 +140,7 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): if len(listToHash) > 0: name += "%0.6X" % (abs(hash(tuple(listToHash)))) - newPtr = TypedSymbol(previousPtr.name + name, DataType(previousPtr.dtype)) + newPtr = TypedSymbol(previousPtr.name + name, previousPtr.dtype) return newPtr, offset @@ -222,6 +222,9 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn counters to index the field these symbols are used as coordinates :return: transformed AST """ + fieldToBasePointerInfo = OrderedDict(sorted(fieldToBasePointerInfo.items(), key=lambda pair: pair[0])) + fieldToFixedCoordinates = OrderedDict(sorted(fieldToFixedCoordinates.items(), key=lambda pair: pair[0])) + def visitSympyExpr(expr, enclosingBlock, sympyAssignment): if isinstance(expr, Field.Access): fieldAccess = expr @@ -231,12 +234,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn else: basePointerInfo = [list(range(field.indexDimensions + field.spatialDimensions))] - dtype = DataType(field.dtype) - dtype.alias = False - dtype.ptr = True - if field.name in readOnlyFieldNames: - dtype.const = True - + dtype = PointerType(field.dtype, const=field.name in readOnlyFieldNames, restrict=True) fieldPtr = TypedSymbol("%s%s" % (Field.DATA_PREFIX, symbolNameToVariableName(field.name)), dtype) lastPointer = fieldPtr @@ -249,7 +247,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn coordDict[e] = fieldToFixedCoordinates[field.name][e] else: ctrName = ast.LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX - coordDict[e] = TypedSymbol("%s_%d" % (ctrName, e), DataType('int')) + coordDict[e] = TypedSymbol("%s_%d" % (ctrName, e), 'int') else: coordDict[e] = fieldAccess.index[e-field.spatialDimensions] return coordDict @@ -359,9 +357,8 @@ def splitInnerLoop(astNode, symbolGroups): assert len(outerLoop) == 1, "Error in AST, multiple outermost loops." outerLoop = outerLoop[0] - symbolsWithTemporaryArray = dict() - - assignmentMap = {a.lhs: a for a in innerLoop.body.args} + symbolsWithTemporaryArray = OrderedDict() + assignmentMap = OrderedDict((a.lhs, a) for a in innerLoop.body.args) assignmentGroups = [] for symbolGroup in symbolGroups: @@ -431,7 +428,7 @@ def typeAllEquations(eqs, typeForSymbol): elif isinstance(term, TypedSymbol): return term elif isinstance(term, sp.Symbol): - return TypedSymbol(symbolNameToVariableName(term.name), DataType(typeForSymbol[term.name])) + return TypedSymbol(symbolNameToVariableName(term.name), typeForSymbol[term.name]) else: newArgs = [processRhs(arg) for arg in term.args] return term.func(*newArgs) if newArgs else term @@ -444,7 +441,7 @@ def typeAllEquations(eqs, typeForSymbol): elif isinstance(term, TypedSymbol): return term elif isinstance(term, sp.Symbol): - return TypedSymbol(term.name, DataType(typeForSymbol[term.name])) + return TypedSymbol(term.name, typeForSymbol[term.name]) else: assert False, "Expected a symbol as left-hand-side" @@ -537,9 +534,9 @@ def get_type(node): # TODO sp.NumberSymbol elif isinstance(node, sp.Number): if isinstance(node, sp.Float): - return DataType('double') + return createType('double') elif isinstance(node, sp.Integer): - return DataType('int') + return createType('int') else: raise NotImplemented('Not yet supported: %s %s' % (node, type(node))) else: @@ -558,6 +555,9 @@ def insert_casts(node): #TODO revmove this pass elif isinstance(node, ast.Expr): + print(node.args) + print([type(arg) for arg in node.args]) + print([arg.dtype for arg in node.args]) args = sorted((arg for arg in node.args), key=attrgetter('dtype')) target = args[0] for i in range(len(args)): @@ -565,6 +565,7 @@ def insert_casts(node): args[i] = ast.Conversion(args[i], target.dtype, node) node.args = args node.dtype = target.dtype + print(node.dtype) elif isinstance(node, ast.SympyAssignment): if node.lhs.dtype != node.rhs.dtype: node.replace(node.rhs, ast.Conversion(node.rhs, node.lhs.dtype)) diff --git a/types.py b/types.py index 0fd58daa79aca1d838cf08b49a92e95545f8cc84..fbf39ea3c55d07d1c0e1872415d659d2cbb5a4b6 100644 --- a/types.py +++ b/types.py @@ -1,16 +1,18 @@ +import ctypes import sympy as sp +import numpy as np +import llvmlite.ir as ir from sympy.core.cache import cacheit class TypedSymbol(sp.Symbol): - def __new__(cls, name, *args, **kwds): obj = TypedSymbol.__xnew_cached_(cls, name, *args, **kwds) return obj def __new_stage2__(cls, name, dtype): obj = super(TypedSymbol, cls).__xnew__(cls, name) - obj._dtype = DataType(dtype) if isinstance(dtype, str) else dtype + obj._dtype = createType(dtype) return obj __xnew__ = staticmethod(__new_stage2__) @@ -29,41 +31,230 @@ class TypedSymbol(sp.Symbol): return self.name, self.dtype -_c_dtype_dict = {0: 'bool', 1: 'int', 2: 'float', 3: 'double'} -_dtype_dict = {'bool': 0, 'int': 1, 'float': 2, 'double': 3} - - -class DataType(object): - def __init__(self, dtype): - self.alias = True - self.const = False - self.ptr = False - self.dtype = 0 - if isinstance(dtype, str): - for s in dtype.split(): - if s == 'const': - self.const = True - elif s == '*': - self.ptr = True - elif s == '__restrict__': - self.alias = False - else: - self.dtype = _dtype_dict[s] - elif isinstance(dtype, DataType): - self.__dict__.update(dtype.__dict__) +def createType(specification): + """ + Create a subclass of Type according to a string or an object of subclass Type + :param specification: Type object, or a string + :return: Type object, or a new Type object parsed from the string + """ + if isinstance(specification, Type): + return specification + elif isinstance(specification, str): + return createTypeFromString(specification) + else: + npDataType = np.dtype(specification) + return BasicType(npDataType, const=False) + + +def createTypeFromString(specification): + """ + Creates a new Type object from a c-like string specification + :param specification: Specification string + :return: Type object + """ + specification = specification.lower().split() + parts = [] + current = [] + for s in specification: + if s == '*': + parts.append(current) + current = [s] else: - self.dtype = dtype + current.append(s) + if len(current) > 0: + parts.append(current) + # Parse native part + basePart = parts.pop(0) + const = False + if 'const' in basePart: + const = True + basePart.remove('const') + assert len(basePart) == 1 + if basePart[0][-1] == "*": + basePart[0] = basePart[0][:-1] + parts.append('*') + baseType = BasicType(basePart[0], const) + currentType = baseType + # Parse pointer parts + for part in parts: + restrict = False + const = False + if 'restrict' in part: + restrict = True + part.remove('restrict') + if 'const' in part: + const = True + part.remove("const") + assert len(part) == 1 and part[0] == '*' + currentType = PointerType(currentType, const, restrict) + return currentType + + +def getBaseType(type): + while type.baseType is not None: + type = type.baseType + return type + + +def toCtypes(dataType): + """ + Transforms a given Type into ctypes + :param dataType: Subclass of Type + :return: ctypes type object + """ + if isinstance(dataType, PointerType): + return ctypes.POINTER(toCtypes(dataType.baseType)) + else: + return toCtypes.map[dataType.numpyDtype] + + +toCtypes.map = { + np.dtype(np.int8): ctypes.c_int8, + np.dtype(np.int16): ctypes.c_int16, + np.dtype(np.int32): ctypes.c_int32, + np.dtype(np.int64): ctypes.c_int64, + + np.dtype(np.uint8): ctypes.c_uint8, + np.dtype(np.uint16): ctypes.c_uint16, + np.dtype(np.uint32): ctypes.c_uint32, + np.dtype(np.uint64): ctypes.c_uint64, + + np.dtype(np.float32): ctypes.c_float, + np.dtype(np.float64): ctypes.c_double, +} + + +def to_llvmlite_type(data_type): + """ + Transforms a given type into ctypes + :param data_type: Subclass of Type + :return: llvmlite type object + """ + if isinstance(data_type, PointerType): + return to_llvmlite_type.map[data_type.baseType].as_pointer() + else: + return to_llvmlite_type.map[data_type.numpyDType] + +to_llvmlite_type.map = { + np.dtype(np.int8): ir.IntType(8), + np.dtype(np.int16): ir.IntType(16), + np.dtype(np.int32): ir.IntType(32), + np.dtype(np.int64): ir.IntType(64), + + # TODO llvmlite doesn't seem to differentiate between Int types + np.dtype(np.uint8): ir.IntType(8), + np.dtype(np.uint16): ir.IntType(16), + np.dtype(np.uint32): ir.IntType(32), + np.dtype(np.uint64): ir.IntType(64), + + np.dtype(np.float32): ir.FloatType(), + np.dtype(np.float64): ir.DoubleType(), + # TODO const, restrict, void +} + + +class Type(object): + def __lt__(self, other): + # Needed for sorting the types inside an expression + if isinstance(self, BasicType): + if isinstance(other, BasicType): + return self.numpyDtype < other.numpyDtype # TODO const + if isinstance(other, PointerType): + return True # TODO test + if isinstance(other, StructType): + raise NotImplementedError("Struct type comparison is not yet implemented") + if isinstance(self, PointerType): + if isinstance(other, BasicType): + return False # TODO test + if isinstance(other, PointerType): + return self.baseType < other.baseType # TODO const, restrict + if isinstance(other, StructType): + raise NotImplementedError("Struct type comparison is not yet implemented") + if isinstance(self, StructType): + raise NotImplementedError("Struct type comparison is not yet implemented") + + +class BasicType(Type): + @staticmethod + def numpyNameToC(name): + if name == 'float64': + return 'double' + elif name == 'float32': + return 'float' + elif name.startswith('int'): + width = int(name[len("int"):]) + return "int%d_t" % (width,) + elif name.startswith('uint'): + width = int(name[len("int"):]) + return "uint%d_t" % (width,) + elif name == 'bool': + return 'bool' + else: + raise NotImplemented("Can map numpy to C name for %s" % (name,)) + + def __init__(self, dtype, const=False): + self.const = const + self._dtype = np.dtype(dtype) + assert self._dtype.fields is None, "Tried to initialize NativeType with a structured type" + assert self._dtype.hasobject is False + assert self._dtype.subdtype is None + + @property + def baseType(self): + return None + + @property + def numpyDtype(self): + return self._dtype def __repr__(self): - return "{!s} {!s}{!s} {!s}".format("const" if self.const else "", _c_dtype_dict[self.dtype], - "*" if self.ptr else "", "__restrict__" if not self.alias else "") + result = BasicType.numpyNameToC(str(self._dtype)) + if self.const: + result += " const" + return result def __eq__(self, other): - if self.alias == other.alias and self.const == other.const and self.ptr == other.ptr and self.dtype == other.dtype: - return True + if not isinstance(other, BasicType): + return False else: + return (self.numpyDtype, self.const) == (other.numpyDtype, other.const) + + def __hash__(self): + return hash(str(self)) + + +class PointerType(Type): + def __init__(self, baseType, const=False, restrict=True): + self._baseType = baseType + self.const = const + self.restrict = restrict + + @property + def alias(self): + return not self.restrict + + @property + def baseType(self): + return self._baseType + + def __eq__(self, other): + if not isinstance(other, PointerType): return False + else: + return (self.baseType, self.const, self.restrict) == (other.baseType, other.const, other.restrict) + + def __repr__(self): + return "%s * %s%s" % (self.baseType, "RESTRICT " if self.restrict else "", "const " if self.const else "") + + def __hash__(self): + return hash(str(self)) + + +class StructType(object): + def __init__(self, numpyType): + self._dtype = np.dtype(numpyType) + # TODO this should not work at all!!! def __gt__(self, other): if self.ptr and not other.ptr: return True @@ -75,6 +266,11 @@ class DataType(object): def get_type_from_sympy(node): + """ + Creates a Type object from a Sympy object + :param node: Sympy object + :return: Type object + """ # Rational, NumberSymbol? # Zero, One, NegativeOne )= Integer # Half )= Rational @@ -85,10 +281,10 @@ def get_type_from_sympy(node): raise TypeError(node, 'is not a sp.Number') if isinstance(node, sp.Float) or isinstance(node, sp.RealNumber): - return DataType('double'), float(node) + return createType('double'), float(node) elif isinstance(node, sp.Integer): - return DataType('int'), int(node) + return createType('int'), int(node) elif isinstance(node, sp.Rational): raise NotImplementedError('Rationals are not supported yet') else: - raise TypeError(node, ' is not a supported type!') + raise TypeError(node, ' is not a supported type (yet)!')