diff --git a/backends/__init__.py b/backends/__init__.py index b4f7b6786b4061c7174fbad0bff8bf9a1deb55a6..3b23aca7af30894625ab210ec25a9eeb6d14d50b 100644 --- a/backends/__init__.py +++ b/backends/__init__.py @@ -1,2 +1,6 @@ -from .llvm import generateLLVM -from .cbackend import generateC, generateCUDA +try: + from .llvm import generateLLVM +except ImportError: + pass + +from .cbackend import generateC diff --git a/backends/cbackend.py b/backends/cbackend.py index dadb807c8e45e58ac2a15a68da3793d1fda85f63..e4a63541459426de4b48052274fac41ddedcdeca 100644 --- a/backends/cbackend.py +++ b/backends/cbackend.py @@ -8,16 +8,10 @@ def generateC(astNode): """ fieldTypes = set([f.dtype for f in astNode.fieldsAccessed]) useFloatConstants = "double" not in fieldTypes - printer = CBackend(cuda=False, constantsAsFloats=useFloatConstants) + printer = CBackend(constantsAsFloats=useFloatConstants) return printer(astNode) -def generateCUDA(astNode): - fieldTypes = set([f.dtype for f in astNode.fieldsAccessed]) - useFloatConstants = "double" not in fieldTypes - printer = CBackend(cuda=True, constantsAsFloats=useFloatConstants) - return printer(astNode) - # --------------------------------------- Backend Specific Nodes ------------------------------------------------------- @@ -55,8 +49,7 @@ class PrintNode(CustomCppCode): class CBackend(object): - def __init__(self, cuda=False, constantsAsFloats=False, sympyPrinter=None): - self.cuda = cuda + def __init__(self, constantsAsFloats=False, sympyPrinter=None): if sympyPrinter is None: self.sympyPrinter = CustomSympyPrinter(constantsAsFloats) else: @@ -76,8 +69,7 @@ class CBackend(object): def _print_KernelFunction(self, node): functionArguments = ["%s %s" % (str(s.dtype), s.name) for s in node.parameters] - prefix = "__global__ void" if self.cuda else "void" - funcDeclaration = "%s %s(%s)" % (prefix, node.functionName, ", ".join(functionArguments)) + funcDeclaration = "FUNC_PREFIX void %s(%s)" % (node.functionName, ", 
".join(functionArguments)) body = self._print(node.body) return funcDeclaration + "\n" + body diff --git a/cpu/cpujit.py b/cpu/cpujit.py index 459359e3a347c113d389e60434b1d02362de284a..7027fd987557bc55e3dcf734974e8d49b703e1b2 100644 --- a/cpu/cpujit.py +++ b/cpu/cpujit.py @@ -2,14 +2,15 @@ from __future__ import print_function import os import subprocess from ctypes import cdll, c_double, c_float, sizeof -import tempfile import shutil from pystencils.backends.cbackend import generateC import numpy as np -import pickle import hashlib import json -from collections import OrderedDict +import platform +import glob +import atexit +from collections import OrderedDict, Mapping from pystencils.transformations import symbolNameToVariableName @@ -32,7 +33,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): except KeyError: # not all parameters specified yet return makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict) - func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName] + func = compileAndLoad(kernelFunctionNode) func.restype = None return lambda: func(*args) @@ -53,28 +54,43 @@ def setCompilerConfig(config): An example JSON file with all possible keys. 
If not all keys are specified, default values are used `` { - "compiler": "/software/intel/2017/bin/icpc", - "flags": "-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp", - "env": { - "LM_PROJECT": "iwia", + "compiler": + { + "command": "/software/intel/2017/bin/icpc", + "flags": "-Ofast -DNDEBUG -fPIC -march=native -fopenmp", + "env": { + "LM_PROJECT": "iwia" + } } } `` """ - global _compilerConfig - _compilerConfig = config.copy() + global _config + _config = config.copy() + + +def _recursiveDictUpdate(d, u): + for k, v in u.items(): + if isinstance(v, Mapping): + r = _recursiveDictUpdate(d.get(k, {}), v) + d[k] = r + else: + d[k] = u[k] + return d def getConfigurationFilePath(): - configFileName = ".pystencils.json" - configPathInHome = os.path.expanduser(os.path.join("~", configFileName)) + if platform.system().lower() == 'linux': + configPathInHome = os.path.expanduser(os.path.join("~", '.config', 'pystencils', 'config.json')) + else: + configPathInHome = os.path.expanduser(os.path.join("~", '.pystencils', 'config.json')) # 1) Read path from environment variable if found if 'PYSTENCILS_CONFIG' in os.environ: return os.environ['PYSTENCILS_CONFIG'], True - # 2) Look in current directory for .pystencils.json - elif os.path.exists(configFileName): - return configFileName, True + # 2) Look in current directory for pystencils.json + elif os.path.exists("pystencils.json"): + return "pystencils.json", True # 3) Try ~/.pystencils.json elif os.path.exists(configPathInHome): return configPathInHome, True @@ -82,24 +98,86 @@ def getConfigurationFilePath(): return configPathInHome, False -def readCompilerConfig(): - defaultConfig = OrderedDict([ - ('compiler', 'g++'), - ('flags', '-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp'), - ]) +def createFolder(path, isFile): + if isFile: + path = os.path.split(path)[0] + try: + os.makedirs(path) + except os.error: + pass + + +def readConfig(): + if platform.system().lower() == 'linux': + defaultCompilerConfig = 
OrderedDict([ + ('os', 'linux'), + ('command', 'g++'), + ('flags', '-Ofast -DNDEBUG -fPIC -march=native -fopenmp'), + ('restrictQualifier', '__restrict__') + ]) + defaultCacheConfig = OrderedDict([ + ('readFromSharedLibrary', False), + ('objectCache', '/tmp/pystencils/objectcache'), + ('clearCacheOnStart', False), + ('sharedLibrary', '/tmp/pystencils/cache.so'), + ]) + elif platform.system().lower() == 'windows': + defaultCompilerConfig = OrderedDict([ + ('os', 'windows'), + ('arch', 'x64'), + ('flags', '/Ox /fp:fast /openmp'), + ('restrictQualifier', '__restrict') + ]) + defaultCacheConfig = OrderedDict([ + ('readFromSharedLibrary', False), + ('objectCache', os.path.join('~', '.pystencils', 'objectcache')), + ('clearCacheOnStart', False), + ('sharedLibrary', os.path.join('~', '.pystencils', 'cache.dll')), + ]) + + defaultConfig = OrderedDict([('compiler', defaultCompilerConfig), + ('cache', defaultCacheConfig)]) + configPath, configExists = getConfigurationFilePath() config = defaultConfig.copy() if configExists: - config.update(json.load(open(configPath, 'r'))) + loadedConfig = json.load(open(configPath, 'r')) + config = _recursiveDictUpdate(config, loadedConfig) + + config['cache']['sharedLibrary'] = os.path.expanduser(config['cache']['sharedLibrary']) + config['cache']['objectCache'] = os.path.expanduser(config['cache']['objectCache']) + + # create folders if they don't exist yet + createFolder(configPath, True) + + if config['cache']['clearCacheOnStart']: + shutil.rmtree(config['cache']['objectCache'], ignore_errors=True) + + createFolder(config['cache']['objectCache'], False) + createFolder(config['cache']['sharedLibrary'], True) + json.dump(config, open(configPath, 'w'), indent=4) + + if 'env' not in config['compiler']: + config['compiler']['env'] = {} + + if config['compiler']['os'] == 'windows': + from setuptools.msvc import msvc14_get_vc_env + msvcEnv = msvc14_get_vc_env(config['compiler']['arch']) + config['compiler']['env'].update({k.upper(): v for k, v 
in msvcEnv.items()}) + return config -_compilerConfig = readCompilerConfig() +_config = readConfig() def getCompilerConfig(): - return _compilerConfig + return _config['compiler'] + + +def getCacheConfig(): + return _config['cache'] def ctypeFromString(typename, includePointers=True): @@ -137,45 +215,121 @@ def ctypeFromNumpyType(numpyType): return typeMap[numpyType] -def compile(code, tmpDir, libFile, createAssemblyCode=False): - srcFile = os.path.join(tmpDir, 'source.cpp') - with open(srcFile, 'w') as sourceFile: - print('#include <iostream>', file=sourceFile) - print("#include <cmath>", file=sourceFile) +def hashToFunctionName(h): + res = "func_%s" % (h,) + return res.replace('-', 'm') + + +def compileObjectCacheToSharedLibrary(): + compilerConfig = getCompilerConfig() + cacheConfig = getCacheConfig() + + sharedLibrary = cacheConfig['sharedLibrary'] + if len(sharedLibrary) == 0 or cacheConfig['readFromSharedLibrary']: + return + + configEnv = compilerConfig['env'] if 'env' in compilerConfig else {} + compileEnvironment = os.environ.copy() + compileEnvironment.update(configEnv) + + try: + if compilerConfig['os'] == 'windows': + allObjectFiles = glob.glob(os.path.join(cacheConfig['objectCache'], '*.obj')) + linkCmd = ['link.exe', '/DLL', '/out:' + sharedLibrary] + else: + allObjectFiles = glob.glob(os.path.join(cacheConfig['objectCache'], '*.o')) + linkCmd = [compilerConfig['command'], '-shared', '-o', sharedLibrary] + + linkCmd += allObjectFiles + if len(allObjectFiles) > 0: + runCompileStep(linkCmd) + except subprocess.CalledProcessError as e: + print(e.output) + raise e + +atexit.register(compileObjectCacheToSharedLibrary) + + +def generateCode(ast, includes, restrictQualifier, functionPrefix, targetFile): + with open(targetFile, 'w') as sourceFile: + code = generateC(ast) + includes = "\n".join(["#include <%s>" % (includeFile,) for includeFile in includes]) + print(includes, file=sourceFile) + print("#define RESTRICT %s" % (restrictQualifier,), 
file=sourceFile) + print("#define FUNC_PREFIX %s" % (functionPrefix,), file=sourceFile) print('extern "C" { ', file=sourceFile) print(code, file=sourceFile) print('}', file=sourceFile) - config = getCompilerConfig() - compilerCmd = [config['compiler']] + config['flags'].split() - compilerCmd += [srcFile, '-o', libFile] - configEnv = config['env'] if 'env' in config else {} - env = os.environ.copy() - env.update(configEnv) + +def runCompileStep(command): + compilerConfig = getCompilerConfig() + configEnv = compilerConfig['env'] if 'env' in compilerConfig else {} + compileEnvironment = os.environ.copy() + compileEnvironment.update(configEnv) + try: - subprocess.check_output(compilerCmd, env=env, stderr=subprocess.STDOUT) + shell = True if compilerConfig['os'].lower() == 'windows' else False + subprocess.check_output(command, env=compileEnvironment, stderr=subprocess.STDOUT, shell=shell) except subprocess.CalledProcessError as e: + print(" ".join(command)) print(e.output) raise e - assembly = None - if createAssemblyCode: - assemblyFile = os.path.join(tmpDir, "assembly.s") - compilerCmd = [config['compiler'], '-S', '-o', assemblyFile, srcFile] + config['flags'].split() - subprocess.call(compilerCmd, env=env) - assembly = open(assemblyFile, 'r').read() - return assembly +def compileLinux(ast, codeHashStr, srcFile, libFile): + cacheConfig = getCacheConfig() + compilerConfig = getCompilerConfig() + + objectFile = os.path.join(cacheConfig['objectCache'], codeHashStr + '.o') + # Compilation + if not os.path.exists(objectFile): + generateCode(ast, ['iostream', 'cmath'], compilerConfig['restrictQualifier'], '', srcFile) + compileCmd = [compilerConfig['command'], '-c'] + compilerConfig['flags'].split() + compileCmd += ['-o', objectFile, srcFile] + runCompileStep(compileCmd) + + # Linking + runCompileStep([compilerConfig['command'], '-shared', objectFile, '-o', libFile] + compilerConfig['flags'].split()) + + +def compileWindows(ast, codeHashStr, srcFile, libFile): + 
cacheConfig = getCacheConfig() + compilerConfig = getCompilerConfig() + + objectFile = os.path.join(cacheConfig['objectCache'], codeHashStr + '.obj') + # Compilation + if not os.path.exists(objectFile): + generateCode(ast, ['iostream', 'cmath'], compilerConfig['restrictQualifier'], + '__declspec(dllexport)', srcFile) + + # /c compiles only, /EHsc turns off exception handling in c code + compileCmd = ['cl.exe', '/c', '/EHsc'] + compilerConfig['flags'].split() + compileCmd += [srcFile, '/Fo' + objectFile] + runCompileStep(compileCmd) + + # Linking + runCompileStep(['link.exe', '/DLL', '/out:' + libFile, objectFile]) -def compileAndLoad(kernelFunctionNode): - tmpDir = tempfile.mkdtemp() - libFile = os.path.join(tmpDir, "jit.so") - sourceCode = generateC(kernelFunctionNode) - compile(sourceCode, tmpDir, libFile) - loadedJitLib = cdll.LoadLibrary(libFile) - shutil.rmtree(tmpDir) - return loadedJitLib +def compileAndLoad(ast): + cacheConfig = getCacheConfig() + + codeHashStr = hashlib.sha256(generateC(ast).encode()).hexdigest() + ast.functionName = hashToFunctionName(codeHashStr) + + srcFile = os.path.join(cacheConfig['objectCache'], codeHashStr + ".cpp") + + if cacheConfig['readFromSharedLibrary']: + return cdll.LoadLibrary(cacheConfig['sharedLibrary'])[ast.functionName] + else: + if getCompilerConfig()['os'].lower() == 'windows': + libFile = os.path.join(cacheConfig['objectCache'], codeHashStr + ".dll") + compileWindows(ast, codeHashStr, srcFile, libFile) + else: + libFile = os.path.join(cacheConfig['objectCache'], codeHashStr + ".so") + compileLinux(ast, codeHashStr, srcFile, libFile) + return cdll.LoadLibrary(libFile)[ast.functionName] def buildCTypeArgumentList(parameterSpecification, argumentDict): @@ -223,7 +377,7 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict): def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict): - func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName] + func = 
compileAndLoad(kernelFunctionNode) func.restype = None parameters = kernelFunctionNode.parameters @@ -236,70 +390,3 @@ def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict): return wrapper -class CachedKernel(object): - def __init__(self, configDict, ast, parameterValues): - self.configDict = configDict - self.ast = ast - self.parameterValues = parameterValues - self.funcPtr = None - - def __compile(self): - self.funcPtr = makePythonFunction(self.ast, self.parameterValues) - - def __call__(self, *args, **kwargs): - if self.funcPtr is None: - self.__compile() - self.funcPtr(*args, **kwargs) - - -def hashToFunctionName(h): - res = "func_%s" % (h,) - return res.replace('-', 'm') - - -def createLibrary(cachedKernels, libraryFile): - libraryInfoFile = libraryFile + ".info" - - tmpDir = tempfile.mkdtemp() - code = "" - infoDict = {} - for cachedKernel in cachedKernels: - s = repr(sorted(cachedKernel.configDict.items())) - configHash = hashlib.sha1(s.encode()).hexdigest() - cachedKernel.ast.functionName = hashToFunctionName(configHash) - kernelCode = generateC(cachedKernel.ast) - code += kernelCode + "\n" - infoDict[configHash] = {'code': kernelCode, - 'parameterValues': cachedKernel.parameterValues, - 'configDict': cachedKernel.configDict, - 'parameterSpecification': cachedKernel.ast.parameters} - - compile(code, tmpDir, libraryFile) - pickle.dump(infoDict, open(libraryInfoFile, "wb")) - shutil.rmtree(tmpDir) - - -def loadLibrary(libraryFile): - libraryInfoFile = libraryFile + ".info" - - libraryFile = cdll.LoadLibrary(libraryFile) - libraryInfo = pickle.load(open(libraryInfoFile, 'rb')) - - def getKernel(**kwargs): - s = repr(sorted(kwargs.items())) - configHash = hashlib.sha1(s.encode()).hexdigest() - if configHash not in libraryInfo: - raise ValueError("No such kernel in library") - func = libraryFile[hashToFunctionName(configHash)] - func.restype = None - - def wrapper(**kwargs): - from copy import copy - fullArguments = 
copy(libraryInfo[configHash]['parameterValues']) - fullArguments.update(kwargs) - args = buildCTypeArgumentList(libraryInfo[configHash]['parameterSpecification'], fullArguments) - func(*args) - wrapper.configDict = libraryInfo[configHash]['configDict'] - return wrapper - - return getKernel diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py index fb98db294e71205b7a3dc02bf8a9381c42d08a7b..d211c9eb4dcdecc95a6ba72eb543dff673b34577 100644 --- a/gpucuda/__init__.py +++ b/gpucuda/__init__.py @@ -1,3 +1,2 @@ from pystencils.gpucuda.kernelcreation import createCUDAKernel from pystencils.gpucuda.cudajit import makePythonFunction -from pystencils.backends.cbackend import generateCUDA \ No newline at end of file diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index d7385eb1300ead22a18fbbca40d0fd72abaa6ba1..57fb3da588b085cae372b025d5ae243accbdf7f3 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -3,7 +3,7 @@ import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule from pycuda.gpuarray import GPUArray -from pystencils.backends.cbackend import generateCUDA +from pystencils.backends.cbackend import generateC from pystencils.transformations import symbolNameToVariableName @@ -58,7 +58,11 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict): def makePythonFunction(kernelFunctionNode, argumentDict={}): - mod = SourceModule(str(generateCUDA(kernelFunctionNode)), options=["-w"]) + code = "#define FUNC_PREFIX __global__\n" + code += "#define RESTRICT __restrict__\n\n" + code += str(generateC(kernelFunctionNode)) + + mod = SourceModule(code, options=["-w"]) func = mod.get_function(kernelFunctionNode.functionName) def wrapper(**kwargs): diff --git a/types.py b/types.py index 85d1b9124b8bafce0dd5a8a836676ecf13424a02..328863970e2343597f3dce71041a9db22a0c3b60 100644 --- a/types.py +++ b/types.py @@ -45,7 +45,7 @@ class DataType(object): self.const = True elif s == '*': self.ptr = True - elif s == '__restrict__': + elif s == 
'RESTRICT': self.alias = False else: self.dtype = _dtype_dict[s] @@ -56,7 +56,7 @@ class DataType(object): def __repr__(self): return "{!s} {!s}{!s} {!s}".format("const" if self.const else "", _c_dtype_dict[self.dtype], - "*" if self.ptr else "", "__restrict__" if not self.alias else "") + "*" if self.ptr else "", "RESTRICT" if not self.alias else "") def __eq__(self, other): if self.alias == other.alias and self.const == other.const and self.ptr == other.ptr and self.dtype == other.dtype: @@ -64,5 +64,6 @@ class DataType(object): else: return False + def get_type_from_sympy(node): return DataType('int') \ No newline at end of file