[flake8]
max-line-length=120
exclude=src/pystencils/jupyter.py,
        src/pystencils/plot.py,
src/pystencils/session.py
ignore = W293 W503 W291 C901 E741
src/pystencils/_version.py export-subst
__pycache__
.ipynb_checkpoints
.coverage*
*.pyc
*.vti
/build
/dist
*.egg-info
.cache
_build
/html_doc
/.idea
.vscode
.cache
_local_tmp
RELEASE-VERSION
test-report
src/pystencils/boundaries/createindexlistcython.c
src/pystencils/boundaries/createindexlistcython.*.so
tests/tmp
tests/var
tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
report.xml
coverage_report/
# macOS
**/.DS_Store
*.uuid
stages:
- pretest
- test
- nightly
- docs
- deploy
# -------------------------- Templates ------------------------------------------------------------------------------------
# Base configuration for jobs meant to run at every commit
.every-commit:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"
# Configuration for jobs meant to run on each commit to pycodegen/pystencils/master
.every-commit-master:
rules:
- if: '$CI_PIPELINE_SOURCE != "schedule" && $CI_PROJECT_PATH == "pycodegen/pystencils" && $CI_COMMIT_BRANCH == "master"'
# Base configuration for jobs meant to run at a schedule
.scheduled:
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
# -------------------------- Tests ------------------------------------------------------------------------------------
# Normal test - runs all tests except the "longrun" ones on every commit
tests-and-coverage:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
before_script:
- pip install -e .
script:
- env
- pip list
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov-report term --cov=. -m "not longrun" --html test-report/index.html --junitxml=report.xml
- python -m coverage xml
tags:
- docker
- cuda11
- AVX
coverage: /Total coverage:\s\d+.\d+\%/
artifacts:
when: always
paths:
- coverage_report
- test-report
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml
junit: report.xml
# Normal test with longruns
tests-and-coverage-with-longrun:
stage: test
when: manual
allow_failure: true
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
before_script:
- pip install sympy --upgrade
- pip install -e .
script:
- env
- pip list
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- py.test -v -n $NUM_CORES
tags:
- docker
- cuda11
- AVX
# pipeline with latest python version
latest-python:
stage: test
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
before_script:
- pip install -e .
script:
- env
- pip list
- pip install -e .
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- py.test -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
tags:
- docker
- AVX
artifacts:
when: always
reports:
junit: report.xml
# Minimal tests in windows environment
#minimal-windows:
# stage: test
# tags:
# - win
# script:
# - export NUM_CORES=$(nproc --all)
# - source /cygdrive/c/Users/build/Miniconda3/Scripts/activate
# - source activate pystencils
# - pip install joblib
# - pip list
# - python -c "import numpy"
# - py.test -v -m "not (notebook or longrun)"
ubuntu:
stage: test
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ubuntu
before_script:
- ln -s /usr/include/locale.h /usr/include/xlocale.h
- pip3 install -e .
script:
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- sed -i 's/--doctest-modules //g' pytest.ini
- env
- pip list
- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
tags:
- docker
- cuda11
- AVX
artifacts:
when: always
reports:
junit: report.xml
.multiarch_template:
stage: test
extends: .every-commit
allow_failure: true
before_script: &multiarch_before_script
# - pip3 install -v .
- export PYTHONPATH=src
- python3 -c "import pystencils as ps; ps.cpu.cpujit.read_config()"
- sed -i '/^fail_under.*/d' pytest.ini
script:
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- sed -i 's/--doctest-modules //g' pytest.ini
- env
- pip3 list
- python3 -m pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov=. --junitxml=report.xml tests/test_*vec*.py tests/test_random.py tests/test_half_precision.py
- python3 -m coverage xml
tags:
- docker
- AVX
artifacts:
when: always
paths:
- coverage_report
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml
junit: report.xml
arm64v8:
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
variables:
QEMU_CPU: "cortex-a76"
before_script:
- *multiarch_before_script
- sed -i s/march=native/march=armv8-a/g ~/.config/pystencils/config.json
ppc64le:
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ppc64le
before_script:
- *multiarch_before_script
- sed -i s/mcpu=native/mcpu=power8/g ~/.config/pystencils/config.json
arm64v9:
# SVE support is still unreliable in GCC 11 (incorrect code for fixed-width vectors, internal compiler errors).
# For half precision Clang is necessary
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
before_script:
- *multiarch_before_script
- sed -i s/march=native/march=armv9-a+sve2+sme/g ~/.config/pystencils/config.json
- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json
riscv64:
  # The RISC-V vector extension is currently not supported by GCC.
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/riscv64
variables:
# explicitly set SIMD as detection requires QEMU >= 8.1
PYSTENCILS_SIMD: "rvv"
QEMU_CPU: "rv64,v=true,zicboz=true"
before_script:
- *multiarch_before_script
- sed -i 's/march=native/march=rv64imfdvzicboz/g' ~/.config/pystencils/config.json
- sed -i s/g\+\+/clang++-15/g ~/.config/pystencils/config.json
minimal-conda:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
before_script:
- pip install -e .
script:
- python quicktest.py
tags:
- docker
- cuda
minimal-sympy-master:
stage: test
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
before_script:
- pip install -e .
script:
- python -m pip install --upgrade git+https://github.com/sympy/sympy.git
- python quicktest.py
allow_failure: true
tags:
- docker
- cuda
pycodegen-integration:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
stage: test
when: manual
allow_failure: true
script:
- git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@i10git.cs.fau.de/pycodegen/pycodegen.git
- cd pycodegen
- git submodule sync --recursive
- git submodule update --init --recursive
- git submodule foreach git fetch origin # compare the latest master version!
- git submodule foreach git reset --hard origin/master
- cd pystencils
- git remote add test $CI_REPOSITORY_URL
- git fetch test
- git reset --hard $CI_COMMIT_SHA
- cd ..
- pip install -e pystencils/
- pip install -e lbmpy/
- cmake --version
- ./install_walberla.sh
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- cd pystencils
- py.test -v -n $NUM_CORES --junitxml=report.xml .
- cd ../lbmpy
- py.test -v -n $NUM_CORES --junitxml=report.xml .
- cd ../walberla/build/
- make -j $NUM_CORES CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample
- make -j $NUM_CORES multiphaseCPU multiphaseGPU FluctuatingMRT FlowAroundSphereCodeGen FieldLayoutAndVectorizationTest GeneratedOutflowBC
- cd apps/benchmarks/UniformGridGPU
- make -j $NUM_CORES
- cd ../UniformGridCPU
- make -j $NUM_CORES
tags:
- docker
- cuda11
- AVX
artifacts:
when: always
reports:
junit: pycodegen/*/report.xml
# -------------------- Scheduled Tasks --------------------------------------------------------------------------
# Nightly test against the latest (pre-release) version of SymPy published on PyPI
nightly-sympy:
stage: nightly
needs: []
extends: .scheduled
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
before_script:
- pip install -e .
- pip install --upgrade --pre sympy
script:
- env
- pip list
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
tags:
- docker
- AVX
- cuda
artifacts:
when: always
reports:
junit: report.xml
# -------------------- Linter & Documentation --------------------------------------------------------------------------
flake8-lint:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
script:
- flake8 src/pystencils
tags:
- docker
build-documentation:
stage: docs
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/documentation
needs: []
before_script:
- pip install -e .
script:
- mkdir html_doc
- sphinx-build -b html doc html_doc
- sphinx-build -W -b html doc html_doc
tags:
- docker
artifacts:
paths:
- html_doc
pages:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
extends: .every-commit-master
stage: deploy
needs: ["tests-and-coverage", "build-documentation"]
script:
- ls -l
- mv coverage_report html_doc
- mv html_doc public # folder has to be named "public" for gitlab to publish it
artifacts:
paths:
- public
tags:
- docker
[settings]
line_length=100
balanced_wrapping=True
multi_line_output=4
known_third_party=sympy
Contributors:
-------------
- Martin Bauer <martin.bauer@fau.de>
- Markus Holzer <markus.holzer@fau.de>
- Stephan Seitz <stephan.seitz@fau.de>
- Michael Kuron <mkuron@icp.uni-stuttgart.de>
- Jan Hönig <jan.hoenig@fau.de>
- Julian Hammer <julian.hammer@fau.de>
- Nils Kohl <nils.kohl@fau.de>
- Frederik Hennig <frederik.hennig@fau.de>
- Dominik Ernst <dominik.ernst@fau.de>
- Christian Godenschwager <christian.godenschwager@fau.de>
- Dominik Thoennes <dominik.thoennes@fau.de>
# Change Log
## Unreleased
### Removed
* LLVM backend, because it was rarely used and not well integrated into pystencils.
* OpenCL backend, because it was rarely used and not well integrated into pystencils.
# Contributing
Contributions to pystencils are always welcome, and they are greatly appreciated!
A list of open problems can be found [here]( https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
Of course, bringing your own ideas and problems to the community is also always appreciated!
Please submit all contributions to the official [GitLab repository](https://i10git.cs.fau.de/pycodegen/pystencils) in the form of a Merge Request. Please do not submit git diffs or files containing the changes.
There also exists a GitHub repository, which is only a mirror of the GitLab repository. Contributions to the GitHub repository are not considered.
`pystencils` is an open-source Python package under the AGPLv3 license. Thus, we consider the act of contributing to the code by submitting a Merge Request as the "sign-off" or agreement to the AGPLv3 license.
You can contribute in many different ways:
## Types of Contributions
### Report Bugs
Report bugs at [https://i10git.cs.fau.de/pycodegen/pystencils/-/issues](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
For pystencils, it is often necessary to provide the Python and [SymPy](https://www.sympy.org/en/index.html) versions used, as well as hardware information such as the
processor architecture and the compiler version used to compile the generated kernels.
### Fix Issues
Look through the GitLab issues. Different tags indicate the status of an issue:
the "bug" tag marks problems with pystencils, while the "feature" tag marks ideas to be added in the future.
### Write Documentation
The documentation of pystencils can be found [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils/). Jupyter notebooks are used to provide an
interactive introduction to pystencils. New documentation notebooks are always appreciated,
since they help others a lot.
## Get Started!
Ready to contribute? Here is how to set up `pystencils` for local development.
1. Fork the `pystencils` repo on GitLab.
2. Clone your fork locally:
```bash
$ git clone https://i10git.cs.fau.de/your-name/pystencils
```
3. Install your local copy into a virtualenv. Using Anaconda or Miniconda to manage your Python environments is also recommended.
```bash
$ mkvirtualenv pystencils
$ cd pystencils/
$ pip install -e .
```
4. Create a branch for local development:
```bash
$ git checkout -b name-of-your-bugfix-or-feature
```
Now you can make your changes locally.
5. When you're done making changes, check that your changes pass flake8 and the tests:
```bash
$ flake8 src/pystencils
$ py.test -v -n $NUM_CORES -m "not longrun" .
```
To get all packages needed for development, a requirements list can be found [here](https://i10git.cs.fau.de/pycodegen/pycodegen/-/blob/master/conda_environment_dev.yml). This includes flake8 and pytest.
6. Commit your changes and push your branch to GitLab:
```bash
$ git add .
$ git commit -m "Your detailed description of your changes."
$ git push origin name-of-your-bugfix-or-feature
```
7. Submit a Merge Request on GitLab.
## Merge Request Guidelines
Before you submit a Merge Request, check that it meets these guidelines:
1. All functionality that is implemented through this Merge Request should be covered by unit tests. These live in the `tests` directory.
2. If the Merge Request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring.
3. If you have maintainer status for `pystencils`, you can merge Merge Requests into the master branch. However, every Merge Request needs to be reviewed by another developer, so you are not allowed to merge a Merge Request that you submitted yourself.
## Tips
To run a subset of tests:
```bash
$ py.test my_test.py
```
include AUTHORS.txt
include CONTRIBUTING.md
include CHANGELOG.md
pystencils
==========
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/mabau/pystencils/master?filepath=doc%2Fnotebooks)
[![Docs](https://img.shields.io/badge/read-the_docs-brightgreen.svg)](https://pycodegen.pages.i10git.cs.fau.de/pystencils)
[![pypi-package](https://badge.fury.io/py/pystencils.svg)](https://badge.fury.io/py/pystencils)
[![pipeline status](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/pipeline.svg)](https://i10git.cs.fau.de/pycodegen/pystencils/commits/master)
[![coverage report](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/coverage.svg)](http://pycodegen.pages.i10git.cs.fau.de/pystencils/coverage_report)
Run blazingly fast stencil codes on numpy arrays.
*pystencils* uses sympy to define stencil operations that can be executed on numpy arrays.
Exploiting the stencil structure makes *pystencils* run faster than plain numpy code and on par with Cython and numba,
[as demonstrated in this notebook](https://pycodegen.pages.i10git.cs.fau.de/pystencils/notebooks/demo_benchmark.html).
Here is a code snippet that computes the average of neighboring cells:
```python
import pystencils as ps
import numpy as np
f, g = ps.fields("f, g : [2D]")
stencil = ps.Assignment(g[0, 0],
(f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
kernel = ps.create_kernel(stencil).compile()
f_arr = np.random.rand(1000, 1000)
g_arr = np.empty_like(f_arr)
kernel(f=f_arr, g=g_arr)
```
*pystencils* is mostly used for numerical simulations using finite difference or finite volume methods.
It comes with automatic finite difference discretization for PDEs:
```python
import pystencils as ps
import sympy as sp
c, v = ps.fields("c, v(2): [2D]")
adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(c, sp.symbols("D")) + ps.fd.advection(c, v)
discretize = ps.fd.Discretization2ndOrder(dx=1, dt=0.01)
discretization = discretize(adv_diff_pde)
```
Installation
------------
```bash
pip install pystencils[interactive]
```
Without `[interactive]` you get a minimal version with very few dependencies.
All options:
- `gpu`: use this if an NVIDIA or AMD GPU is available and CUDA or ROCm is installed
- `alltrafos`: pulls in additional dependencies for loop simplification, e.g. libisl
- `bench_db`: functionality to store benchmark results in object databases
- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
- `doc`: packages to build documentation
Options can be combined, e.g.
```bash
pip install pystencils[interactive, gpu, doc]
```
pystencils is also fully compatible with Windows. If you are working with Visual Studio and cupy, make sure to run the example files first to ensure that cupy can find the compiler's executable.
Documentation
-------------
Read the docs [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and
check out the Jupyter notebooks in `doc/notebooks`. The **Changelog** of pystencils can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/CHANGELOG.md).
Authors
-------
Many thanks go to the [contributors](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/AUTHORS.txt) of pystencils.
### Please cite us
If you use pystencils in a publication, please cite the following articles:
Overview:
- M. Bauer et al., Code Generation for Massively Parallel Phase-Field Simulations. Association for Computing Machinery, 2019. https://doi.org/10.1145/3295500.3356186
Performance Modelling:
- D. Ernst et al., Analytical performance estimation during code generation on modern GPUs. Journal of Parallel and Distributed Computing, 2023. https://doi.org/10.1016/j.jpdc.2022.11.003
class NestedScopes:
"""Symbol visibility model using nested scopes
    - every accessed symbol that was not defined before is added as a "free parameter"
- free parameters are global, i.e. they are not in scopes
- push/pop adds or removes a scope
>>> s = NestedScopes()
>>> s.access_symbol("a")
>>> s.is_defined("a")
False
>>> s.free_parameters
{'a'}
>>> s.define_symbol("b")
>>> s.is_defined("b")
True
>>> s.push()
>>> s.is_defined_locally("b")
False
>>> s.define_symbol("c")
>>> s.pop()
>>> s.is_defined("c")
False
"""
def __init__(self):
self.free_parameters = set()
self._defined = [set()]
def access_symbol(self, symbol):
if not self.is_defined(symbol):
self.free_parameters.add(symbol)
def define_symbol(self, symbol):
self._defined[-1].add(symbol)
def is_defined(self, symbol):
return any(symbol in scopes for scopes in self._defined)
def is_defined_locally(self, symbol):
return symbol in self._defined[-1]
def push(self):
self._defined.append(set())
def pop(self):
self._defined.pop()
assert self.depth >= 1
@property
def depth(self):
return len(self._defined)
import sympy as sp
from collections import namedtuple
from sympy.core import S
from typing import Set
from sympy.printing.ccode import C89CodePrinter
try:
from sympy.printing.ccode import C99CodePrinter as CCodePrinter
except ImportError:
from sympy.printing.ccode import CCodePrinter # for sympy versions < 1.1
from pystencils.integer_functions import bitwise_xor, bit_shift_right, bit_shift_left, bitwise_and, \
bitwise_or, modulo_ceil
from pystencils.astnodes import Node, KernelFunction
from pystencils.data_types import create_type, PointerType, get_type_of_expression, VectorType, cast_func, \
vector_memory_access, reinterpret_cast_func
__all__ = ['generate_c', 'CustomCodeNode', 'PrintNode', 'get_headers', 'CustomSympyPrinter']
def generate_c(ast_node: Node, signature_only: bool = False, dialect='c') -> str:
"""Prints an abstract syntax tree node as C or CUDA code.
    This function does not need to distinguish between C, C++ or CUDA code; it just prints 'C-like' code as encoded
in the abstract syntax tree (AST). The AST is built differently for C or CUDA by calling different create_kernel
functions.
Args:
        ast_node: the AST node to print
        signature_only: if True, only the function signature (declaration) is printed
dialect: 'c' or 'cuda'
Returns:
C-like code for the ast node and its descendants
"""
printer = CBackend(signature_only=signature_only,
vector_instruction_set=ast_node.instruction_set,
dialect=dialect)
return printer(ast_node)
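
A minimal usage sketch (hedged; it assumes the `ps.fields`/`ps.create_kernel` API shown in the README above):

```python
# Sketch, not part of the module: print the C code of a simple kernel.
import pystencils as ps
from pystencils.backends.cbackend import generate_c

f, g = ps.fields("f, g: double[2D]")
ast = ps.create_kernel([ps.Assignment(g[0, 0], 2 * f[0, 0])])
print(generate_c(ast))                       # full kernel source
print(generate_c(ast, signature_only=True))  # declaration only
```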
def get_headers(ast_node: Node) -> Set[str]:
"""Return a set of header files, necessary to compile the printed C-like code."""
headers = set()
if isinstance(ast_node, KernelFunction) and ast_node.instruction_set:
headers.update(ast_node.instruction_set['headers'])
if hasattr(ast_node, 'headers'):
headers.update(ast_node.headers)
for a in ast_node.args:
if isinstance(a, Node):
headers.update(get_headers(a))
return headers
# --------------------------------------- Backend Specific Nodes -------------------------------------------------------
class CustomCodeNode(Node):
def __init__(self, code, symbols_read, symbols_defined, parent=None):
super(CustomCodeNode, self).__init__(parent=parent)
self._code = "\n" + code
self._symbolsRead = set(symbols_read)
self._symbolsDefined = set(symbols_defined)
self.headers = []
def get_code(self, dialect, vector_instruction_set):
return self._code
@property
def args(self):
return []
@property
def symbols_defined(self):
return self._symbolsDefined
@property
def undefined_symbols(self):
        return self._symbolsRead - self.symbols_defined
class PrintNode(CustomCodeNode):
# noinspection SpellCheckingInspection
def __init__(self, symbol_to_print):
code = '\nstd::cout << "%s = " << %s << std::endl; \n' % (symbol_to_print.name, symbol_to_print.name)
super(PrintNode, self).__init__(code, symbols_read=[symbol_to_print], symbols_defined=set())
self.headers.append("<iostream>")
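
A hedged sketch of how `CustomCodeNode` is meant to be used (the symbol and statement below are illustrative):

```python
# Splice a literal C statement into a kernel AST; the code string is emitted verbatim.
from pystencils.backends.cbackend import CustomCodeNode
from pystencils.data_types import TypedSymbol

ctr = TypedSymbol("ctr", "int64")  # hypothetical counter symbol
node = CustomCodeNode("ctr += 1;", symbols_read=[ctr], symbols_defined=[ctr])
print(node.get_code('c', None))  # -> ctr += 1;
```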
# ------------------------------------------- Printer ------------------------------------------------------------------
# noinspection PyPep8Naming
class CBackend:
def __init__(self, sympy_printer=None,
signature_only=False, vector_instruction_set=None, dialect='c'):
if sympy_printer is None:
if vector_instruction_set is not None:
self.sympy_printer = VectorizedCustomSympyPrinter(vector_instruction_set)
else:
self.sympy_printer = CustomSympyPrinter()
else:
self.sympy_printer = sympy_printer
self._vector_instruction_set = vector_instruction_set
self._indent = " "
self._dialect = dialect
self._signatureOnly = signature_only
def __call__(self, node):
prev_is = VectorType.instruction_set
VectorType.instruction_set = self._vector_instruction_set
result = str(self._print(node))
VectorType.instruction_set = prev_is
return result
def _print(self, node):
for cls in type(node).__mro__:
method_name = "_print_" + cls.__name__
if hasattr(self, method_name):
return getattr(self, method_name)(node)
raise NotImplementedError("CBackend does not support node of type " + str(type(node)))
def _print_KernelFunction(self, node):
function_arguments = ["%s %s" % (str(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
func_declaration = "FUNC_PREFIX void %s(%s)" % (node.function_name, ", ".join(function_arguments))
if self._signatureOnly:
return func_declaration
body = self._print(node.body)
return func_declaration + "\n" + body
def _print_Block(self, node):
block_contents = "\n".join([self._print(child) for child in node.args])
return "{\n%s\n}" % (self._indent + self._indent.join(block_contents.splitlines(True)))
def _print_PragmaBlock(self, node):
return "%s\n%s" % (node.pragma_line, self._print_Block(node))
def _print_LoopOverCoordinate(self, node):
counter_symbol = node.loop_counter_name
start = "int %s = %s" % (counter_symbol, self.sympy_printer.doprint(node.start))
condition = "%s < %s" % (counter_symbol, self.sympy_printer.doprint(node.stop))
update = "%s += %s" % (counter_symbol, self.sympy_printer.doprint(node.step),)
loop_str = "for (%s; %s; %s)" % (start, condition, update)
prefix = "\n".join(node.prefix_lines)
if prefix:
prefix += "\n"
return "%s%s\n%s" % (prefix, loop_str, self._print(node.body))
def _print_SympyAssignment(self, node):
if node.is_declaration:
data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " "
return "%s%s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs),
self.sympy_printer.doprint(node.rhs))
else:
lhs_type = get_type_of_expression(node.lhs)
if type(lhs_type) is VectorType and isinstance(node.lhs, cast_func):
arg, data_type, aligned, nontemporal = node.lhs.args
instr = 'storeU'
if aligned:
instr = 'stream' if nontemporal else 'storeA'
rhs_type = get_type_of_expression(node.rhs)
if type(rhs_type) is not VectorType:
rhs = cast_func(node.rhs, VectorType(rhs_type))
else:
rhs = node.rhs
return self._vector_instruction_set[instr].format("&" + self.sympy_printer.doprint(node.lhs.args[0]),
self.sympy_printer.doprint(rhs)) + ';'
else:
return "%s = %s;" % (self.sympy_printer.doprint(node.lhs), self.sympy_printer.doprint(node.rhs))
def _print_TemporaryMemoryAllocation(self, node):
align = 64
np_dtype = node.symbol.dtype.base_type.numpy_dtype
required_size = np_dtype.itemsize * node.size + align
size = modulo_ceil(required_size, align)
code = "{dtype} {name}=({dtype})aligned_alloc({align}, {size}) + {offset};"
return code.format(dtype=node.symbol.dtype,
name=self.sympy_printer.doprint(node.symbol.name),
size=self.sympy_printer.doprint(size),
offset=int(node.offset(align)),
align=align)
def _print_TemporaryMemoryFree(self, node):
align = 64
return "free(%s - %d);" % (self.sympy_printer.doprint(node.symbol.name), node.offset(align))
def _print_CustomCodeNode(self, node):
return node.get_code(self._dialect, self._vector_instruction_set)
def _print_Conditional(self, node):
condition_expr = self.sympy_printer.doprint(node.condition_expr)
true_block = self._print_Block(node.true_block)
result = "if (%s)\n%s " % (condition_expr, true_block)
if node.false_block:
false_block = self._print_Block(node.false_block)
result += "else " + false_block
return result
# ------------------------------------------ Helper function & classes -------------------------------------------------
# noinspection PyPep8Naming
class CustomSympyPrinter(CCodePrinter):
def __init__(self):
super(CustomSympyPrinter, self).__init__()
self._float_type = create_type("float32")
if 'Min' in self.known_functions:
del self.known_functions['Min']
if 'Max' in self.known_functions:
del self.known_functions['Max']
def _print_Pow(self, expr):
"""Don't use std::pow function, for small integer exponents, write as multiplication"""
if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0:
return "1 / ({})".format(self._print(sp.Mul(*[expr.base] * (-expr.exp), evaluate=False)))
else:
return super(CustomSympyPrinter, self)._print_Pow(expr)
def _print_Rational(self, expr):
"""Evaluate all rationals i.e. print 0.25 instead of 1.0/4.0"""
res = str(expr.evalf().num)
return res
def _print_Equality(self, expr):
"""Equality operator is not printable in default printer"""
return '((' + self._print(expr.lhs) + ") == (" + self._print(expr.rhs) + '))'
def _print_Piecewise(self, expr):
"""Print piecewise in one line (remove newlines)"""
result = super(CustomSympyPrinter, self)._print_Piecewise(expr)
return result.replace("\n", "")
def _print_Function(self, expr):
infix_functions = {
bitwise_xor: '^',
bit_shift_right: '>>',
bit_shift_left: '<<',
bitwise_or: '|',
bitwise_and: '&',
}
if hasattr(expr, 'to_c'):
return expr.to_c(self._print)
if isinstance(expr, reinterpret_cast_func):
arg, data_type = expr.args
return "*((%s)(& %s))" % (PointerType(data_type, restrict=False), self._print(arg))
elif isinstance(expr, cast_func):
arg, data_type = expr.args
if isinstance(arg, sp.Number):
return self._typed_number(arg, data_type)
else:
return "*((%s)(& %s))" % (PointerType(data_type, restrict=False), self._print(arg))
elif expr.func in infix_functions:
return "(%s %s %s)" % (self._print(expr.args[0]), infix_functions[expr.func], self._print(expr.args[1]))
else:
return super(CustomSympyPrinter, self)._print_Function(expr)
def _typed_number(self, number, dtype):
res = self._print(number)
if dtype.is_float():
if dtype == self._float_type:
if '.' not in res:
res += ".0f"
else:
res += "f"
return res
else:
return res
_print_Max = C89CodePrinter._print_Max
_print_Min = C89CodePrinter._print_Min
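
For illustration, a small sketch of what this printer produces for powers (behavior assumed from `_print_Pow` above):

```python
# Small integer powers become multiplications instead of std::pow calls.
import sympy as sp

printer = CustomSympyPrinter()
x = sp.Symbol("x")
print(printer.doprint(x ** 3))   # -> (x*x*x)
print(printer.doprint(x ** -2))  # -> 1 / (x*x)
```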
# noinspection PyPep8Naming
class VectorizedCustomSympyPrinter(CustomSympyPrinter):
SummandInfo = namedtuple("SummandInfo", ['sign', 'term'])
def __init__(self, instruction_set):
super(VectorizedCustomSympyPrinter, self).__init__()
self.instruction_set = instruction_set
def _scalarFallback(self, func_name, expr, *args, **kwargs):
expr_type = get_type_of_expression(expr)
if type(expr_type) is not VectorType:
return getattr(super(VectorizedCustomSympyPrinter, self), func_name)(expr, *args, **kwargs)
else:
assert self.instruction_set['width'] == expr_type.width
return None
def _print_Function(self, expr):
if isinstance(expr, vector_memory_access):
arg, data_type, aligned, _ = expr.args
instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
return instruction.format("& " + self._print(arg))
elif isinstance(expr, cast_func):
arg, data_type = expr.args
if type(data_type) is VectorType:
return self.instruction_set['makeVec'].format(self._print(arg))
return super(VectorizedCustomSympyPrinter, self)._print_Function(expr)
def _print_And(self, expr):
result = self._scalarFallback('_print_And', expr)
if result:
return result
arg_strings = [self._print(a) for a in expr.args]
assert len(arg_strings) > 0
result = arg_strings[0]
for item in arg_strings[1:]:
result = self.instruction_set['&'].format(result, item)
return result
def _print_Or(self, expr):
result = self._scalarFallback('_print_Or', expr)
if result:
return result
arg_strings = [self._print(a) for a in expr.args]
assert len(arg_strings) > 0
result = arg_strings[0]
for item in arg_strings[1:]:
result = self.instruction_set['|'].format(result, item)
return result
def _print_Add(self, expr, order=None):
result = self._scalarFallback('_print_Add', expr)
if result:
return result
summands = []
for term in expr.args:
if term.func == sp.Mul:
sign, t = self._print_Mul(term, inside_add=True)
else:
t = self._print(term)
sign = 1
summands.append(self.SummandInfo(sign, t))
# Use positive terms first
summands.sort(key=lambda e: e.sign, reverse=True)
# if no positive term exists, prepend a zero
if summands[0].sign == -1:
summands.insert(0, self.SummandInfo(1, "0"))
assert len(summands) >= 2
processed = summands[0].term
for summand in summands[1:]:
func = self.instruction_set['-'] if summand.sign == -1 else self.instruction_set['+']
processed = func.format(processed, summand.term)
return processed
def _print_Pow(self, expr):
result = self._scalarFallback('_print_Pow', expr)
if result:
return result
if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
elif expr.exp == -1:
one = self.instruction_set['makeVec'].format(1.0)
return self.instruction_set['/'].format(one, self._print(expr.base))
elif expr.exp == 0.5:
return self.instruction_set['sqrt'].format(self._print(expr.base))
elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0:
one = self.instruction_set['makeVec'].format(1.0)
return self.instruction_set['/'].format(one,
self._print(sp.Mul(*[expr.base] * (-expr.exp), evaluate=False)))
else:
raise ValueError("Generic exponential not supported: " + str(expr))
def _print_Mul(self, expr, inside_add=False):
# noinspection PyProtectedMember
from sympy.core.mul import _keep_coeff
result = self._scalarFallback('_print_Mul', expr)
if result:
return result
c, e = expr.as_coeff_Mul()
if c < 0:
expr = _keep_coeff(-c, e)
sign = -1
else:
sign = 1
a = [] # items in the numerator
b = [] # items that are in the denominator (if any)
# Gather args for numerator/denominator
for item in expr.as_ordered_factors():
if item.is_commutative and item.is_Pow and item.exp.is_Rational and item.exp.is_negative:
if item.exp != -1:
b.append(sp.Pow(item.base, -item.exp, evaluate=False))
else:
b.append(sp.Pow(item.base, -item.exp))
else:
a.append(item)
a = a or [S.One]
a_str = [self._print(x) for x in a]
b_str = [self._print(x) for x in b]
result = a_str[0]
for item in a_str[1:]:
result = self.instruction_set['*'].format(result, item)
if len(b) > 0:
denominator_str = b_str[0]
for item in b_str[1:]:
denominator_str = self.instruction_set['*'].format(denominator_str, item)
result = self.instruction_set['/'].format(result, denominator_str)
if inside_add:
return sign, result
else:
if sign < 0:
return self.instruction_set['*'].format(self._print(S.NegativeOne), result)
else:
return result
def _print_Relational(self, expr):
result = self._scalarFallback('_print_Relational', expr)
if result:
return result
return self.instruction_set[expr.rel_op].format(self._print(expr.lhs), self._print(expr.rhs))
def _print_Equality(self, expr):
result = self._scalarFallback('_print_Equality', expr)
if result:
return result
return self.instruction_set['=='].format(self._print(expr.lhs), self._print(expr.rhs))
def _print_Piecewise(self, expr):
result = self._scalarFallback('_print_Piecewise', expr)
if result:
return result
if expr.args[-1].cond.args[0] is not sp.sympify(True):
# We need the last conditional to be a True, otherwise the resulting
# function may not return a result.
raise ValueError("All Piecewise expressions must contain an "
"(expr, True) statement to be used as a default "
"condition. Without one, the generated "
"expression may not evaluate to anything under "
"some condition.")
result = self._print(expr.args[-1][0])
for true_expr, condition in reversed(expr.args[:-1]):
# noinspection SpellCheckingInspection
result = self.instruction_set['blendv'].format(result, self._print(true_expr), self._print(condition))
return result
# noinspection SpellCheckingInspection
def get_vector_instruction_set(data_type='double', instruction_set='avx'):
base_names = {
'+': 'add[0, 1]',
'-': 'sub[0, 1]',
'*': 'mul[0, 1]',
'/': 'div[0, 1]',
'==': 'cmp[0, 1, _CMP_EQ_UQ ]',
'!=': 'cmp[0, 1, _CMP_NEQ_UQ ]',
'>=': 'cmp[0, 1, _CMP_GE_OQ ]',
'<=': 'cmp[0, 1, _CMP_LE_OQ ]',
'<': 'cmp[0, 1, _CMP_NGE_UQ ]',
'>': 'cmp[0, 1, _CMP_NLE_UQ ]',
'&': 'and[0, 1]',
'|': 'or[0, 1]',
'blendv': 'blendv[0, 1, 2]',
'sqrt': 'sqrt[0]',
'makeVec': 'set[]',
'makeZero': 'setzero[]',
'loadU': 'loadu[0]',
'loadA': 'load[0]',
'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]',
'stream': 'stream[0,1]',
}
headers = {
'avx512': ['<immintrin.h>'],
'avx': ['<immintrin.h>'],
'sse': ['<immintrin.h>', '<xmmintrin.h>', '<emmintrin.h>', '<pmmintrin.h>',
'<tmmintrin.h>', '<smmintrin.h>', '<nmmintrin.h>']
}
suffix = {
'double': 'pd',
'float': 'ps',
}
prefix = {
'sse': '_mm',
'avx': '_mm256',
'avx512': '_mm512',
}
width = {
("double", "sse"): 2,
("float", "sse"): 4,
("double", "avx"): 4,
("float", "avx"): 8,
("double", "avx512"): 8,
("float", "avx512"): 16,
}
result = {
'width': width[(data_type, instruction_set)],
}
pre = prefix[instruction_set]
suf = suffix[data_type]
for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip()
name = function_shortcut[:function_shortcut.index('[')]
if intrinsic_id == 'makeVec':
arg_string = "({})".format(",".join(["{0}"] * result['width']))
else:
args = function_shortcut[function_shortcut.index('[') + 1: -1]
arg_string = "("
for arg in args.split(","):
arg = arg.strip()
if not arg:
continue
if arg in ('0', '1', '2', '3', '4', '5'):
arg_string += "{" + arg + "},"
else:
arg_string += arg + ","
arg_string = arg_string[:-1] + ")"
result[intrinsic_id] = pre + "_" + name + "_" + suf + arg_string
result['dataTypePrefix'] = {
'double': "_" + pre + 'd',
'float': "_" + pre,
}
bit_width = result['width'] * (64 if data_type == 'double' else 32)
result['double'] = "__m%dd" % (bit_width,)
result['float'] = "__m%d" % (bit_width,)
result['int'] = "__m%di" % (bit_width,)
result['bool'] = "__m%dd" % (bit_width,)
result['headers'] = headers[instruction_set]
return result
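
For illustration, the templates this function produces for double-precision AVX (derived from the tables above):

```python
iset = get_vector_instruction_set('double', 'avx')
print(iset['width'])               # 4 doubles per register
print(iset['+'])                   # _mm256_add_pd({0},{1})
print(iset['+'].format('a', 'b'))  # _mm256_add_pd(a,b)
print(iset['double'])              # __m256d
```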
def get_supported_instruction_sets():
"""List of supported instruction sets on current hardware, or None if query failed."""
try:
from cpuinfo import get_cpu_info
except ImportError:
return None
result = []
required_sse_flags = {'sse', 'sse2', 'ssse3', 'sse4_1', 'sse4_2'}
required_avx_flags = {'avx'}
required_avx512_flags = {'avx512f'}
flags = set(get_cpu_info()['flags'])
if flags.issuperset(required_sse_flags):
result.append("sse")
if flags.issuperset(required_avx_flags):
result.append("avx")
if flags.issuperset(required_avx512_flags):
result.append("avx512")
return result
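
A typical usage sketch: since entries are appended in the order sse, avx, avx512, the last element is the widest available set.

```python
supported = get_supported_instruction_sets()  # e.g. ['sse', 'avx']
best = supported[-1] if supported else None
```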
build-essential
graphviz
ffmpeg
# ----------------------------------------------------------------------------------------------------------------------
# Environment with all dependencies to use pystencils
#
#
# Download conda at https://conda.io/miniconda.html and create this environment by running:
# conda env create -f conda_environment_user.yml
# . activate pystencils
#
# If you have CUDA or ROCm installed and want to use your GPU, uncomment the last line to install cupy
#
# ----------------------------------------------------------------------------------------------------------------------
name: pystencils
dependencies:
# Basic dependencies:
- python >= 3.8
- numpy
- sympy >= 1.1
- appdirs # to find default cache directory on each platform
- joblib # caching on hard disk; this is optional, but lbmpy is really slow without it
- cython # speed up boundary list computation (optional)
- matplotlib
- imageio
- pandas
- scipy
- pip
- pip:
- islpy # used to optimize staggered kernels
- py-cpuinfo # get cpu info like cache sizes, supported vector instruction sets, ...
- graphviz # can show abstract syntax trees as formatted graphs
- ipy_table # HTML tables for jupyter notebooks
- pyevtk # VTK output for serial simulations
- blitzdb # file-based No-SQL database to store simulation results
#- cupy # add this if you have CUDA or ROCm installed
#!/bin/bash
python3 setup.py develop
exec "$@"
import os
try:
from functools import lru_cache as memorycache
except ImportError:
from backports.functools_lru_cache import lru_cache as memorycache
try:
from joblib import Memory
from appdirs import user_cache_dir
if 'PYSTENCILS_CACHE_DIR' in os.environ:
cache_dir = os.environ['PYSTENCILS_CACHE_DIR']
else:
cache_dir = user_cache_dir('pystencils')
disk_cache = Memory(cachedir=cache_dir, verbose=False).cache
disk_cache_no_fallback = disk_cache
except ImportError:
# fallback to in-memory caching if joblib is not available
disk_cache = memorycache(maxsize=64)
def disk_cache_no_fallback(o):
return o
# Disable memory cache:
# disk_cache = lambda o: o
# disk_cache_no_fallback = lambda o: o
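
A usage sketch for the decorators defined above (the decorated function is illustrative):

```python
# Results are cached on disk when joblib is available, otherwise in memory.
@disk_cache
def expensive_analysis(n):
    return sum(i * i for i in range(n))

expensive_analysis(10 ** 6)  # computed once
expensive_analysis(10 ** 6)  # served from the cache
```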
import os
import runpy
import sys
import tempfile
import warnings
import nbformat
import pytest
from nbconvert import PythonExporter
from pystencils.boundaries.createindexlist import * # NOQA
# Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it
# at the same time
from pystencils.cpu import cpujit
# trigger cython imports - there seems to be a problem when multiple processes try to compile the same cython file
# at the same time
try:
import pyximport
pyximport.install(language_level=3)
except ImportError:
pass
SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.abspath('pystencils'))
# the Ubuntu pipeline uses an older version of pytest which uses deprecated functionality.
# This leads to many warnings in the test and coverage pipeline.
pytest_numeric_version = [int(x, 10) for x in pytest.__version__.split('.')]
pytest_numeric_version.reverse()
pytest_version = sum(x * (100 ** i) for i, x in enumerate(pytest_numeric_version))
def add_path_to_ignore(path):
if not os.path.exists(path):
return
global collect_ignore
collect_ignore += [os.path.join(SCRIPT_FOLDER, path, f) for f in os.listdir(os.path.join(SCRIPT_FOLDER, path))]
collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit")]
add_path_to_ignore('tests/benchmark')
add_path_to_ignore('_local_tmp')
try:
import cupy
except ImportError:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_gpu.py")]
add_path_to_ignore('src/pystencils/gpu')
try:
import waLBerla
except ImportError:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_aligned_array.py"),
os.path.join(SCRIPT_FOLDER, "tests/test_datahandling_parallel.py"),
os.path.join(SCRIPT_FOLDER, "doc/notebooks/03_tutorial_datahandling.ipynb"),
os.path.join(SCRIPT_FOLDER, "src/pystencils/datahandling/parallel_datahandling.py"),
os.path.join(SCRIPT_FOLDER, "tests/test_small_block_benchmark.ipynb")]
try:
import blitzdb
except ImportError:
add_path_to_ignore('src/pystencils/runhelper')
collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_parameterstudy.py")]
collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_json_serializer.py")]
try:
import islpy
except ImportError:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/integer_set_analysis.py")]
try:
import graphviz
except ImportError:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/backends/dot.py")]
collect_ignore += [os.path.join(SCRIPT_FOLDER, "doc/notebooks/01_tutorial_getting_started.ipynb")]
try:
import pyevtk
except ImportError:
collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/datahandling/vtk.py")]
collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')]
for root, sub_dirs, files in os.walk('.'):
for f in files:
if f.endswith(".ipynb") and not any(f.startswith(k) for k in ['demo', 'tutorial', 'test', 'doc']):
collect_ignore.append(f)
class IPythonMockup:
def run_line_magic(self, *args, **kwargs):
pass
def run_cell_magic(self, *args, **kwargs):
pass
def magic(self, *args, **kwargs):
pass
def __bool__(self):
return False
class IPyNbTest(pytest.Item):
def __init__(self, name, parent, code):
super(IPyNbTest, self).__init__(name, parent)
self.code = code
self.add_marker('notebook')
@pytest.mark.filterwarnings("ignore:IPython.core.inputsplitter is deprecated")
def runtest(self):
global_dict = {'get_ipython': lambda: IPythonMockup(),
'is_test_run': True}
# disable matplotlib output
exec("import matplotlib.pyplot as p; "
"p.switch_backend('Template')", global_dict)
# in notebooks there is an implicit plt.show() - if this is not called a warning is shown when the next
# plot is created. This warning is suppressed here
exec("import warnings;"
"warnings.filterwarnings('ignore', 'Adding an axes using the same arguments as a previous.*')",
global_dict)
with tempfile.NamedTemporaryFile() as f:
f.write(self.code.encode())
f.flush()
runpy.run_path(f.name, init_globals=global_dict, run_name=self.name)
class IPyNbFile(pytest.File):
def collect(self):
exporter = PythonExporter()
exporter.exclude_markdown = True
exporter.exclude_input_prompt = True
notebook_contents = self.fspath.open(encoding='utf-8')
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")
notebook = nbformat.read(notebook_contents, 4)
code, _ = exporter.from_notebook_node(notebook)
if pytest_version >= 50403:
yield IPyNbTest.from_parent(name=self.name, parent=self, code=code)
else:
yield IPyNbTest(self.name, self, code)
def teardown(self):
pass
def pytest_collect_file(path, parent):
glob_exprs = ["*demo*.ipynb", "*tutorial*.ipynb", "test_*.ipynb"]
if any(path.fnmatch(g) for g in glob_exprs):
if pytest_version >= 50403:
return IPyNbFile.from_parent(fspath=path, parent=parent)
else:
return IPyNbFile(path, parent)
import ctypes
import sympy as sp
import numpy as np
try:
import llvmlite.ir as ir
except ImportError as e:
ir = None
_ir_importerror = e
from sympy.core.cache import cacheit
from pystencils.cache import memorycache
from pystencils.utils import all_equal
from sympy.logic.boolalg import Boolean
# noinspection PyPep8Naming
class cast_func(sp.Function):
is_Atom = True
def __new__(cls, *args, **kwargs):
# to work in conditions of sp.Piecewise cast_func has to be of type Boolean as well
        # however, a cast_func should only be a Boolean if its argument is a Boolean; otherwise this leads
        # to problems, for example when comparing cast_funcs for equality
#
# lhs = bitwise_and(a, cast_func(1, 'int'))
# rhs = cast_func(0, 'int')
# print( sp.Ne(lhs, rhs) ) # would give true if all cast_funcs are booleans
# -> thus a separate class boolean_cast_func is introduced
if isinstance(args[0], Boolean):
cls = boolean_cast_func
return sp.Function.__new__(cls, *args, **kwargs)
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
@property
def is_commutative(self):
return self.args[0].is_commutative
@property
def dtype(self):
return self.args[1]
# noinspection PyPep8Naming
class boolean_cast_func(cast_func, Boolean):
pass
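
A hedged illustration of the dispatch in `cast_func.__new__` (uses `create_type`, defined further below):

```python
import sympy as sp

a, b = sp.symbols("a b")
# casting a boolean expression yields a boolean_cast_func ...
assert isinstance(cast_func(sp.Eq(a, b), create_type("int")), boolean_cast_func)
# ... while casting a plain expression does not
assert not isinstance(cast_func(a, create_type("int")), boolean_cast_func)
```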
# noinspection PyPep8Naming
class vector_memory_access(cast_func):
nargs = (4,)
# noinspection PyPep8Naming
class reinterpret_cast_func(cast_func):
pass
# noinspection PyPep8Naming
class pointer_arithmetic_func(sp.Function, Boolean):
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
class TypedSymbol(sp.Symbol):
def __new__(cls, *args, **kwds):
obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds)
return obj
def __new_stage2__(cls, name, dtype):
obj = super(TypedSymbol, cls).__xnew__(cls, name)
try:
obj._dtype = create_type(dtype)
except TypeError:
# on error keep the string
obj._dtype = dtype
return obj
__xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
@property
def dtype(self):
return self._dtype
def _hashable_content(self):
return super()._hashable_content(), hash(self._dtype)
def __getnewargs__(self):
return self.name, self.dtype
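
A short usage sketch: a `TypedSymbol` behaves like a sympy `Symbol` but carries a dtype.

```python
x = TypedSymbol("x", "float64")
print(x.dtype)    # double
print(x * 2 + 1)  # ordinary sympy arithmetic still works
```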
def create_type(specification):
"""Creates a subclass of Type according to a string or an object of subclass Type.
Args:
specification: Type object, or a string
Returns:
Type object, or a new Type object parsed from the string
"""
if isinstance(specification, Type):
return specification
else:
numpy_dtype = np.dtype(specification)
if numpy_dtype.fields is None:
return BasicType(numpy_dtype, const=False)
else:
return StructType(numpy_dtype, const=False)
@memorycache(maxsize=64)
def create_composite_type_from_string(specification):
"""Creates a new Type object from a c-like string specification.
Args:
specification: Specification string
Returns:
Type object
"""
specification = specification.lower().split()
parts = []
current = []
for s in specification:
if s == '*':
parts.append(current)
current = [s]
else:
current.append(s)
if len(current) > 0:
parts.append(current)
# Parse native part
base_part = parts.pop(0)
const = False
if 'const' in base_part:
const = True
base_part.remove('const')
assert len(base_part) == 1
if base_part[0][-1] == "*":
base_part[0] = base_part[0][:-1]
parts.append('*')
current_type = BasicType(np.dtype(base_part[0]), const)
# Parse pointer parts
for part in parts:
restrict = False
const = False
if 'restrict' in part:
restrict = True
part.remove('restrict')
if 'const' in part:
const = True
part.remove("const")
assert len(part) == 1 and part[0] == '*'
current_type = PointerType(current_type, const, restrict)
return current_type
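
For example (behavior as implied by the parser above):

```python
t = create_composite_type_from_string("const double * restrict")
print(t)  # -> double const * RESTRICT
```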
def get_base_type(data_type):
while data_type.base_type is not None:
data_type = data_type.base_type
return data_type
def to_ctypes(data_type):
"""
Transforms a given Type into ctypes
:param data_type: Subclass of Type
:return: ctypes type object
"""
if isinstance(data_type, PointerType):
return ctypes.POINTER(to_ctypes(data_type.base_type))
elif isinstance(data_type, StructType):
return ctypes.POINTER(ctypes.c_uint8)
else:
return to_ctypes.map[data_type.numpy_dtype]
to_ctypes.map = {
np.dtype(np.int8): ctypes.c_int8,
np.dtype(np.int16): ctypes.c_int16,
np.dtype(np.int32): ctypes.c_int32,
np.dtype(np.int64): ctypes.c_int64,
np.dtype(np.uint8): ctypes.c_uint8,
np.dtype(np.uint16): ctypes.c_uint16,
np.dtype(np.uint32): ctypes.c_uint32,
np.dtype(np.uint64): ctypes.c_uint64,
np.dtype(np.float32): ctypes.c_float,
np.dtype(np.float64): ctypes.c_double,
}
def ctypes_from_llvm(data_type):
if not ir:
raise _ir_importerror
if isinstance(data_type, ir.PointerType):
ctype = ctypes_from_llvm(data_type.pointee)
if ctype is None:
return ctypes.c_void_p
else:
return ctypes.POINTER(ctype)
elif isinstance(data_type, ir.IntType):
if data_type.width == 8:
return ctypes.c_int8
elif data_type.width == 16:
return ctypes.c_int16
elif data_type.width == 32:
return ctypes.c_int32
elif data_type.width == 64:
return ctypes.c_int64
else:
raise ValueError("Int width %d is not supported" % data_type.width)
elif isinstance(data_type, ir.FloatType):
return ctypes.c_float
elif isinstance(data_type, ir.DoubleType):
return ctypes.c_double
elif isinstance(data_type, ir.VoidType):
return None # Void type is not supported by ctypes
else:
raise NotImplementedError('Data type %s of %s is not supported yet' % (type(data_type), data_type))
def to_llvm_type(data_type):
"""
    Transforms a given Type into an llvmlite type
:param data_type: Subclass of Type
:return: llvmlite type object
"""
if not ir:
raise _ir_importerror
if isinstance(data_type, PointerType):
return to_llvm_type(data_type.base_type).as_pointer()
else:
return to_llvm_type.map[data_type.numpy_dtype]
if ir:
to_llvm_type.map = {
np.dtype(np.int8): ir.IntType(8),
np.dtype(np.int16): ir.IntType(16),
np.dtype(np.int32): ir.IntType(32),
np.dtype(np.int64): ir.IntType(64),
np.dtype(np.uint8): ir.IntType(8),
np.dtype(np.uint16): ir.IntType(16),
np.dtype(np.uint32): ir.IntType(32),
np.dtype(np.uint64): ir.IntType(64),
np.dtype(np.float32): ir.FloatType(),
np.dtype(np.float64): ir.DoubleType(),
}
def peel_off_type(dtype, type_to_peel_off):
while type(dtype) is type_to_peel_off:
dtype = dtype.base_type
return dtype
def collate_types(types):
"""
    Takes a sequence of types and returns their "common type", e.g. (float, double, float) -> double
Uses the collation rules from numpy.
"""
# Pointer arithmetic case i.e. pointer + integer is allowed
if any(type(t) is PointerType for t in types):
pointer_type = None
for t in types:
if type(t) is PointerType:
if pointer_type is not None:
raise ValueError("Cannot collate the combination of two pointer types")
pointer_type = t
elif type(t) is BasicType:
if not (t.is_int() or t.is_uint()):
raise ValueError("Invalid pointer arithmetic")
else:
raise ValueError("Invalid pointer arithmetic")
return pointer_type
    # peel off vector types; if at least one vector type occurred, the result will also be a vector type
vector_type = [t for t in types if type(t) is VectorType]
if not all_equal(t.width for t in vector_type):
raise ValueError("Collation failed because of vector types with different width")
types = [peel_off_type(t, VectorType) for t in types]
# now we should have a list of basic types - struct types are not yet supported
assert all(type(t) is BasicType for t in types)
if any(t.is_float() for t in types):
types = tuple(t for t in types if t.is_float())
# use numpy collation -> create type from numpy type -> and, put vector type around if necessary
result_numpy_type = np.result_type(*(t.numpy_dtype for t in types))
result = BasicType(result_numpy_type)
if vector_type:
result = VectorType(result, vector_type[0].width)
return result
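
For example (per the numpy collation rules):

```python
print(collate_types([create_type("float32"), create_type("int64")]))    # float
print(collate_types([create_type("float32"), create_type("float64")])) # double
```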
@memorycache(maxsize=2048)
def get_type_of_expression(expr):
from pystencils.astnodes import ResolvedFieldAccess
expr = sp.sympify(expr)
if isinstance(expr, sp.Integer):
return create_type("int")
elif isinstance(expr, sp.Rational) or isinstance(expr, sp.Float):
return create_type("double")
elif isinstance(expr, ResolvedFieldAccess):
return expr.field.dtype
elif isinstance(expr, TypedSymbol):
return expr.dtype
elif isinstance(expr, sp.Symbol):
raise ValueError("All symbols inside this expression have to be typed! ", str(expr))
elif isinstance(expr, cast_func):
return expr.args[1]
elif hasattr(expr, 'func') and expr.func == sp.Piecewise:
collated_result_type = collate_types(tuple(get_type_of_expression(a[0]) for a in expr.args))
collated_condition_type = collate_types(tuple(get_type_of_expression(a[1]) for a in expr.args))
if type(collated_condition_type) is VectorType and type(collated_result_type) is not VectorType:
collated_result_type = VectorType(collated_result_type, width=collated_condition_type.width)
return collated_result_type
elif isinstance(expr, sp.Indexed):
typed_symbol = expr.base.label
return typed_symbol.dtype.base_type
elif isinstance(expr, sp.boolalg.Boolean) or isinstance(expr, sp.boolalg.BooleanFunction):
# if any arg is of vector type return a vector boolean, else return a normal scalar boolean
result = create_type("bool")
vec_args = [get_type_of_expression(a) for a in expr.args if isinstance(get_type_of_expression(a), VectorType)]
if vec_args:
result = VectorType(result, width=vec_args[0].width)
return result
elif isinstance(expr, sp.Pow):
return get_type_of_expression(expr.args[0])
elif isinstance(expr, sp.Expr):
types = tuple(get_type_of_expression(a) for a in expr.args)
return collate_types(types)
raise NotImplementedError("Could not determine type for", expr, type(expr))
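
A short sketch of the deduction (assuming `TypedSymbol` from above):

```python
x = TypedSymbol("x", "float32")
i = TypedSymbol("i", "int64")
print(get_type_of_expression(x + i))  # float, since floats win over ints
```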
class Type(sp.Basic):
is_Atom = True
def __new__(cls, *args, **kwargs):
return sp.Basic.__new__(cls)
def _sympystr(self, *args, **kwargs):
return str(self)
class BasicType(Type):
@staticmethod
def numpy_name_to_c(name):
if name == 'float64':
return 'double'
elif name == 'float32':
return 'float'
elif name.startswith('int'):
width = int(name[len("int"):])
return "int%d_t" % (width,)
elif name.startswith('uint'):
width = int(name[len("uint"):])
return "uint%d_t" % (width,)
elif name == 'bool':
return 'bool'
else:
            raise NotImplementedError("Cannot map numpy name %s to a C type" % (name,))
def __init__(self, dtype, const=False):
self.const = const
if isinstance(dtype, Type):
self._dtype = dtype.numpy_dtype
else:
self._dtype = np.dtype(dtype)
assert self._dtype.fields is None, "Tried to initialize NativeType with a structured type"
assert self._dtype.hasobject is False
assert self._dtype.subdtype is None
def __getnewargs__(self):
return self.numpy_dtype, self.const
@property
def base_type(self):
return None
@property
def numpy_dtype(self):
return self._dtype
@property
def item_size(self):
return 1
def is_int(self):
return self.numpy_dtype in np.sctypes['int']
def is_float(self):
return self.numpy_dtype in np.sctypes['float']
def is_uint(self):
return self.numpy_dtype in np.sctypes['uint']
def is_complex(self):
return self.numpy_dtype in np.sctypes['complex']
def is_other(self):
return self.numpy_dtype in np.sctypes['others']
@property
def base_name(self):
return BasicType.numpy_name_to_c(str(self._dtype))
def __str__(self):
result = BasicType.numpy_name_to_c(str(self._dtype))
if self.const:
result += " const"
return result
def __repr__(self):
return str(self)
def __eq__(self, other):
if not isinstance(other, BasicType):
return False
else:
return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)
def __hash__(self):
return hash(str(self))
class VectorType(Type):
instruction_set = None
def __init__(self, base_type, width=4):
self._base_type = base_type
self.width = width
@property
def base_type(self):
return self._base_type
@property
def item_size(self):
return self.width * self.base_type.item_size
def __eq__(self, other):
if not isinstance(other, VectorType):
return False
else:
return (self.base_type, self.width) == (other.base_type, other.width)
def __str__(self):
if self.instruction_set is None:
return "%s[%d]" % (self.base_type, self.width)
else:
if self.base_type == create_type("int64"):
return self.instruction_set['int']
elif self.base_type == create_type("float64"):
return self.instruction_set['double']
elif self.base_type == create_type("float32"):
return self.instruction_set['float']
elif self.base_type == create_type("bool"):
return self.instruction_set['bool']
else:
raise NotImplementedError()
def __hash__(self):
return hash((self.base_type, self.width))
def __getnewargs__(self):
return self._base_type, self.width
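
For example, without an active instruction set the type prints generically:

```python
vt = VectorType(create_type("float64"), width=4)
print(vt)  # -> double[4]
```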
class PointerType(Type):
def __init__(self, base_type, const=False, restrict=True):
self._base_type = base_type
self.const = const
self.restrict = restrict
def __getnewargs__(self):
return self.base_type, self.const, self.restrict
@property
def alias(self):
return not self.restrict
@property
def base_type(self):
return self._base_type
@property
def item_size(self):
return self.base_type.item_size
def __eq__(self, other):
if not isinstance(other, PointerType):
return False
else:
return (self.base_type, self.const, self.restrict) == (other.base_type, other.const, other.restrict)
def __str__(self):
components = [str(self.base_type), '*']
if self.restrict:
components.append('RESTRICT')
if self.const:
components.append("const")
return " ".join(components)
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self._base_type, self.const, self.restrict))
class StructType:
def __init__(self, numpy_type, const=False):
self.const = const
self._dtype = np.dtype(numpy_type)
def __getnewargs__(self):
return self.numpy_dtype, self.const
@property
def base_type(self):
return None
@property
def numpy_dtype(self):
return self._dtype
@property
def item_size(self):
return self.numpy_dtype.itemsize
def get_element_offset(self, element_name):
return self.numpy_dtype.fields[element_name][1]
def get_element_type(self, element_name):
np_element_type = self.numpy_dtype.fields[element_name][0]
return BasicType(np_element_type, self.const)
def has_element(self, element_name):
return element_name in self.numpy_dtype.fields
def __eq__(self, other):
if not isinstance(other, StructType):
return False
else:
return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)
def __str__(self):
# structs are handled byte-wise
result = "uint8_t"
if self.const:
result += " const"
return result
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self.numpy_dtype, self.const))
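
A short usage example (offsets come from the underlying numpy dtype):

```python
import numpy as np

t = StructType(np.dtype([('x', np.float64), ('y', np.int32)]))
print(t)                          # -> uint8_t (structs are handled byte-wise)
print(t.get_element_offset('y'))  # -> 8
print(t.get_element_type('y'))    # -> int32_t
```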