Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (1342)
[flake8]
max-line-length=120
exclude=src/pystencils/jupyter.py,
src/pystencils/plot.py
src/pystencils/session.py
ignore = W293 W503 W291 C901 E741
src/pystencils/_version.py export-subst
__pycache__
.ipynb_checkpoints
.coverage*
*.pyc
*.vti
/build
/dist
*.egg-info
.cache
_build
/html_doc
/.idea
.vscode
.cache
_local_tmp
RELEASE-VERSION
test-report
src/pystencils/boundaries/createindexlistcython.c
src/pystencils/boundaries/createindexlistcython.*.so
tests/tmp
tests/var
tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
report.xml
coverage_report/
# macOS
**/.DS_Store
*.uuid
stages:
- pretest
- test
- nightly
- docs
- deploy
# -------------------------- Templates ------------------------------------------------------------------------------------
# Base configuration for jobs meant to run at every commit
.every-commit:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"
# Configuration for jobs meant to run on each commit to pycodegen/pystencils/master
.every-commit-master:
rules:
- if: '$CI_PIPELINE_SOURCE != "schedule" && $CI_PROJECT_PATH == "pycodegen/pystencils" && $CI_COMMIT_BRANCH == "master"'
# Base configuration for jobs meant to run at a schedule
.scheduled:
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
# -------------------------- Tests ------------------------------------------------------------------------------------
# Normal test - runs on every commit all but "long run" tests
tests-and-coverage:
  stage: pretest
  extends: .every-commit
  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
  before_script:
    - pip install -e .
  script:
    # Dump environment and installed packages to make CI failures easier to diagnose
    - env
    - pip list
    - export NUM_CORES=$(nproc --all)
    # Select the non-interactive "template" matplotlib backend for the test run
    - mkdir -p ~/.config/matplotlib
    - echo "backend:template" > ~/.config/matplotlib/matplotlibrc
    - mkdir public
    - pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov-report term --cov=. -m "not longrun" --html test-report/index.html --junitxml=report.xml
    - python -m coverage xml
  tags:
    - docker
    - cuda11
    - AVX
  # Quoted so YAML treats the regex as a plain string; the dot is escaped so it
  # only matches a literal decimal point instead of any character.
  coverage: '/Total coverage:\s\d+\.\d+\%/'
  artifacts:
    when: always
    paths:
      - coverage_report
      - test-report
    reports:
      coverage_report:
        coverage_format: cobertura
        path: coverage.xml
      junit: report.xml
# Normal test with longruns
tests-and-coverage-with-longrun:
stage: test
when: manual
allow_failure: true
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
before_script:
- pip install sympy --upgrade
- pip install -e .
script:
- env
- pip list
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- py.test -v -n $NUM_CORES
tags:
- docker
- cuda11
- AVX
# pipeline with latest python version
latest-python:
  stage: test
  extends: .every-commit
  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
  before_script:
    # The package is installed once here; the script section does not repeat it
    - pip install -e .
  script:
    - env
    - pip list
    - export NUM_CORES=$(nproc --all)
    # Select the non-interactive "template" matplotlib backend for the test run
    - mkdir -p ~/.config/matplotlib
    - echo "backend:template" > ~/.config/matplotlib/matplotlibrc
    - mkdir public
    # Uses the `pytest` entry point like the other non-manual test jobs
    - pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
  tags:
    - docker
    - AVX
  artifacts:
    when: always
    reports:
      junit: report.xml
# Minimal tests in windows environment
#minimal-windows:
# stage: test
# tags:
# - win
# script:
# - export NUM_CORES=$(nproc --all)
# - source /cygdrive/c/Users/build/Miniconda3/Scripts/activate
# - source activate pystencils
# - pip install joblib
# - pip list
# - python -c "import numpy"
# - py.test -v -m "not (notebook or longrun)"
ubuntu:
stage: test
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ubuntu
before_script:
- ln -s /usr/include/locale.h /usr/include/xlocale.h
- pip3 install -e .
script:
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- sed -i 's/--doctest-modules //g' pytest.ini
- env
- pip list
- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
tags:
- docker
- cuda11
- AVX
artifacts:
when: always
reports:
junit: report.xml
.multiarch_template:
stage: test
extends: .every-commit
allow_failure: true
before_script: &multiarch_before_script
# - pip3 install -v .
- export PYTHONPATH=src
- python3 -c "import pystencils as ps; ps.cpu.cpujit.read_config()"
- sed -i '/^fail_under.*/d' pytest.ini
script:
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- sed -i 's/--doctest-modules //g' pytest.ini
- env
- pip3 list
- python3 -m pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov=. --junitxml=report.xml tests/test_*vec*.py tests/test_random.py tests/test_half_precision.py
- python3 -m coverage xml
tags:
- docker
- AVX
artifacts:
when: always
paths:
- coverage_report
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml
junit: report.xml
arm64v8:
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
variables:
QEMU_CPU: "cortex-a76"
before_script:
- *multiarch_before_script
- sed -i s/march=native/march=armv8-a/g ~/.config/pystencils/config.json
ppc64le:
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ppc64le
before_script:
- *multiarch_before_script
- sed -i s/mcpu=native/mcpu=power8/g ~/.config/pystencils/config.json
arm64v9:
# SVE support is still unreliable in GCC 11 (incorrect code for fixed-width vectors, internal compiler errors).
# For half precision Clang is necessary
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
before_script:
- *multiarch_before_script
- sed -i s/march=native/march=armv9-a+sve2+sme/g ~/.config/pystencils/config.json
- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json
riscv64:
# RISC-V vector extension are currently not supported by GCC.
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/riscv64
variables:
# explicitly set SIMD as detection requires QEMU >= 8.1
PYSTENCILS_SIMD: "rvv"
QEMU_CPU: "rv64,v=true,zicboz=true"
before_script:
- *multiarch_before_script
- sed -i 's/march=native/march=rv64imfdvzicboz/g' ~/.config/pystencils/config.json
- sed -i s/g\+\+/clang++-15/g ~/.config/pystencils/config.json
minimal-conda:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
before_script:
- pip install -e .
script:
- python quicktest.py
tags:
- docker
- cuda
minimal-sympy-master:
stage: test
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
before_script:
- pip install -e .
script:
- python -m pip install --upgrade git+https://github.com/sympy/sympy.git
- python quicktest.py
allow_failure: true
tags:
- docker
- cuda
pycodegen-integration:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
stage: test
when: manual
allow_failure: true
script:
- git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@i10git.cs.fau.de/pycodegen/pycodegen.git
- cd pycodegen
- git submodule sync --recursive
- git submodule update --init --recursive
- git submodule foreach git fetch origin # compare the latest master version!
- git submodule foreach git reset --hard origin/master
- cd pystencils
- git remote add test $CI_REPOSITORY_URL
- git fetch test
- git reset --hard $CI_COMMIT_SHA
- cd ..
- pip install -e pystencils/
- pip install -e lbmpy/
- cmake --version
- ./install_walberla.sh
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- cd pystencils
- py.test -v -n $NUM_CORES --junitxml=report.xml .
- cd ../lbmpy
- py.test -v -n $NUM_CORES --junitxml=report.xml .
- cd ../walberla/build/
- make -j $NUM_CORES CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample
- make -j $NUM_CORES multiphaseCPU multiphaseGPU FluctuatingMRT FlowAroundSphereCodeGen FieldLayoutAndVectorizationTest GeneratedOutflowBC
- cd apps/benchmarks/UniformGridGPU
- make -j $NUM_CORES
- cd ../UniformGridCPU
- make -j $NUM_CORES
tags:
- docker
- cuda11
- AVX
artifacts:
when: always
reports:
junit: pycodegen/*/report.xml
# -------------------- Scheduled Tasks --------------------------------------------------------------------------
# Nightly test against the latest (pre-release) version of SymPy published on PyPI
nightly-sympy:
stage: nightly
needs: []
extends: .scheduled
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
before_script:
- pip install -e .
- pip install --upgrade --pre sympy
script:
- env
- pip list
- export NUM_CORES=$(nproc --all)
- mkdir -p ~/.config/matplotlib
- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
- mkdir public
- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
tags:
- docker
- AVX
- cuda
artifacts:
when: always
reports:
junit: report.xml
# -------------------- Linter & Documentation --------------------------------------------------------------------------
flake8-lint:
stage: pretest
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
script:
- flake8 src/pystencils
tags:
- docker
build-documentation:
stage: docs
extends: .every-commit
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/documentation
needs: []
before_script:
- pip install -e .
script:
- mkdir html_doc
- sphinx-build -b html doc html_doc
- sphinx-build -W -b html doc html_doc
tags:
- docker
artifacts:
paths:
- html_doc
pages:
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
extends: .every-commit-master
stage: deploy
needs: ["tests-and-coverage", "build-documentation"]
script:
- ls -l
- mv coverage_report html_doc
- mv html_doc public # folder has to be named "public" for gitlab to publish it
artifacts:
paths:
- public
tags:
- docker
[settings]
line_length=100
balanced_wrapping=True
multi_line_output=4
known_third_party=sympy
Contributors:
-------------
- Martin Bauer <martin.bauer@fau.de>
- Markus Holzer <markus.holzer@fau.de>
- Stephan Seitz <stephan.seitz@fau.de>
- Michael Kuron <mkuron@icp.uni-stuttgart.de>
- Jan Hönig <jan.hoenig@fau.de>
- Julian Hammer <julian.hammer@fau.de>
- Nils Kohl <nils.kohl@fau.de>
- Frederik Hennig <frederik.hennig@fau.de>
- Dominik Ernst <dominik.ernst@fau.de>
- Christian Godenschwager <christian.godenschwager@fau.de>
- Dominik Thoennes <dominik.thoennes@fau.de>
# Change Log
## Unreleased
### Removed
* LLVM backend because it was not used much and not well integrated in pystencils.
* OpenCL backend because it was not used much and not well integrated in pystencils.
# Contributing
Contributions to pystencils are always welcome, and they are greatly appreciated!
A list of open problems can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
Of course, it is also always appreciated to bring own ideas and problems to the community!
Please submit all contributions to the official [GitLab repository](https://i10git.cs.fau.de/pycodegen/pystencils) in the form of a Merge Request. Please do not submit git diffs or files containing the changes.
There also exists a GitHub repository, which is only a mirror to the GitLab repository. Contributions to the GitHub repository are not considered.
`pystencils` is an open-source python package under the license of AGPL3. Thus we consider the act of contributing to the code by submitting a Merge Request as the "Sign off" or agreement to the AGPL3 license.
You can contribute in many different ways:
## Types of Contributions
### Report Bugs
Report bugs at [https://i10git.cs.fau.de/pycodegen/pystencils/-/issues](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
For pystencils, it is often necessary to provide the python and [SymPy](https://www.sympy.org/en/index.html) versions used and hardware information like the
processor architecture and the compiler version used to compile the generated kernels.
### Fix Issues
Look through the GitLab issues. Different tags indicate the status of the issues.
The "bug" tag indicates problems with pystencils, while the "feature" tag shows ideas that should be added in the future.
### Write Documentation
The documentation of pystencils can be found [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils/). Jupyter notebooks are used to provide an
interactive start to pystencils. It is always appreciated if new document notebooks are provided
since this helps others a lot.
## Get Started!
Ready to contribute? Here is how to set up `pystencils` for local development.
1. Fork the `pystencils` repo on GitLab.
2. Clone your fork locally:
```bash
$ git clone https://i10git.cs.fau.de/your-name/pystencils
```
3. Install your local copy into a virtualenv. It is also recommended to use anaconda or miniconda to manage the python environments.
```bash
$ mkvirtualenv pystencils
$ cd pystencils/
$ pip install -e .
```
4. Create a branch for local development:
```bash
$ git checkout -b name-of-your-bugfix-or-feature
```
Now you can make your changes locally.
5. When you're done making changes, check that your changes pass flake8 and the
tests
```bash
$ flake8 src/pystencils
$ py.test -v -n $NUM_CORES -m "not longrun" .
```
To get all packages needed for development, a requirements list can be found [here](https://i10git.cs.fau.de/pycodegen/pycodegen/-/blob/master/conda_environment_dev.yml). This includes flake8 and pytest.
6. Commit your changes and push your branch to GitLab:
```bash
$ git add .
$ git commit -m "Your detailed description of your changes."
$ git push origin name-of-your-bugfix-or-feature
```
7. Submit a Merge Request on GitLab.
## Merge Request Guidelines
Before you submit a Merge Request, check that it meets these guidelines:
1. All functionality that is implemented through this Merge Request should be covered by unit tests. These are implemented in `pystencil_tests`
2. If the Merge Request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring.
3. If you have a maintainer status for `pystencils`, you can merge Merge Requests to the master branch. However, every Merge Request needs to be reviewed by another developer. Thus it is not allowed to merge a Merge Request, which is submitted by oneself.
## Tips
To run a subset of tests:
```bash
$ py.test my_test.py
```
\ No newline at end of file
This diff is collapsed.
include AUTHORS.txt
include CONTRIBUTING.md
include CHANGELOG.md
pystencils
==========
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/mabau/pystencils/master?filepath=doc%2Fnotebooks)
[![Docs](https://img.shields.io/badge/read-the_docs-brightgreen.svg)](https://pycodegen.pages.i10git.cs.fau.de/pystencils)
[![pypi-package](https://badge.fury.io/py/pystencils.svg)](https://badge.fury.io/py/pystencils)
[![pipeline status](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/pipeline.svg)](https://i10git.cs.fau.de/pycodegen/pystencils/commits/master)
[![coverage report](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/coverage.svg)](http://pycodegen.pages.i10git.cs.fau.de/pystencils/coverage_report)
Run blazingly fast stencil codes on numpy arrays.
*pystencils* uses sympy to define stencil operations, that can be executed on numpy arrays.
Exploiting the stencil structure makes *pystencils* run faster than normal numpy code and even as fast as Cython and numba,
[as demonstrated in this notebook](https://pycodegen.pages.i10git.cs.fau.de/pystencils/notebooks/demo_benchmark.html).
Here is a code snippet that computes the average of neighboring cells:
```python
import pystencils as ps
import numpy as np
f, g = ps.fields("f, g : [2D]")
stencil = ps.Assignment(g[0, 0],
(f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
kernel = ps.create_kernel(stencil).compile()
f_arr = np.random.rand(1000, 1000)
g_arr = np.empty_like(f_arr)
kernel(f=f_arr, g=g_arr)
```
*pystencils* is mostly used for numerical simulations using finite difference or finite volume methods.
It comes with automatic finite difference discretization for PDEs:
```python
import pystencils as ps
import sympy as sp
c, v = ps.fields("c, v(2): [2D]")
adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(c, sp.symbols("D")) + ps.fd.advection(c, v)
discretize = ps.fd.Discretization2ndOrder(dx=1, dt=0.01)
discretization = discretize(adv_diff_pde)
```
Installation
------------
```bash
pip install pystencils[interactive]
```
Without `[interactive]` you get a minimal version with very few dependencies.
All options:
- `gpu`: use this if an NVIDIA or AMD GPU is available and CUDA or ROCm is installed
- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
- `bench_db`: functionality to store benchmark result in object databases
- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
- `doc`: packages to build documentation
Options can be combined e.g.
```bash
pip install pystencils[interactive, gpu, doc]
```
pystencils is also fully compatible with Windows machines. If working with Visual Studio and cupy, make sure to run the example files first to ensure that cupy can find the compiler's executable.
Documentation
-------------
Read the docs [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and
check out the Jupyter notebooks in `doc/notebooks`. The **Changelog** of pystencils can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/CHANGELOG.md).
Authors
-------
Many thanks go to the [contributors](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/AUTHORS.txt) of pystencils.
### Please cite us
If you use pystencils in a publication, please cite the following articles:
Overview:
- M. Bauer et al, Code Generation for Massively Parallel Phase-Field Simulations. Association for Computing Machinery, 2019. https://doi.org/10.1145/3295500.3356186
Performance Modelling:
- D. Ernst et al, Analytical performance estimation during code generation on modern GPUs. Journal of Parallel and Distributed Computing, 2023. https://doi.org/10.1016/j.jpdc.2022.11.003
from pystencils.field import Field, extractCommonSubexpressions
from pystencils.data_types import TypedSymbol
from pystencils.slicing import makeSlice
from pystencils.kernelcreation import createKernel, createIndexedKernel
from pystencils.display_utils import showCode
import sympy as sp
from sympy.tensor import IndexedBase
from pystencils.field import Field
from pystencils.data_types import TypedSymbol, createType, castFunc
from pystencils.sympyextensions import fastSubs
class Node(object):
    """Base class for all AST nodes.

    A node stores a reference to its ``parent`` and exposes its children through
    the ``args`` property. Subclasses override ``args``, ``symbolsDefined`` and
    ``undefinedSymbols`` as needed.
    """

    def __init__(self, parent=None):
        """
        :param parent: node this node is attached to, or None
        """
        self.parent = parent

    # ``args`` has to be a property: ``subs`` and ``atoms`` below iterate over
    # ``self.args`` and every subclass in this module overrides it as a property.
    # As a plain method, iterating it raised a TypeError on nodes that did not
    # override it.
    @property
    def args(self):
        """Returns all arguments/children of this node"""
        return []

    @property
    def symbolsDefined(self):
        """Set of symbols which are defined by this node. """
        return set()

    @property
    def undefinedSymbols(self):
        """Symbols which are used but are not defined inside this node

        The base class raises NotImplementedError; subclasses provide the value.
        """
        raise NotImplementedError()

    def subs(self, *args, **kwargs):
        """Inplace! substitute, similar to sympys but modifies ast and returns None

        The call is forwarded unchanged to ``subs`` of every child in ``args``.
        """
        for a in self.args:
            a.subs(*args, **kwargs)

    @property
    def func(self):
        """The class of this node."""
        return self.__class__

    def atoms(self, argType):
        """
        Returns a set of all children which are an instance of the given argType

        The search recurses by calling ``atoms(argType)`` on every child.
        """
        result = set()
        for arg in self.args:
            if isinstance(arg, argType):
                result.add(arg)
            result.update(arg.atoms(argType))
        return result
class Conditional(Node):
    """AST node holding a condition expression, a block for the true case and an
    optional block for the false case."""

    def __init__(self, conditionExpr, trueBlock, falseBlock=None):
        """
        Create a new conditional node

        :param conditionExpr: sympy relational expression
        :param trueBlock: block which is run if conditional is true
        :param falseBlock: block which is run if conditional is false, or None if not needed
        """
        # Run the base initialiser so that ``self.parent`` exists even before
        # the node is attached to a tree.
        super(Conditional, self).__init__(parent=None)
        assert conditionExpr.is_Boolean or conditionExpr.is_Relational
        self.conditionExpr = conditionExpr

        def handleChild(c):
            # None stays None; anything that is not a Block is wrapped in one,
            # and the resulting block is attached to this node.
            if c is None:
                return None
            if not isinstance(c, Block):
                c = Block([c])
            c.parent = self
            return c

        self.trueBlock = handleChild(trueBlock)
        self.falseBlock = handleChild(falseBlock)

    def subs(self, *args, **kwargs):
        """In-place substitution in both blocks; the condition expression is
        replaced by the result of its own ``subs`` call."""
        self.trueBlock.subs(*args, **kwargs)
        if self.falseBlock:
            self.falseBlock.subs(*args, **kwargs)
        self.conditionExpr = self.conditionExpr.subs(*args, **kwargs)

    @property
    def args(self):
        """Condition expression, true block and, if present, false block."""
        result = [self.conditionExpr, self.trueBlock]
        if self.falseBlock:
            result.append(self.falseBlock)
        return result

    @property
    def symbolsDefined(self):
        """Always an empty set."""
        return set()

    @property
    def undefinedSymbols(self):
        """Undefined symbols of both blocks plus all symbols of the condition."""
        result = self.trueBlock.undefinedSymbols
        if self.falseBlock:
            result.update(self.falseBlock.undefinedSymbols)
        result.update(self.conditionExpr.atoms(sp.Symbol))
        return result

    def __str__(self):
        return 'if:({!s}) '.format(self.conditionExpr)

    def __repr__(self):
        return 'if:({!r}) '.format(self.conditionExpr)
class KernelFunction(Node):
    """Top-level AST node wrapping a kernel body together with its function name,
    ghost layer information and the parameter list derived from the body."""

    class Argument:
        """One kernel parameter.

        The name prefix (``Field.DATA_PREFIX``, ``Field.SHAPE_PREFIX``,
        ``Field.STRIDE_PREFIX``) decides whether the parameter belongs to a field
        and, if so, which of the field flags is set.
        """

        def __init__(self, name, dtype, symbol, kernelFunctionNode):
            from pystencils.transformations import symbolNameToVariableName
            self.name = name
            self.dtype = dtype
            self.isFieldPtrArgument = False
            self.isFieldShapeArgument = False
            self.isFieldStrideArgument = False
            self.isFieldArgument = False
            self.fieldName = ""
            self.coordinate = None
            self.symbol = symbol

            # The first matching prefix wins, checked in the order data, shape, stride
            prefixToFlag = (
                (Field.DATA_PREFIX, 'isFieldPtrArgument'),
                (Field.SHAPE_PREFIX, 'isFieldShapeArgument'),
                (Field.STRIDE_PREFIX, 'isFieldStrideArgument'),
            )
            for prefix, flagName in prefixToFlag:
                if name.startswith(prefix):
                    setattr(self, flagName, True)
                    self.isFieldArgument = True
                    self.fieldName = name[len(prefix):]
                    break

            self.field = None
            if self.isFieldArgument:
                # Look the field up by the variable name derived from its name
                accessedByName = {}
                for accessedField in kernelFunctionNode.fieldsAccessed:
                    accessedByName[symbolNameToVariableName(accessedField.name)] = accessedField
                self.field = accessedByName[self.fieldName]

        def __lt__(self, other):
            """Order: field pointers, then shapes, then strides, then everything
            else; arguments of the same kind are ordered by name."""
            def score(argument):
                if argument.isFieldPtrArgument:
                    return -4
                if argument.isFieldShapeArgument:
                    return -3
                if argument.isFieldStrideArgument:
                    return -2
                return 0

            ownScore, otherScore = score(self), score(other)
            if ownScore != otherScore:
                return ownScore < otherScore
            return self.name < other.name

        def __repr__(self):
            return '<{0} {1}>'.format(self.dtype, self.name)

    def __init__(self, body, ghostLayers=None, functionName="kernel"):
        super(KernelFunction, self).__init__()
        self._body = body
        self._body.parent = self
        self._parameters = None
        self.functionName = functionName
        self.compile = None
        self.ghostLayers = ghostLayers
        # these variables are assumed to be global, so no automatic parameter is generated for them
        self.globalVariables = set()

    @property
    def symbolsDefined(self):
        """Always an empty set."""
        return set()

    @property
    def undefinedSymbols(self):
        """Always an empty set."""
        return set()

    @property
    def parameters(self):
        """Sorted list of ``Argument`` objects, recomputed on every access."""
        self._updateParameters()
        return self._parameters

    @property
    def body(self):
        return self._body

    @property
    def args(self):
        return [self._body]

    @property
    def fieldsAccessed(self):
        """Set of Field instances: fields which are accessed inside this kernel function"""
        return set(access.field for access in self.atoms(ResolvedFieldAccess))

    def _updateParameters(self):
        """Rebuild the parameter list from the body's undefined symbols, leaving
        out everything listed in ``globalVariables``."""
        freeSymbols = self._body.undefinedSymbols - self.globalVariables
        arguments = []
        for s in freeSymbols:
            arguments.append(KernelFunction.Argument(s.name, s.dtype, s, self))
        arguments.sort()
        self._parameters = arguments

    def __str__(self):
        self._updateParameters()
        indentedBody = "\t" + "\t".join(str(self.body).splitlines(True))
        return '{0} {1}({2})\n{3}'.format(type(self).__name__, self.functionName, self.parameters,
                                          indentedBody)

    def __repr__(self):
        self._updateParameters()
        return '{0} {1}({2})'.format(type(self).__name__, self.functionName, self.parameters)
class Block(Node):
    """Ordered list of child nodes; every child gets this block as its parent."""

    def __init__(self, listOfNodes):
        """
        :param listOfNodes: list of child nodes; the list object itself is stored
        """
        # was ``super(Node, self)``, which skipped Node.__init__ entirely
        super(Block, self).__init__(parent=None)
        self._nodes = listOfNodes
        for n in self._nodes:
            n.parent = self

    @property
    def args(self):
        """The internal list of child nodes (not a copy)."""
        return self._nodes

    def insertFront(self, node):
        """Attach ``node`` to this block and put it at position 0."""
        node.parent = self
        self._nodes.insert(0, node)

    def insertBefore(self, newNode, insertBefore):
        """Insert ``newNode`` in front of the existing child ``insertBefore``.

        If ``newNode`` is a declaring ``SympyAssignment``, the insert position is
        moved further up past all directly preceding loops and conditionals.
        """
        newNode.parent = self
        idx = self._nodes.index(insertBefore)

        # move all assignment (definitions to the top)
        if isinstance(newNode, SympyAssignment) and newNode.isDeclaration:
            while idx > 0:
                pn = self._nodes[idx - 1]
                if isinstance(pn, (LoopOverCoordinate, Conditional)):
                    idx -= 1
                else:
                    break
        self._nodes.insert(idx, newNode)

    def append(self, node):
        """Append a single node, or every node of a list/tuple, to this block."""
        if isinstance(node, (list, tuple)):
            for n in node:
                n.parent = self
                self._nodes.append(n)
        else:
            node.parent = self
            self._nodes.append(node)

    def takeChildNodes(self):
        """Return the current child list and leave this block empty.

        The parents of the returned nodes are not changed.
        """
        tmp = self._nodes
        self._nodes = []
        return tmp

    def replace(self, child, replacements):
        """Replace ``child`` by a single node or by a list/tuple of nodes.

        Tuples are accepted in addition to lists, matching ``append``.
        Raises ValueError (from ``list.index``) if ``child`` is not in this block.
        """
        idx = self._nodes.index(child)
        del self._nodes[idx]
        if isinstance(replacements, (list, tuple)):
            for e in replacements:
                e.parent = self
            self._nodes = self._nodes[:idx] + list(replacements) + self._nodes[idx:]
        else:
            replacements.parent = self
            self._nodes.insert(idx, replacements)

    @property
    def symbolsDefined(self):
        """Union of the symbols defined by all children."""
        result = set()
        for a in self.args:
            result.update(a.symbolsDefined)
        return result

    @property
    def undefinedSymbols(self):
        """Undefined symbols of all children minus the symbols any child defines."""
        result = set()
        definedSymbols = set()
        for a in self.args:
            result.update(a.undefinedSymbols)
            definedSymbols.update(a.symbolsDefined)
        return result - definedSymbols

    def __str__(self):
        return "Block " + ''.join('{!s}\n'.format(node) for node in self._nodes)

    def __repr__(self):
        return "Block"
class PragmaBlock(Block):
    """Block that additionally stores a pragma line, which is also its repr."""

    def __init__(self, pragmaLine, listOfNodes):
        """
        :param pragmaLine: text returned by ``repr`` of this block
        :param listOfNodes: child nodes, handed on to ``Block``
        """
        super(PragmaBlock, self).__init__(listOfNodes)
        self.pragmaLine = pragmaLine
        # attach every child to this block
        for child in listOfNodes:
            child.parent = self

    def __repr__(self):
        return self.pragmaLine
class LoopOverCoordinate(Node):
    """Loop over one coordinate with the bounds ``start``/``stop`` and increment ``step``.

    The loop counter is named ``<LOOP_COUNTER_NAME_PREFIX>_<coordinate>``.
    """
    LOOP_COUNTER_NAME_PREFIX = "ctr"

    def __init__(self, body, coordinateToLoopOver, start, stop, step=1):
        """
        :param body: node executed in every iteration; it is attached to this loop
        :param coordinateToLoopOver: coordinate used to build the loop counter name
        :param start: first value of the loop counter
        :param stop: bound the loop counter is compared against with ``<``
        :param step: increment of the loop counter
        """
        # Run the base initialiser so that ``self.parent`` always exists
        super(LoopOverCoordinate, self).__init__(parent=None)
        self.body = body
        body.parent = self
        self.coordinateToLoopOver = coordinateToLoopOver
        self.start = start
        self.stop = stop
        self.step = step
        self.prefixLines = []

    def newLoopWithDifferentBody(self, newBody):
        """Create a loop with the same coordinate and bounds around ``newBody``;
        ``prefixLines`` is copied into the new loop."""
        result = LoopOverCoordinate(newBody, self.coordinateToLoopOver, self.start, self.stop, self.step)
        result.prefixLines = list(self.prefixLines)
        return result

    def subs(self, *args, **kwargs):
        """In-place substitution in the body and in every bound that offers ``subs``."""
        self.body.subs(*args, **kwargs)
        if hasattr(self.start, "subs"):
            self.start = self.start.subs(*args, **kwargs)
        if hasattr(self.stop, "subs"):
            self.stop = self.stop.subs(*args, **kwargs)
        if hasattr(self.step, "subs"):
            self.step = self.step.subs(*args, **kwargs)

    @property
    def args(self):
        """The body, followed by those of start/stop/step that have an ``args`` attribute."""
        result = [self.body]
        for e in [self.start, self.stop, self.step]:
            if hasattr(e, "args"):
                result.append(e)
        return result

    def replace(self, child, replacement):
        """Replace the first of body, start, step, stop that compares equal to ``child``.

        Nothing happens if none of them matches.
        """
        if child == self.body:
            self.body = replacement
        elif child == self.start:
            self.start = replacement
        elif child == self.step:
            self.step = replacement
        elif child == self.stop:
            self.stop = replacement

    @property
    def symbolsDefined(self):
        """Set containing only the loop counter symbol."""
        return set([self.loopCounterSymbol])

    @property
    def undefinedSymbols(self):
        """Undefined symbols of the body plus the symbols in the bounds, without
        the loop counter itself."""
        # work on a copy so the set handed out by the body is never modified
        result = set(self.body.undefinedSymbols)
        for possibleSymbol in [self.start, self.stop, self.step]:
            if isinstance(possibleSymbol, (Node, sp.Basic)):
                result.update(possibleSymbol.atoms(sp.Symbol))
        return result - set([self.loopCounterSymbol])

    @staticmethod
    def getLoopCounterName(coordinateToLoopOver):
        return "%s_%s" % (LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX, coordinateToLoopOver)

    @property
    def loopCounterName(self):
        return LoopOverCoordinate.getLoopCounterName(self.coordinateToLoopOver)

    @staticmethod
    def isLoopCounterSymbol(symbol):
        """Return the coordinate encoded in a loop counter symbol, otherwise None.

        A loop counter is named ``<prefix>_<integer>`` and has integer type. The
        separator is part of the check: names that merely start with the prefix
        (e.g. ``ctrX1`` or ``ctrl_x``) used to be misread as counters or raised
        ValueError, now they yield None.
        """
        prefix = LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX + "_"
        if not symbol.name.startswith(prefix):
            return None
        # symbols without a dtype attribute cannot be loop counters
        if getattr(symbol, 'dtype', None) != createType('int'):
            return None
        try:
            return int(symbol.name[len(prefix):])
        except ValueError:
            return None

    @staticmethod
    def getLoopCounterSymbol(coordinateToLoopOver):
        return TypedSymbol(LoopOverCoordinate.getLoopCounterName(coordinateToLoopOver), 'int')

    @property
    def loopCounterSymbol(self):
        return LoopOverCoordinate.getLoopCounterSymbol(self.coordinateToLoopOver)

    @property
    def isOutermostLoop(self):
        """True if no parent of type LoopOverCoordinate is found."""
        from pystencils.transformations import getNextParentOfType
        return getNextParentOfType(self, LoopOverCoordinate) is None

    @property
    def isInnermostLoop(self):
        """True if no LoopOverCoordinate is found below this node."""
        return len(self.atoms(LoopOverCoordinate)) == 0

    def __str__(self):
        return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})\n{!s}'.format(self.loopCounterName, self.start,
                                                                     self.loopCounterName, self.stop,
                                                                     self.loopCounterName, self.step,
                                                                     ("\t" + "\t".join(str(self.body).splitlines(True))))

    def __repr__(self):
        return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})'.format(self.loopCounterName, self.start,
                                                              self.loopCounterName, self.stop,
                                                              self.loopCounterName, self.step)
class SympyAssignment(Node):
    """Assignment of the expression ``rhs`` to ``lhs``.

    The assignment counts as a declaration unless the left hand side is a field
    access, an indexed object or a cast.
    """

    def __init__(self, lhsSymbol, rhsTerm, isConst=True):
        """
        :param lhsSymbol: left hand side of the assignment
        :param rhsTerm: right hand side expression
        :param isConst: value reported by the ``isConst`` property
        """
        # Run the base initialiser so that ``self.parent`` always exists
        super(SympyAssignment, self).__init__(parent=None)
        self._lhsSymbol = lhsSymbol
        self.rhs = rhsTerm
        self._isDeclaration = self._isDeclarationOf(lhsSymbol)
        self._isConst = isConst

    @staticmethod
    def _isDeclarationOf(lhs):
        """Decide whether assigning to ``lhs`` declares a new symbol.

        Shared by the constructor and the ``lhs`` setter so both apply the same
        rule. The constructor used to test for ResolvedFieldAccess only, while
        the setter tested for the more general sp.Indexed.
        """
        if lhs.func == castFunc:
            return False
        return not isinstance(lhs, (Field.Access, sp.Indexed))

    @property
    def lhs(self):
        return self._lhsSymbol

    @lhs.setter
    def lhs(self, newValue):
        self._lhsSymbol = newValue
        self._isDeclaration = self._isDeclarationOf(newValue)

    def subs(self, *args, **kwargs):
        """In-place substitution on both sides using ``fastSubs``."""
        self.lhs = fastSubs(self.lhs, *args, **kwargs)
        self.rhs = fastSubs(self.rhs, *args, **kwargs)

    @property
    def args(self):
        return [self._lhsSymbol, self.rhs]

    @property
    def symbolsDefined(self):
        """The left hand side if this is a declaration, otherwise an empty set."""
        if not self._isDeclaration:
            return set()
        return set([self._lhsSymbol])

    @property
    def undefinedSymbols(self):
        """All symbols of both sides; for every field access on the right hand
        side one loop counter symbol per offset entry is added as well."""
        result = self.rhs.atoms(sp.Symbol)
        # Add loop counters if there a field accesses
        loopCounters = set()
        for symbol in result:
            if isinstance(symbol, Field.Access):
                for i in range(len(symbol.offsets)):
                    loopCounters.add(LoopOverCoordinate.getLoopCounterSymbol(i))
        result.update(loopCounters)
        result.update(self._lhsSymbol.atoms(sp.Symbol))
        return result

    @property
    def isDeclaration(self):
        return self._isDeclaration

    @property
    def isConst(self):
        return self._isConst

    def replace(self, child, replacement):
        """Replace the left or the right hand side by ``replacement``.

        :raises ValueError: if ``child`` is neither of the two sides
        """
        if child == self.lhs:
            replacement.parent = self
            self.lhs = replacement
        elif child == self.rhs:
            replacement.parent = self
            self.rhs = replacement
        else:
            # report the node that was searched for, not the replacement
            raise ValueError('%s is not in args of %s' % (child, self.__class__))

    def __repr__(self):
        return repr(self.lhs) + " = " + repr(self.rhs)
class ResolvedFieldAccess(sp.Indexed):
    """Indexed object with a single linearized index that additionally carries
    the field, the offsets and the index coordinate values it was created with."""

    def __new__(cls, base, linearizedIndex, field, offsets, idxCoordinateValues):
        # anything that is not yet an IndexedBase gets wrapped into one
        indexedBase = base if isinstance(base, IndexedBase) else IndexedBase(base, shape=(1,))
        instance = super(ResolvedFieldAccess, cls).__new__(cls, indexedBase, linearizedIndex)
        instance.field = field
        instance.offsets = offsets
        instance.idxCoordinateValues = idxCoordinateValues
        return instance

    def _eval_subs(self, old, new):
        """Substitute in the index only; base and the extra attributes are kept."""
        substitutedIndex = self.args[1].subs(old, new)
        return ResolvedFieldAccess(self.args[0], substitutedIndex,
                                   self.field, self.offsets, self.idxCoordinateValues)

    def fastSubs(self, subsDict):
        """Return the mapped value if this access is a key of ``subsDict``,
        otherwise a new access with base and index substituted."""
        if self in subsDict:
            return subsDict[self]
        newBase = self.args[0].subs(subsDict)
        newIndex = self.args[1].subs(subsDict)
        return ResolvedFieldAccess(newBase, newIndex,
                                   self.field, self.offsets, self.idxCoordinateValues)

    def _hashable_content(self):
        # extend the inherited content by offsets, index coordinate values and field
        inherited = super(ResolvedFieldAccess, self)._hashable_content()
        extra = (repr(self.idxCoordinateValues), hash(self.field))
        return inherited + tuple(self.offsets) + extra

    @property
    def typedSymbol(self):
        """The label of the base."""
        return self.base.label

    def __str__(self):
        inheritedStr = super(ResolvedFieldAccess, self).__str__()
        return "%s (%s)" % (inheritedStr, self.typedSymbol.dtype)

    def __getnewargs__(self):
        return self.base, self.indices[0], self.field, self.offsets, self.idxCoordinateValues
class TemporaryMemoryAllocation(Node):
    """Node that defines ``typedSymbol``; ``size`` is stored alongside it."""

    def __init__(self, typedSymbol, size):
        """
        :param typedSymbol: symbol defined by this node
        :param size: size expression or plain value
        """
        # Run the base initialiser so that ``self.parent`` always exists
        super(TemporaryMemoryAllocation, self).__init__(parent=None)
        self.symbol = typedSymbol
        self.size = size

    @property
    def symbolsDefined(self):
        """Set containing only the stored symbol."""
        return set([self.symbol])

    @property
    def undefinedSymbols(self):
        """Symbols occurring in ``size`` if it is a sympy object, else an empty set."""
        if isinstance(self.size, sp.Basic):
            return self.size.atoms(sp.Symbol)
        else:
            return set()

    @property
    def args(self):
        return [self.symbol]
class TemporaryMemoryFree(Node):
    """Node that stores ``typedSymbol``; it defines and uses no symbols and has
    no children."""

    def __init__(self, typedSymbol):
        """
        :param typedSymbol: symbol stored in ``self.symbol``
        """
        # Run the base initialiser so that ``self.parent`` always exists
        super(TemporaryMemoryFree, self).__init__(parent=None)
        self.symbol = typedSymbol

    @property
    def symbolsDefined(self):
        return set()

    @property
    def undefinedSymbols(self):
        return set()

    @property
    def args(self):
        return []
from .cbackend import generateC
try:
from .dot import dotprint
from .llvm import generateLLVM
except ImportError:
pass
import sympy as sp
try:
from sympy.utilities.codegen import CCodePrinter
except ImportError:
from sympy.printing.ccode import C99CodePrinter as CCodePrinter
from collections import namedtuple
from sympy.core.mul import _keep_coeff
from sympy.core import S
from pystencils.astnodes import Node, ResolvedFieldAccess, SympyAssignment
from pystencils.data_types import createType, PointerType, getTypeOfExpression, VectorType, castFunc
from pystencils.backends.simd_instruction_sets import selectedInstructionSet
def generateC(astNode, signatureOnly=False):
    """
    Prints the abstract syntax tree as C function

    Constants are printed with an ``f`` suffix unless one of the accessed fields has type
    double; the 'double' entry of ``selectedInstructionSet`` is used for vector code.

    :param astNode: AST to print; must offer ``fieldsAccessed``
    :param signatureOnly: forwarded to the backend; if True only the function declaration is printed
    :return: generated C code as string
    """
    accessedDtypes = {f.dtype for f in astNode.fieldsAccessed}
    printer = CBackend(constantsAsFloats=createType("double") not in accessedDtypes,
                       signatureOnly=signatureOnly,
                       vectorInstructionSet=selectedInstructionSet['double'])
    return printer(astNode)
def getHeaders(astNode):
    """Recursively collect the header names required by an AST.

    A node contributes its own ``headers`` attribute if it has one; otherwise a
    ``SympyAssignment`` with a vector-typed right hand side contributes the headers of the
    'double' instruction set. Children (``args``) that are ``Node`` instances are visited too.

    :return: set of header strings
    """
    collected = set()
    if hasattr(astNode, 'headers'):
        collected.update(astNode.headers)
    elif isinstance(astNode, SympyAssignment) and type(getTypeOfExpression(astNode.rhs)) is VectorType:
        collected.update(selectedInstructionSet['double']['headers'])

    for child in astNode.args:
        if not isinstance(child, Node):
            continue
        collected.update(getHeaders(child))
    return collected
# --------------------------------------- Backend Specific Nodes -------------------------------------------------------
class CustomCppCode(Node):
    """AST node that injects a verbatim piece of C/C++ code into the generated kernel."""

    def __init__(self, code, symbolsRead, symbolsDefined):
        """
        :param code: source text; stored with a leading newline and printed unchanged by the backend
        :param symbolsRead: iterable of symbols the code snippet reads
        :param symbolsDefined: iterable of symbols the code snippet defines
        """
        self._code = "\n" + code
        self._symbolsRead = set(symbolsRead)
        self._symbolsDefined = set(symbolsDefined)
        # header names required by the snippet; picked up by getHeaders through the 'headers' attribute
        self.headers = []

    @property
    def code(self):
        """The stored code text (including the leading newline)."""
        return self._code

    @property
    def args(self):
        """This node has no children."""
        return []

    @property
    def symbolsDefined(self):
        """Set of symbols defined by the snippet."""
        return self._symbolsDefined

    @property
    def undefinedSymbols(self):
        """Symbols that are read by the snippet but not defined by it.

        The original computed ``symbolsDefined - symbolsRead``, i.e. the inverted difference,
        which reported defined symbols as undefined and hid the ones that are actually needed.
        """
        return self._symbolsRead - self.symbolsDefined
class PrintNode(CustomCppCode):
    """Custom code node that writes ``<name> = <value>`` of a symbol to ``std::cout``."""

    def __init__(self, symbolToPrint):
        """:param symbolToPrint: symbol whose ``name`` is inserted into the print statement"""
        symbolName = symbolToPrint.name
        statement = '\nstd::cout << "%s = " << %s << std::endl; \n' % (symbolName, symbolName)
        super(PrintNode, self).__init__(statement, symbolsRead=[symbolToPrint], symbolsDefined=set())
        # std::cout needs <iostream>
        self.headers.append("<iostream>")
# ------------------------------------------- Printer ------------------------------------------------------------------
class CBackend(object):
    """Printer that converts an AST into C/C++ source text.

    Dispatch is done by class name: for a node the classes in its MRO are tried in order and the
    first existing method ``_print_<ClassName>`` is called.
    """

    def __init__(self, constantsAsFloats=False, sympyPrinter=None, signatureOnly=False, vectorInstructionSet=None):
        """
        :param constantsAsFloats: forwarded to the default sympy printers
        :param sympyPrinter: printer used for sympy expressions; if None a ``VectorizedCustomSympyPrinter``
                             (when ``vectorInstructionSet`` is given) or a ``CustomSympyPrinter`` is created
        :param signatureOnly: if True, kernel functions are printed as declaration without body
        :param vectorInstructionSet: instruction set description used for vector stores and by the
                                     vectorized sympy printer
        """
        if sympyPrinter is None:
            # Create exactly one printer; the original built a CustomSympyPrinter first and then
            # unconditionally replaced it.
            if vectorInstructionSet is not None:
                self.sympyPrinter = VectorizedCustomSympyPrinter(vectorInstructionSet, constantsAsFloats)
            else:
                self.sympyPrinter = CustomSympyPrinter(constantsAsFloats)
        else:
            self.sympyPrinter = sympyPrinter

        self._vectorInstructionSet = vectorInstructionSet
        self._indent = " "
        self._signatureOnly = signatureOnly

    def __call__(self, node):
        """Print ``node`` and return the code as string.

        ``VectorType.instructionSet`` is set to this backend's instruction set while printing and
        restored afterwards, also when printing raises.
        """
        prevIs = VectorType.instructionSet
        VectorType.instructionSet = self._vectorInstructionSet
        try:
            return str(self._print(node))
        finally:
            VectorType.instructionSet = prevIs

    def _print(self, node):
        """Dispatch to the ``_print_<ClassName>`` method of the first class in the node's MRO that has one.

        :raises NotImplementedError: if no class in the MRO is supported
        """
        for cls in type(node).__mro__:
            methodName = "_print_" + cls.__name__
            if hasattr(self, methodName):
                return getattr(self, methodName)(node)
        # After the loop 'cls' is the last MRO entry (object), so name the node's own type instead
        raise NotImplementedError("CBackend does not support node of type " + type(node).__name__)

    def _print_KernelFunction(self, node):
        """Print the function declaration, followed by the body unless only the signature is requested."""
        functionArguments = ["%s %s" % (str(s.dtype), s.name) for s in node.parameters]
        funcDeclaration = "FUNC_PREFIX void %s(%s)" % (node.functionName, ", ".join(functionArguments))
        if self._signatureOnly:
            return funcDeclaration

        body = self._print(node.body)
        return funcDeclaration + "\n" + body

    def _print_Block(self, node):
        """Print all children line by line, indent every line and wrap the result in braces."""
        blockContents = "\n".join([self._print(child) for child in node.args])
        return "{\n%s\n}" % (self._indent + self._indent.join(blockContents.splitlines(True)))

    def _print_PragmaBlock(self, node):
        """Print the node's pragma line followed by the block itself."""
        return "%s\n%s" % (node.pragmaLine, self._print_Block(node))

    def _print_LoopOverCoordinate(self, node):
        """Print a ``for`` loop with an ``int`` counter, preceded by the node's prefix lines (if any)."""
        counterVar = node.loopCounterName
        start = "int %s = %s" % (counterVar, self.sympyPrinter.doprint(node.start))
        condition = "%s < %s" % (counterVar, self.sympyPrinter.doprint(node.stop))
        update = "%s += %s" % (counterVar, self.sympyPrinter.doprint(node.step),)
        loopStr = "for (%s; %s; %s)" % (start, condition, update)

        prefix = "\n".join(node.prefixLines)
        if prefix:
            prefix += "\n"
        return "%s%s\n%s" % (prefix, loopStr, self._print(node.body))

    def _print_SympyAssignment(self, node):
        """Print a declaration (optionally ``const``), a vector store, or a plain assignment."""
        if node.isDeclaration:
            dtype = "const " + str(node.lhs.dtype) + " " if node.isConst else str(node.lhs.dtype) + " "
            return "%s %s = %s;" % (dtype, self.sympyPrinter.doprint(node.lhs), self.sympyPrinter.doprint(node.rhs))
        else:
            lhsType = getTypeOfExpression(node.lhs)
            if type(lhsType) is VectorType and node.lhs.func == castFunc:
                # vector-typed cast on the left hand side: write through the unaligned store intrinsic
                return self._vectorInstructionSet['storeU'].format("&" + self.sympyPrinter.doprint(node.lhs.args[0]),
                                                                   self.sympyPrinter.doprint(node.rhs)) + ';'
            else:
                return "%s = %s;" % (self.sympyPrinter.doprint(node.lhs), self.sympyPrinter.doprint(node.rhs))

    def _print_TemporaryMemoryAllocation(self, node):
        """Print a ``new[]`` allocation of ``node.size`` elements of the symbol's base type."""
        return "%s %s = new %s[%s];" % (node.symbol.dtype, self.sympyPrinter.doprint(node.symbol.name),
                                        node.symbol.dtype.baseType, self.sympyPrinter.doprint(node.size))

    def _print_TemporaryMemoryFree(self, node):
        """Print the matching ``delete []`` statement."""
        return "delete [] %s;" % (self.sympyPrinter.doprint(node.symbol.name),)

    def _print_CustomCppCode(self, node):
        """Custom code is emitted verbatim."""
        return node.code

    def _print_Conditional(self, node):
        """Print an ``if`` with its true block and, if present, an ``else`` with the false block."""
        conditionExpr = self.sympyPrinter.doprint(node.conditionExpr)
        trueBlock = self._print_Block(node.trueBlock)
        result = "if (%s)\n%s " % (conditionExpr, trueBlock)
        if node.falseBlock:
            falseBlock = self._print_Block(node.falseBlock)
            result += "else " + falseBlock
        return result
# ------------------------------------------ Helper function & classes -------------------------------------------------
class CustomSympyPrinter(CCodePrinter):
    """C code printer with adapted output for powers, rationals, floats, equalities, piecewise and casts."""

    def __init__(self, constantsAsFloats=False):
        """:param constantsAsFloats: if True, printed rational and float constants get an ``f`` suffix"""
        self._constantsAsFloats = constantsAsFloats
        super(CustomSympyPrinter, self).__init__()

    def _print_Pow(self, expr):
        """Don't use std::pow function, for small integer exponents, write as multiplication"""
        exponent = expr.exp
        smallIntegerExponent = exponent.is_integer and exponent.is_number and 0 < exponent < 8
        if not smallIntegerExponent:
            return super(CustomSympyPrinter, self)._print_Pow(expr)
        repeatedBase = [expr.base] * exponent
        return "(" + self._print(sp.Mul(*repeatedBase, evaluate=False)) + ")"

    def _print_Rational(self, expr):
        """Evaluate all rationals i.e. print 0.25 instead of 1.0/4.0"""
        suffix = "f" if self._constantsAsFloats else ""
        return str(expr.evalf().num) + suffix

    def _print_Equality(self, expr):
        """Equality operator is not printable in default printer"""
        return "((%s) == (%s))" % (self._print(expr.lhs), self._print(expr.rhs))

    def _print_Piecewise(self, expr):
        """Print piecewise in one line (remove newlines)"""
        multiLine = super(CustomSympyPrinter, self)._print_Piecewise(expr)
        return "".join(multiLine.split("\n"))

    def _print_Float(self, expr):
        """Print the float via ``str``, with an ``f`` suffix if constants are printed as floats."""
        suffix = "f" if self._constantsAsFloats else ""
        return str(expr) + suffix

    def _print_Function(self, expr):
        """Print ``castFunc(arg, type)`` as a pointer reinterpretation; defer everything else to the parent."""
        if expr.func != castFunc:
            return super(CustomSympyPrinter, self)._print_Function(expr)
        arg, targetType = expr.args
        return "*((%s)(& %s))" % (PointerType(targetType), self._print(arg))
class VectorizedCustomSympyPrinter(CustomSympyPrinter):
    """Sympy printer that emits SIMD intrinsic calls for vector-typed expressions.

    ``instructionSet`` is indexed by operator / operation id ('+', '-', '*', '/', '&', '|', '==',
    relational operators, 'sqrt', 'blendv', 'makeVec', 'loadU', 'width'); the entries used for
    printing are format strings whose placeholders are filled with the printed operands.
    Most overrides first try `_scalarFallback`, so expressions that are not of ``VectorType``
    are printed by the scalar parent printer.
    """
    # (sign, printed term) pair collected for every summand in _print_Add
    SummandInfo = namedtuple("SummandInfo", ['sign', 'term'])

    def __init__(self, instructionSet, constantsAsFloats=False):
        """
        :param instructionSet: mapping with intrinsic format strings and the vector 'width'
        :param constantsAsFloats: forwarded to ``CustomSympyPrinter``
        """
        super(VectorizedCustomSympyPrinter, self).__init__(constantsAsFloats)
        self.instructionSet = instructionSet

    def _scalarFallback(self, funcName, expr, *args, **kwargs):
        """Print ``expr`` with the parent's method ``funcName`` if its type is not a ``VectorType``.

        :return: the parent's result for non-vector expressions; None for vector expressions
                 (after asserting that the vector width matches the instruction set)
        """
        exprType = getTypeOfExpression(expr)
        if type(exprType) is not VectorType:
            return getattr(super(VectorizedCustomSympyPrinter, self), funcName)(expr, *args, **kwargs)
        else:
            assert self.instructionSet['width'] == exprType.width
            return None

    def _print_Function(self, expr):
        """Print casts to a vector type as load ('loadU') or broadcast ('makeVec'); defer otherwise."""
        if expr.func == castFunc:
            arg, dtype = expr.args
            if type(dtype) is VectorType:
                if type(arg) is ResolvedFieldAccess:
                    # field access: load from the address of the accessed element
                    return self.instructionSet['loadU'].format("& " + self._print(arg))
                else:
                    return self.instructionSet['makeVec'].format(self._print(arg))

        return super(VectorizedCustomSympyPrinter, self)._print_Function(expr)

    def _print_And(self, expr):
        """Chain all arguments left to right with the '&' intrinsic."""
        result = self._scalarFallback('_print_And', expr)
        if result:
            return result

        argStrings = [self._print(a) for a in expr.args]
        assert len(argStrings) > 0
        result = argStrings[0]
        for item in argStrings[1:]:
            result = self.instructionSet['&'].format(result, item)
        return result

    def _print_Or(self, expr):
        """Chain all arguments left to right with the '|' intrinsic."""
        result = self._scalarFallback('_print_Or', expr)
        if result:
            return result

        argStrings = [self._print(a) for a in expr.args]
        assert len(argStrings) > 0
        result = argStrings[0]
        for item in argStrings[1:]:
            result = self.instructionSet['|'].format(result, item)
        return result

    def _print_Add(self, expr, order=None):
        """Print a sum as nested '+' / '-' intrinsic calls, positive summands first."""
        result = self._scalarFallback('_print_Add', expr)
        if result:
            return result

        summands = []
        for term in expr.args:
            if term.func == sp.Mul:
                # products report their sign separately so that it can become a subtraction
                sign, t = self._print_Mul(term, insideAdd=True)
            else:
                t = self._print(term)
                sign = 1
            summands.append(self.SummandInfo(sign, t))
        # Use positive terms first
        summands.sort(key=lambda e: e.sign, reverse=True)
        # if no positive term exists, prepend a zero
        if summands[0].sign == -1:
            summands.insert(0, self.SummandInfo(1, "0"))

        assert len(summands) >= 2
        processed = summands[0].term
        for summand in summands[1:]:
            func = self.instructionSet['-'] if summand.sign == -1 else self.instructionSet['+']
            processed = func.format(processed, summand.term)
        return processed

    def _print_Pow(self, expr):
        """Print integer exponents 1..7 as repeated multiplication, -1 as division and 0.5 as 'sqrt'.

        :raises ValueError: for any other exponent of a vector-typed power
        """
        result = self._scalarFallback('_print_Pow', expr)
        if result:
            return result

        if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
            return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
        else:
            if expr.exp == -1:
                one = self.instructionSet['makeVec'].format(1.0)
                return self.instructionSet['/'].format(one, self._print(expr.base))
            elif expr.exp == 0.5:
                return self.instructionSet['sqrt'].format(self._print(expr.base))
            else:
                raise ValueError("Generic exponential not supported")

    def _print_Mul(self, expr, insideAdd=False):
        """Print a product as nested '*' intrinsic calls, divided by the product of the denominator factors.

        :param insideAdd: if True, a negative coefficient is not printed; instead the tuple
                          ``(sign, code)`` is returned so that the caller can emit a subtraction
        :return: code string, or ``(sign, code)`` when ``insideAdd`` is True (vector case only)
        """
        result = self._scalarFallback('_print_Mul', expr)
        if result:
            return result

        c, e = expr.as_coeff_Mul()
        if c < 0:
            # continue with the positive product and remember the sign
            expr = _keep_coeff(-c, e)
            sign = -1
        else:
            sign = 1

        a = []  # items in the numerator
        b = []  # items that are in the denominator (if any)

        # Gather args for numerator/denominator
        for item in expr.as_ordered_factors():
            if item.is_commutative and item.is_Pow and item.exp.is_Rational and item.exp.is_negative:
                if item.exp != -1:
                    b.append(sp.Pow(item.base, -item.exp, evaluate=False))
                else:
                    b.append(sp.Pow(item.base, -item.exp))
            else:
                a.append(item)

        a = a or [S.One]
        # a = a or [castFunc(S.One, VectorType(createTypeFromString("double"), exprType.width))]

        a_str = [self._print(x) for x in a]
        b_str = [self._print(x) for x in b]

        result = a_str[0]
        for item in a_str[1:]:
            result = self.instructionSet['*'].format(result, item)

        if len(b) > 0:
            denominator_str = b_str[0]
            for item in b_str[1:]:
                denominator_str = self.instructionSet['*'].format(denominator_str, item)
            result = self.instructionSet['/'].format(result, denominator_str)

        if insideAdd:
            return sign, result
        else:
            if sign < 0:
                # stand-alone negative product: multiply by the printed -1
                return self.instructionSet['*'].format(self._print(S.NegativeOne), result)
            else:
                return result

    def _print_Relational(self, expr):
        """Print a comparison with the intrinsic registered under the relation's operator string."""
        result = self._scalarFallback('_print_Relational', expr)
        if result:
            return result
        return self.instructionSet[expr.rel_op].format(self._print(expr.lhs), self._print(expr.rhs))

    def _print_Equality(self, expr):
        """Print an equality with the '==' intrinsic."""
        result = self._scalarFallback('_print_Equality', expr)
        if result:
            return result
        return self.instructionSet['=='].format(self._print(expr.lhs), self._print(expr.rhs))

    def _print_Piecewise(self, expr):
        """Print a piecewise expression as nested 'blendv' calls, starting from the default branch.

        :raises ValueError: if the last branch has no ``True`` condition
        """
        result = self._scalarFallback('_print_Piecewise', expr)
        if result:
            return result

        if expr.args[-1].cond != True:
            # We need the last conditional to be a True, otherwise the resulting
            # function may not return a result.
            raise ValueError("All Piecewise expressions must contain an "
                             "(expr, True) statement to be used as a default "
                             "condition. Without one, the generated "
                             "expression may not evaluate to anything under "
                             "some condition.")

        result = self._print(expr.args[-1][0])
        # walk the remaining branches from last to first, wrapping the result built so far
        for trueExpr, condition in reversed(expr.args[:-1]):
            result = self.instructionSet['blendv'].format(result, self._print(trueExpr), self._print(condition))
        return result
from sympy.printing.printer import Printer
from graphviz import Digraph, lang
import graphviz
class DotPrinter(Printer):
    """
    A printer which converts ast to DOT (graph description language).
    """

    def __init__(self, nodeToStrFunction, full, **kwargs):
        """
        :param nodeToStrFunction: callable mapping a node to the string used as its graph node name
        :param full: if True, expressions inside assignments are expanded into the graph as well
        :param kwargs: passed on to ``graphviz.Digraph``
        """
        super(DotPrinter, self).__init__()
        self._nodeToStrFunction = nodeToStrFunction
        self.full = full
        self.dot = Digraph(**kwargs)
        self.dot.quote_edge = lang.quote

    def _print_KernelFunction(self, function):
        """Add the function node, print its body and connect both."""
        self.dot.node(self._nodeToStrFunction(function), style='filled', fillcolor='#a056db', label="Function")
        self._print(function.body)
        self.dot.edge(self._nodeToStrFunction(function), self._nodeToStrFunction(function.body))

    def _print_LoopOverCoordinate(self, loop):
        """Add the loop node, print its body and connect both."""
        self.dot.node(self._nodeToStrFunction(loop), style='filled', fillcolor='#3498db')
        self._print(loop.body)
        self.dot.edge(self._nodeToStrFunction(loop), self._nodeToStrFunction(loop.body))

    def _print_Block(self, block):
        """Print all children, add the block node and one edge from the block to every child."""
        for node in block.args:
            self._print(node)

        self.dot.node(self._nodeToStrFunction(block), style='filled', fillcolor='#dbc256', label=repr(block))
        for node in block.args:
            self.dot.edge(self._nodeToStrFunction(block), self._nodeToStrFunction(node))

    def _print_SympyAssignment(self, assignment):
        """Add the assignment node; in full mode also print its arguments and connect them."""
        self.dot.node(self._nodeToStrFunction(assignment), style='filled', fillcolor='#56db7f')
        if self.full:
            for node in assignment.args:
                self._print(node)
            for node in assignment.args:
                self.dot.edge(self._nodeToStrFunction(assignment), self._nodeToStrFunction(node))

    def emptyPrinter(self, expr):
        """Fallback for objects without a dedicated print method.

        In full mode the expression and its arguments are added to the graph.

        :raises NotImplementedError: when not in full mode
        """
        if self.full:
            self.dot.node(self._nodeToStrFunction(expr))
            for node in expr.args:
                self._print(node)
            for node in expr.args:
                self.dot.edge(self._nodeToStrFunction(expr), self._nodeToStrFunction(node))
        else:
            # NotImplemented is a constant, not an exception class - calling it raised a TypeError
            raise NotImplementedError('Dotprinter cannot print', expr)

    def doprint(self, expr):
        """Print ``expr`` into the graph and return the DOT source text."""
        self._print(expr)
        return self.dot.source
def __shortened(node):
    """Return a short label for an AST node.

    :raises NotImplementedError: for node types other than loop, kernel function, assignment and block
    """
    from pystencils.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment, Block

    if isinstance(node, LoopOverCoordinate):
        return "Loop over dim %d" % (node.coordinateToLoopOver,)
    if isinstance(node, KernelFunction):
        # field names first, then all parameters that are not field arguments
        fieldNames = [f.name for f in node.fieldsAccessed]
        otherNames = [p.name for p in node.parameters if not p.isFieldArgument]
        return "Func: %s (%s)" % (node.functionName, ",".join(fieldNames + otherNames))
    if isinstance(node, SympyAssignment):
        return repr(node.lhs)
    if isinstance(node, Block):
        # the id keeps labels of different blocks distinct
        return "Block" + str(id(node))
    raise NotImplementedError("Cannot handle node type %s" % (type(node),))
def dotprint(node, view=False, short=False, full=False, **kwargs):
    """
    Returns a string which can be used to generate a DOT-graph

    :param node: The ast which should be generated
    :param view: Boolean, if True a ``graphviz.Source`` object is returned instead of the string
    :param short: Uses the __shortened output (takes precedence over ``full`` for the node labels)
    :param full: Prints the whole tree with type information
    :param kwargs: is directly passed to the DotPrinter class: http://graphviz.readthedocs.io/en/latest/api.html#digraph
    :return: string in DOT format
    """
    def typedRepr(expr):
        return repr(type(expr)) + repr(expr)

    if short:
        toStr = __shortened
    elif full:
        toStr = typedRepr
    else:
        toStr = repr

    dotSource = DotPrinter(toStr, full, **kwargs).doprint(node)
    return graphviz.Source(dotSource) if view else dotSource
if __name__ == "__main__":
    # Demo: build a kernel for a Sobel-like x-derivative filter and print its AST in DOT format
    from pystencils import Field
    import sympy as sp
    imgField = Field.createGeneric('I',
                                   spatialDimensions=2,  # 2D image
                                   indexDimensions=1)  # multiple values per pixel: e.g. RGB
    w1, w2 = sp.symbols("w_1 w_2")
    # Negative weights on the x-1 column, positive weights on the x+1 column.
    # The original subtracted the last term (+1, +1), which broke the antisymmetry of the stencil.
    sobelX = -w2 * imgField[-1, 0](1) - w1 * imgField[-1, -1](1) - w1 * imgField[-1, +1](1) \
        + w2 * imgField[+1, 0](1) + w1 * imgField[+1, -1](1) + w1 * imgField[+1, +1](1)

    dstField = Field.createGeneric('dst', spatialDimensions=2, indexDimensions=0)
    updateRule = sp.Eq(dstField[0, 0], sobelX)

    from pystencils import createKernel
    ast = createKernel([updateRule])
    print(dotprint(ast, short=True))
{
"version" : 1,
"disable_existing_loggers" : false,
"formatters" : {
"simple" :{
"format" : "[%(levelname)s]: %(message)s"
}
},
"handlers" : {
"console": {
"class": "logging.StreamHandler",
"level": "INFO",
"formatter": "simple",
"stream": "ext://sys.stdout"
},
"log_file": {
"class": "logging.FileHandler",
"level": "DEBUG",
"formatter": "simple",
"filename": "gen.log",
"mode" : "w",
"encoding": "utf8"
}
},
"loggers" : {
"generator" : {
"level" : "DEBUG",
"handlers" : ["console", "log_file"]
}
}
}
\ No newline at end of file
def x86VectorInstructionSet(dataType='double', instructionSet='avx'):
    """Build the table of x86 SIMD intrinsics for one data type and instruction set.

    Every operation id of the internal table (e.g. '+', 'sqrt', 'loadU') is mapped to a format
    string ``<prefix>_<name>_<suffix>(<args>)`` where numeric arguments become ``{0}``, ``{1}``, ...
    placeholders and all other arguments (e.g. comparison predicates) are copied literally.

    :param dataType: 'double' or 'float'
    :param instructionSet: 'sse' or 'avx'; 'avx512' has prefix and width entries but no headers
                           entry, so it currently raises a KeyError
    :return: dict with the intrinsic format strings plus the entries 'width' (elements per vector),
             'dataTypePrefix', the C vector type names under 'double', 'float', 'int', 'bool',
             and 'headers'
    :raises KeyError: for unknown data type / instruction set combinations
    """
    baseNames = {
        '+': 'add[0, 1]',
        '-': 'sub[0, 1]',
        '*': 'mul[0, 1]',
        '/': 'div[0, 1]',

        '==': 'cmp[0, 1, _CMP_EQ_UQ ]',
        '!=': 'cmp[0, 1, _CMP_NEQ_UQ ]',
        '>=': 'cmp[0, 1, _CMP_GE_OQ ]',
        '<=': 'cmp[0, 1, _CMP_LE_OQ ]',
        '<': 'cmp[0, 1, _CMP_NGE_UQ ]',
        '>': 'cmp[0, 1, _CMP_NLE_UQ ]',
        '&': 'and[0, 1]',
        '|': 'or[0, 1]',
        'blendv': 'blendv[0, 1, 2]',

        'sqrt': 'sqrt[0]',

        'makeVec': 'set[0,0,0,0]',
        'makeZero': 'setzero[]',

        'loadU': 'loadu[0]',
        'loadA': 'load[0]',
        'storeU': 'storeu[0,1]',
        'storeA': 'store [0,1]',
    }

    headers = {
        'avx': ['<immintrin.h>'],
        'sse': ['<xmmintrin.h>', '<emmintrin.h>', '<pmmintrin.h>', '<tmmintrin.h>', '<smmintrin.h>', '<nmmintrin.h>']
    }

    suffix = {
        'double': 'pd',
        'float': 'ps',
    }
    prefix = {
        'sse': '_mm',
        'avx': '_mm256',
        'avx512': '_mm512',
    }

    # number of elements per vector register
    width = {
        ("double", "sse"): 2,
        ("float", "sse"): 4,
        ("double", "avx"): 4,
        ("float", "avx"): 8,
        ("double", "avx512"): 8,
        ("float", "avx512"): 16,
    }

    result = {}
    pre = prefix[instructionSet]
    suf = suffix[dataType]
    for intrinsicId, functionShortcut in baseNames.items():
        functionShortcut = functionShortcut.strip()
        bracketPos = functionShortcut.index('[')
        # strip the name: a shortcut like 'store [0,1]' must not produce '..._store _pd'
        name = functionShortcut[:bracketPos].strip()
        args = functionShortcut[bracketPos + 1: -1]

        formattedArgs = []
        for arg in args.split(","):
            arg = arg.strip()
            if not arg:
                continue
            if arg in ('0', '1', '2', '3', '4', '5'):
                formattedArgs.append("{" + arg + "}")
            else:
                formattedArgs.append(arg)
        # joining keeps the opening parenthesis also for intrinsics without arguments ('setzero[]')
        argString = "(" + ",".join(formattedArgs) + ")"

        result[intrinsicId] = pre + "_" + name + "_" + suf + argString

    result['width'] = width[(dataType, instructionSet)]
    # NOTE(review): yields e.g. '__mm256d' - confirm that this is the intended spelling
    result['dataTypePrefix'] = {
        'double': "_" + pre + 'd',
        'float': "_" + pre,
    }

    # register size = number of elements * bits per element (the original always assumed 64 bit
    # elements, which gave '__m512' for 8 floats in an AVX register)
    bitsPerElement = 64 if dataType == 'double' else 32
    bitWidth = result['width'] * bitsPerElement
    result['double'] = "__m%dd" % (bitWidth,)
    result['float'] = "__m%d" % (bitWidth,)
    result['int'] = "__m%di" % (bitWidth,)
    result['bool'] = "__m%dd" % (bitWidth,)

    result['headers'] = headers[instructionSet]
    return result
# Instruction set tables used by the C backend, one per data type, all built for AVX
selectedInstructionSet = {dataType: x86VectorInstructionSet(dataType, 'avx')
                          for dataType in ('float', 'double')}
build-essential
graphviz
ffmpeg
# ----------------------------------------------------------------------------------------------------------------------
# Environment with all dependencies to use pystencils
#
#
# Download conda at https://conda.io/miniconda.html and create this environment by running:
# conda env create -f conda_environment_user.yml
# conda activate pystencils
#
# If you have CUDA or ROCm installed and want to use your GPU, uncomment the last line to install cupy
#
# ----------------------------------------------------------------------------------------------------------------------
name: pystencils
dependencies:
  # Basic dependencies:
  - python >= 3.8
  - numpy
  - sympy >= 1.1
  - appdirs  # to find default cache directory on each platform
  - joblib  # caching on hard-disk, this is optional, but if not installed lbmpy is really slow
  - cython  # speed up boundary list computation (optional)
  - matplotlib
  - imageio
  - pandas
  - scipy
  - pip
  # Packages below are installed with pip; they must be nested under the `pip:` key
  - pip:
      - islpy  # used to optimize staggered kernels
      - py-cpuinfo  # get cpu info like cache sizes, supported vector instruction sets, ...
      - graphviz  # can show abstract syntax trees as formatted graphs
      - ipy_table  # HTML tables for jupyter notebooks
      - pyevtk  # VTK output for serial simulations
      - blitzdb  # file-based No-SQL database to store simulation results
      # - cupy  # add this if you have CUDA or ROCm installed