Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 372 additions and 2180 deletions
// An implementation of C++ std::complex for use on CUDA devices.
// Written by John C. Travers <jtravs@gmail.com> (2012)
//
// Missing:
// - long double support (not supported on CUDA)
// - some integral pow functions (due to lack of C++11 support on CUDA)
//
// Heavily derived from the LLVM libcpp project (svn revision 147853).
// Based on libcxx/include/complex.
// The git history contains the complete change history from the original.
// The modifications are licensed as per the original LLVM license below.
//
// -*- C++ -*-
//===--------------------------- complex ----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
extern "C++" {
#ifndef CUDA_COMPLEX_HPP
#define CUDA_COMPLEX_HPP
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif
/*
complex synopsis
template<class T>
class complex
{
public:
typedef T value_type;
complex(const T& re = T(), const T& im = T());
complex(const complex&);
template<class X> complex(const complex<X>&);
T real() const;
T imag() const;
void real(T);
void imag(T);
complex<T>& operator= (const T&);
complex<T>& operator+=(const T&);
complex<T>& operator-=(const T&);
complex<T>& operator*=(const T&);
complex<T>& operator/=(const T&);
complex& operator=(const complex&);
template<class X> complex<T>& operator= (const complex<X>&);
template<class X> complex<T>& operator+=(const complex<X>&);
template<class X> complex<T>& operator-=(const complex<X>&);
template<class X> complex<T>& operator*=(const complex<X>&);
template<class X> complex<T>& operator/=(const complex<X>&);
};
template<>
class complex<float>
{
public:
typedef float value_type;
constexpr complex(float re = 0.0f, float im = 0.0f);
explicit constexpr complex(const complex<double>&);
constexpr float real() const;
void real(float);
constexpr float imag() const;
void imag(float);
complex<float>& operator= (float);
complex<float>& operator+=(float);
complex<float>& operator-=(float);
complex<float>& operator*=(float);
complex<float>& operator/=(float);
complex<float>& operator=(const complex<float>&);
template<class X> complex<float>& operator= (const complex<X>&);
template<class X> complex<float>& operator+=(const complex<X>&);
template<class X> complex<float>& operator-=(const complex<X>&);
template<class X> complex<float>& operator*=(const complex<X>&);
template<class X> complex<float>& operator/=(const complex<X>&);
};
template<>
class complex<double>
{
public:
typedef double value_type;
constexpr complex(double re = 0.0, double im = 0.0);
constexpr complex(const complex<float>&);
constexpr double real() const;
void real(double);
constexpr double imag() const;
void imag(double);
complex<double>& operator= (double);
complex<double>& operator+=(double);
complex<double>& operator-=(double);
complex<double>& operator*=(double);
complex<double>& operator/=(double);
complex<double>& operator=(const complex<double>&);
template<class X> complex<double>& operator= (const complex<X>&);
template<class X> complex<double>& operator+=(const complex<X>&);
template<class X> complex<double>& operator-=(const complex<X>&);
template<class X> complex<double>& operator*=(const complex<X>&);
template<class X> complex<double>& operator/=(const complex<X>&);
};
// 26.3.6 operators:
template<class T> complex<T> operator+(const complex<T>&, const complex<T>&);
template<class T> complex<T> operator+(const complex<T>&, const T&);
template<class T> complex<T> operator+(const T&, const complex<T>&);
template<class T> complex<T> operator-(const complex<T>&, const complex<T>&);
template<class T> complex<T> operator-(const complex<T>&, const T&);
template<class T> complex<T> operator-(const T&, const complex<T>&);
template<class T> complex<T> operator*(const complex<T>&, const complex<T>&);
template<class T> complex<T> operator*(const complex<T>&, const T&);
template<class T> complex<T> operator*(const T&, const complex<T>&);
template<class T> complex<T> operator/(const complex<T>&, const complex<T>&);
template<class T> complex<T> operator/(const complex<T>&, const T&);
template<class T> complex<T> operator/(const T&, const complex<T>&);
template<class T> complex<T> operator+(const complex<T>&);
template<class T> complex<T> operator-(const complex<T>&);
template<class T> bool operator==(const complex<T>&, const complex<T>&);
template<class T> bool operator==(const complex<T>&, const T&);
template<class T> bool operator==(const T&, const complex<T>&);
template<class T> bool operator!=(const complex<T>&, const complex<T>&);
template<class T> bool operator!=(const complex<T>&, const T&);
template<class T> bool operator!=(const T&, const complex<T>&);
template<class T, class charT, class traits>
basic_istream<charT, traits>&
operator>>(basic_istream<charT, traits>&, complex<T>&);
template<class T, class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>&, const complex<T>&);
// 26.3.7 values:
template<class T> T real(const complex<T>&);
double real(double);
template<Integral T> double real(T);
float real(float);
template<class T> T imag(const complex<T>&);
double imag(double);
template<Integral T> double imag(T);
float imag(float);
template<class T> T abs(const complex<T>&);
template<class T> T arg(const complex<T>&);
double arg(double);
template<Integral T> double arg(T);
float arg(float);
template<class T> T norm(const complex<T>&);
double norm(double);
template<Integral T> double norm(T);
float norm(float);
template<class T> complex<T> conj(const complex<T>&);
complex<double> conj(double);
template<Integral T> complex<double> conj(T);
complex<float> conj(float);
template<class T> complex<T> proj(const complex<T>&);
complex<double> proj(double);
template<Integral T> complex<double> proj(T);
complex<float> proj(float);
template<class T> complex<T> polar(const T&, const T& = 0);
// 26.3.8 transcendentals:
template<class T> complex<T> acos(const complex<T>&);
template<class T> complex<T> asin(const complex<T>&);
template<class T> complex<T> atan(const complex<T>&);
template<class T> complex<T> acosh(const complex<T>&);
template<class T> complex<T> asinh(const complex<T>&);
template<class T> complex<T> atanh(const complex<T>&);
template<class T> complex<T> cos (const complex<T>&);
template<class T> complex<T> cosh (const complex<T>&);
template<class T> complex<T> exp (const complex<T>&);
template<class T> complex<T> log (const complex<T>&);
template<class T> complex<T> log10(const complex<T>&);
template<class T> complex<T> pow(const complex<T>&, const T&);
template<class T> complex<T> pow(const complex<T>&, const complex<T>&);
template<class T> complex<T> pow(const T&, const complex<T>&);
template<class T> complex<T> sin (const complex<T>&);
template<class T> complex<T> sinh (const complex<T>&);
template<class T> complex<T> sqrt (const complex<T>&);
template<class T> complex<T> tan (const complex<T>&);
template<class T> complex<T> tanh (const complex<T>&);
template<class T, class charT, class traits>
basic_istream<charT, traits>&
operator>>(basic_istream<charT, traits>& is, complex<T>& x);
template<class T, class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& o, const complex<T>& x);
*/
#include <math.h>
#include <sstream>
template <class _Tp> class complex;
template <class _Tp>
complex<_Tp> operator*(const complex<_Tp> &__z, const complex<_Tp> &__w);
template <class _Tp>
complex<_Tp> operator/(const complex<_Tp> &__x, const complex<_Tp> &__y);
template <class _Tp> class complex {
public:
typedef _Tp value_type;
private:
value_type __re_;
value_type __im_;
public:
CUDA_CALLABLE_MEMBER
complex(const value_type &__re = value_type(),
const value_type &__im = value_type())
: __re_(__re), __im_(__im) {}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex(const complex<_Xp> &__c)
: __re_(__c.real()), __im_(__c.imag()) {}
CUDA_CALLABLE_MEMBER value_type real() const { return __re_; }
CUDA_CALLABLE_MEMBER value_type imag() const { return __im_; }
CUDA_CALLABLE_MEMBER void real(value_type __re) { __re_ = __re; }
CUDA_CALLABLE_MEMBER void imag(value_type __im) { __im_ = __im; }
CUDA_CALLABLE_MEMBER complex &operator=(const value_type &__re) {
__re_ = __re;
__im_ = value_type();
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator+=(const value_type &__re) {
__re_ += __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator-=(const value_type &__re) {
__re_ -= __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator*=(const value_type &__re) {
__re_ *= __re;
__im_ *= __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator/=(const value_type &__re) {
__re_ /= __re;
__im_ /= __re;
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator=(const complex<_Xp> &__c) {
__re_ = __c.real();
__im_ = __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator+=(const complex<_Xp> &__c) {
__re_ += __c.real();
__im_ += __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator-=(const complex<_Xp> &__c) {
__re_ -= __c.real();
__im_ -= __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator*=(const complex<_Xp> &__c) {
*this = *this * __c;
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator/=(const complex<_Xp> &__c) {
*this = *this / __c;
return *this;
}
};
template <> class complex<double>;
template <> class complex<float> {
float __re_;
float __im_;
public:
typedef float value_type;
/*constexpr*/ CUDA_CALLABLE_MEMBER complex(float __re = 0.0f,
float __im = 0.0f)
: __re_(__re), __im_(__im) {}
explicit /*constexpr*/ complex(const complex<double> &__c);
/*constexpr*/ CUDA_CALLABLE_MEMBER float real() const { return __re_; }
/*constexpr*/ CUDA_CALLABLE_MEMBER float imag() const { return __im_; }
CUDA_CALLABLE_MEMBER void real(value_type __re) { __re_ = __re; }
CUDA_CALLABLE_MEMBER void imag(value_type __im) { __im_ = __im; }
CUDA_CALLABLE_MEMBER complex &operator=(float __re) {
__re_ = __re;
__im_ = value_type();
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator+=(float __re) {
__re_ += __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator-=(float __re) {
__re_ -= __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator*=(float __re) {
__re_ *= __re;
__im_ *= __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator/=(float __re) {
__re_ /= __re;
__im_ /= __re;
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator=(const complex<_Xp> &__c) {
__re_ = __c.real();
__im_ = __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator+=(const complex<_Xp> &__c) {
__re_ += __c.real();
__im_ += __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator-=(const complex<_Xp> &__c) {
__re_ -= __c.real();
__im_ -= __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator*=(const complex<_Xp> &__c) {
*this = *this * __c;
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator/=(const complex<_Xp> &__c) {
*this = *this / __c;
return *this;
}
};
template <> class complex<double> {
double __re_;
double __im_;
public:
typedef double value_type;
/*constexpr*/ CUDA_CALLABLE_MEMBER complex(double __re = 0.0,
double __im = 0.0)
: __re_(__re), __im_(__im) {}
/*constexpr*/ complex(const complex<float> &__c);
/*constexpr*/ CUDA_CALLABLE_MEMBER double real() const { return __re_; }
/*constexpr*/ CUDA_CALLABLE_MEMBER double imag() const { return __im_; }
CUDA_CALLABLE_MEMBER void real(value_type __re) { __re_ = __re; }
CUDA_CALLABLE_MEMBER void imag(value_type __im) { __im_ = __im; }
CUDA_CALLABLE_MEMBER complex &operator=(double __re) {
__re_ = __re;
__im_ = value_type();
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator+=(double __re) {
__re_ += __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator-=(double __re) {
__re_ -= __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator*=(double __re) {
__re_ *= __re;
__im_ *= __re;
return *this;
}
CUDA_CALLABLE_MEMBER complex &operator/=(double __re) {
__re_ /= __re;
__im_ /= __re;
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator=(const complex<_Xp> &__c) {
__re_ = __c.real();
__im_ = __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator+=(const complex<_Xp> &__c) {
__re_ += __c.real();
__im_ += __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator-=(const complex<_Xp> &__c) {
__re_ -= __c.real();
__im_ -= __c.imag();
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator*=(const complex<_Xp> &__c) {
*this = *this * __c;
return *this;
}
template <class _Xp>
CUDA_CALLABLE_MEMBER complex &operator/=(const complex<_Xp> &__c) {
*this = *this / __c;
return *this;
}
};
// constexpr
inline CUDA_CALLABLE_MEMBER complex<float>::complex(const complex<double> &__c)
: __re_(__c.real()), __im_(__c.imag()) {}
// constexpr
inline CUDA_CALLABLE_MEMBER complex<double>::complex(const complex<float> &__c)
: __re_(__c.real()), __im_(__c.imag()) {}
// 26.3.6 operators:
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator+(const complex<_Tp> &__x,
const complex<_Tp> &__y) {
complex<_Tp> __t(__x);
__t += __y;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator+(const complex<_Tp> &__x,
const _Tp &__y) {
complex<_Tp> __t(__x);
__t += __y;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator+(const _Tp &__x,
const complex<_Tp> &__y) {
complex<_Tp> __t(__y);
__t += __x;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator-(const complex<_Tp> &__x,
const complex<_Tp> &__y) {
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator-(const complex<_Tp> &__x,
const _Tp &__y) {
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator-(const _Tp &__x,
const complex<_Tp> &__y) {
complex<_Tp> __t(-__y);
__t += __x;
return __t;
}
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> operator*(const complex<_Tp> &__z,
const complex<_Tp> &__w) {
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __ac = __a * __c;
_Tp __bd = __b * __d;
_Tp __ad = __a * __d;
_Tp __bc = __b * __c;
_Tp __x = __ac - __bd;
_Tp __y = __ad + __bc;
if (isnan(__x) && isnan(__y)) {
bool __recalc = false;
if (isinf(__a) || isinf(__b)) {
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (isinf(__c) || isinf(__d)) {
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
__recalc = true;
}
if (!__recalc &&
(isinf(__ac) || isinf(__bd) || isinf(__ad) || isinf(__bc))) {
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (__recalc) {
__x = _Tp(INFINITY) * (__a * __c - __b * __d);
__y = _Tp(INFINITY) * (__a * __d + __b * __c);
}
}
return complex<_Tp>(__x, __y);
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator*(const complex<_Tp> &__x,
const _Tp &__y) {
complex<_Tp> __t(__x);
__t *= __y;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator*(const _Tp &__x,
const complex<_Tp> &__y) {
complex<_Tp> __t(__y);
__t *= __x;
return __t;
}
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> operator/(const complex<_Tp> &__z,
const complex<_Tp> &__w) {
int __ilogbw = 0;
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
if (isfinite(__logbw)) {
__ilogbw = static_cast<int>(__logbw);
__c = scalbn(__c, -__ilogbw);
__d = scalbn(__d, -__ilogbw);
}
_Tp __denom = __c * __c + __d * __d;
_Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
_Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
if (isnan(__x) && isnan(__y)) {
if ((__denom == _Tp(0)) && (!isnan(__a) || !isnan(__b))) {
__x = copysign(_Tp(INFINITY), __c) * __a;
__y = copysign(_Tp(INFINITY), __c) * __b;
} else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d)) {
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
__x = _Tp(INFINITY) * (__a * __c + __b * __d);
__y = _Tp(INFINITY) * (__b * __c - __a * __d);
} else if (isinf(__logbw) && __logbw > _Tp(0) && isfinite(__a) &&
isfinite(__b)) {
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
__x = _Tp(0) * (__a * __c + __b * __d);
__y = _Tp(0) * (__b * __c - __a * __d);
}
}
return complex<_Tp>(__x, __y);
}
template <>
CUDA_CALLABLE_MEMBER complex<float> operator/(const complex<float> &__z,
const complex<float> &__w) {
int __ilogbw = 0;
float __a = __z.real();
float __b = __z.imag();
float __c = __w.real();
float __d = __w.imag();
float __logbw = logbf(fmaxf(fabsf(__c), fabsf(__d)));
if (isfinite(__logbw)) {
__ilogbw = static_cast<int>(__logbw);
__c = scalbnf(__c, -__ilogbw);
__d = scalbnf(__d, -__ilogbw);
}
float __denom = __c * __c + __d * __d;
float __x = scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
float __y = scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
if (isnan(__x) && isnan(__y)) {
if ((__denom == float(0)) && (!isnan(__a) || !isnan(__b))) {
#pragma warning(suppress : 4756) // Ignore INFINITY related warning
__x = copysignf(INFINITY, __c) * __a;
#pragma warning(suppress : 4756) // Ignore INFINITY related warning
__y = copysignf(INFINITY, __c) * __b;
} else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d)) {
__a = copysignf(isinf(__a) ? float(1) : float(0), __a);
__b = copysignf(isinf(__b) ? float(1) : float(0), __b);
#pragma warning(suppress : 4756) // Ignore INFINITY related warning
__x = INFINITY * (__a * __c + __b * __d);
#pragma warning(suppress : 4756) // Ignore INFINITY related warning
__y = INFINITY * (__b * __c - __a * __d);
} else if (isinf(__logbw) && __logbw > float(0) && isfinite(__a) &&
isfinite(__b)) {
__c = copysignf(isinf(__c) ? float(1) : float(0), __c);
__d = copysignf(isinf(__d) ? float(1) : float(0), __d);
__x = float(0) * (__a * __c + __b * __d);
__y = float(0) * (__b * __c - __a * __d);
}
}
return complex<float>(__x, __y);
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator/(const complex<_Tp> &__x,
const _Tp &__y) {
return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator/(const _Tp &__x,
const complex<_Tp> &__y) {
complex<_Tp> __t(__x);
__t /= __y;
return __t;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator+(const complex<_Tp> &__x) {
return __x;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> operator-(const complex<_Tp> &__x) {
return complex<_Tp>(-__x.real(), -__x.imag());
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER bool operator==(const complex<_Tp> &__x,
const complex<_Tp> &__y) {
return __x.real() == __y.real() && __x.imag() == __y.imag();
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER bool operator==(const complex<_Tp> &__x,
const _Tp &__y) {
return __x.real() == __y && __x.imag() == 0;
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER bool operator==(const _Tp &__x,
const complex<_Tp> &__y) {
return __x == __y.real() && 0 == __y.imag();
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER bool operator!=(const complex<_Tp> &__x,
const complex<_Tp> &__y) {
return !(__x == __y);
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER bool operator!=(const complex<_Tp> &__x,
const _Tp &__y) {
return !(__x == __y);
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER bool operator!=(const _Tp &__x,
const complex<_Tp> &__y) {
return !(__x == __y);
}
// 26.3.7 values:
// real
template <class _Tp>
inline CUDA_CALLABLE_MEMBER _Tp real(const complex<_Tp> &__c) {
return __c.real();
}
inline CUDA_CALLABLE_MEMBER double real(double __re) { return __re; }
inline CUDA_CALLABLE_MEMBER float real(float __re) { return __re; }
// imag
template <class _Tp>
inline CUDA_CALLABLE_MEMBER _Tp imag(const complex<_Tp> &__c) {
return __c.imag();
}
inline CUDA_CALLABLE_MEMBER double imag(double __re) { return 0; }
inline CUDA_CALLABLE_MEMBER float imag(float __re) { return 0; }
// abs
template <class _Tp>
inline CUDA_CALLABLE_MEMBER _Tp abs(const complex<_Tp> &__c) {
return hypot(__c.real(), __c.imag());
}
// arg
template <class _Tp>
inline CUDA_CALLABLE_MEMBER _Tp arg(const complex<_Tp> &__c) {
return atan2(__c.imag(), __c.real());
}
inline CUDA_CALLABLE_MEMBER double arg(double __re) { return atan2(0., __re); }
inline CUDA_CALLABLE_MEMBER float arg(float __re) { return atan2f(0.F, __re); }
// norm
template <class _Tp>
inline CUDA_CALLABLE_MEMBER _Tp norm(const complex<_Tp> &__c) {
if (isinf(__c.real()))
return fabs(__c.real());
if (isinf(__c.imag()))
return fabs(__c.imag());
return __c.real() * __c.real() + __c.imag() * __c.imag();
}
inline CUDA_CALLABLE_MEMBER double norm(double __re) { return __re * __re; }
inline CUDA_CALLABLE_MEMBER float norm(float __re) { return __re * __re; }
// conj
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> conj(const complex<_Tp> &__c) {
return complex<_Tp>(__c.real(), -__c.imag());
}
inline CUDA_CALLABLE_MEMBER complex<double> conj(double __re) {
return complex<double>(__re);
}
inline CUDA_CALLABLE_MEMBER complex<float> conj(float __re) {
return complex<float>(__re);
}
// proj
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> proj(const complex<_Tp> &__c) {
complex<_Tp> __r = __c;
if (isinf(__c.real()) || isinf(__c.imag()))
__r = complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
return __r;
}
inline CUDA_CALLABLE_MEMBER complex<double> proj(double __re) {
if (isinf(__re))
__re = fabs(__re);
return complex<double>(__re);
}
inline CUDA_CALLABLE_MEMBER complex<float> proj(float __re) {
if (isinf(__re))
__re = fabs(__re);
return complex<float>(__re);
}
// polar
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> polar(const _Tp &__rho,
const _Tp &__theta = _Tp(0)) {
if (isnan(__rho) || signbit(__rho))
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
if (isnan(__theta)) {
if (isinf(__rho))
return complex<_Tp>(__rho, __theta);
return complex<_Tp>(__theta, __theta);
}
if (isinf(__theta)) {
if (isinf(__rho))
return complex<_Tp>(__rho, _Tp(NAN));
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
_Tp __x = __rho * cos(__theta);
if (isnan(__x))
__x = 0;
_Tp __y = __rho * sin(__theta);
if (isnan(__y))
__y = 0;
return complex<_Tp>(__x, __y);
}
// log
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> log(const complex<_Tp> &__x) {
return complex<_Tp>(log(abs(__x)), arg(__x));
}
// log10
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> log10(const complex<_Tp> &__x) {
return log(__x) / log(_Tp(10));
}
// sqrt
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> sqrt(const complex<_Tp> &__x) {
if (isinf(__x.imag()))
return complex<_Tp>(_Tp(INFINITY), __x.imag());
if (isinf(__x.real())) {
if (__x.real() > _Tp(0))
return complex<_Tp>(__x.real(), isnan(__x.imag())
? __x.imag()
: copysign(_Tp(0), __x.imag()));
return complex<_Tp>(isnan(__x.imag()) ? __x.imag() : _Tp(0),
copysign(__x.real(), __x.imag()));
}
return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
}
// exp
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> exp(const complex<_Tp> &__x) {
_Tp __i = __x.imag();
if (isinf(__x.real())) {
if (__x.real() < _Tp(0)) {
if (!isfinite(__i))
__i = _Tp(1);
} else if (__i == 0 || !isfinite(__i)) {
if (isinf(__i))
__i = _Tp(NAN);
return complex<_Tp>(__x.real(), __i);
}
} else if (isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __e = exp(__x.real());
return complex<_Tp>(__e * cos(__i), __e * sin(__i));
}
// pow
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> pow(const complex<_Tp> &__x,
const complex<_Tp> &__y) {
return exp(__y * log(__x));
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> pow(const complex<_Tp> &__x,
const _Tp &__y) {
return pow(__x, complex<_Tp>(__y));
}
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> pow(const _Tp &__x,
const complex<_Tp> &__y) {
return pow(complex<_Tp>(__x), __y);
}
// asinh
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> asinh(const complex<_Tp> &__x) {
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real())) {
if (isnan(__x.imag()))
return __x;
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real())) {
if (isinf(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
return __x;
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(copysign(__x.imag(), __x.real()),
copysign(__pi / _Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(pow(__x, _Tp(2)) + _Tp(1)));
return complex<_Tp>(copysign(__z.real(), __x.real()),
copysign(__z.imag(), __x.imag()));
}
// acosh
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> acosh(const complex<_Tp> &__x) {
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real())) {
if (isnan(__x.imag()))
return complex<_Tp>(fabs(__x.real()), __x.imag());
if (isinf(__x.imag()))
if (__x.real() > 0)
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
else
return complex<_Tp>(-__x.real(),
copysign(__pi * _Tp(0.75), __x.imag()));
if (__x.real() < 0)
return complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real())) {
if (isinf(__x.imag()))
return complex<_Tp>(fabs(__x.imag()), __x.real());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(fabs(__x.imag()), copysign(__pi / _Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(pow(__x, _Tp(2)) - _Tp(1)));
return complex<_Tp>(copysign(__z.real(), _Tp(0)),
copysign(__z.imag(), __x.imag()));
}
// atanh
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> atanh(const complex<_Tp> &__x) {
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.imag())) {
return complex<_Tp>(copysign(_Tp(0), __x.real()),
copysign(__pi / _Tp(2), __x.imag()));
}
if (isnan(__x.imag())) {
if (isinf(__x.real()) || __x.real() == 0)
return complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
return complex<_Tp>(__x.imag(), __x.imag());
}
if (isnan(__x.real())) {
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.real())) {
return complex<_Tp>(copysign(_Tp(0), __x.real()),
copysign(__pi / _Tp(2), __x.imag()));
}
if (fabs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0)) {
return complex<_Tp>(copysign(_Tp(INFINITY), __x.real()),
copysign(_Tp(0), __x.imag()));
}
complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2);
return complex<_Tp>(copysign(__z.real(), __x.real()),
copysign(__z.imag(), __x.imag()));
}
// sinh
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> sinh(const complex<_Tp> &__x) {
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.imag() == 0 && !isfinite(__x.real()))
return __x;
return complex<_Tp>(sinh(__x.real()) * cos(__x.imag()),
cosh(__x.real()) * sin(__x.imag()));
}
// cosh
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> cosh(const complex<_Tp> &__x) {
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(fabs(__x.real()), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(_Tp(NAN), __x.real());
if (__x.real() == 0 && __x.imag() == 0)
return complex<_Tp>(_Tp(1), __x.imag());
if (__x.imag() == 0 && !isfinite(__x.real()))
return complex<_Tp>(fabs(__x.real()), __x.imag());
return complex<_Tp>(cosh(__x.real()) * cos(__x.imag()),
sinh(__x.real()) * sin(__x.imag()));
}
// tanh
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> tanh(const complex<_Tp> &__x) {
if (isinf(__x.real())) {
if (!isfinite(__x.imag()))
return complex<_Tp>(_Tp(1), _Tp(0));
return complex<_Tp>(_Tp(1), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
}
if (isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __2r(_Tp(2) * __x.real());
_Tp __2i(_Tp(2) * __x.imag());
_Tp __d(cosh(__2r) + cos(__2i));
return complex<_Tp>(sinh(__2r) / __d, sin(__2i) / __d);
}
// asin
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> asin(const complex<_Tp> &__x) {
complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// acos
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> acos(const complex<_Tp> &__x) {
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real())) {
if (isnan(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (isinf(__x.imag())) {
if (__x.real() < _Tp(0))
return complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
return complex<_Tp>(_Tp(0.25) * __pi, -__x.imag());
}
if (__x.real() < _Tp(0))
return complex<_Tp>(__pi, signbit(__x.imag()) ? -__x.real() : __x.real());
return complex<_Tp>(_Tp(0), signbit(__x.imag()) ? __x.real() : -__x.real());
}
if (isnan(__x.real())) {
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), -__x.imag());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(__pi / _Tp(2), -__x.imag());
if (__x.real() == 0)
return complex<_Tp>(__pi / _Tp(2), -__x.imag());
complex<_Tp> __z = log(__x + sqrt(pow(__x, _Tp(2)) - _Tp(1)));
if (signbit(__x.imag()))
return complex<_Tp>(fabs(__z.imag()), fabs(__z.real()));
return complex<_Tp>(fabs(__z.imag()), -fabs(__z.real()));
}
// atan
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> atan(const complex<_Tp> &__x) {
complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// sin
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> sin(const complex<_Tp> &__x) {
complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
// cos
template <class _Tp>
inline CUDA_CALLABLE_MEMBER complex<_Tp> cos(const complex<_Tp> &__x) {
return cosh(complex<_Tp>(-__x.imag(), __x.real()));
}
// tan
template <class _Tp>
CUDA_CALLABLE_MEMBER complex<_Tp> tan(const complex<_Tp> &__x) {
complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}
template <class _Tp, class _CharT, class _Traits>
std::basic_istream<_CharT, _Traits> &
operator>>(std::basic_istream<_CharT, _Traits> &__is, complex<_Tp> &__x) {
if (__is.good()) {
ws(__is);
if (__is.peek() == _CharT('(')) {
__is.get();
_Tp __r;
__is >> __r;
if (!__is.fail()) {
ws(__is);
_CharT __c = __is.peek();
if (__c == _CharT(',')) {
__is.get();
_Tp __i;
__is >> __i;
if (!__is.fail()) {
ws(__is);
__c = __is.peek();
if (__c == _CharT(')')) {
__is.get();
__x = complex<_Tp>(__r, __i);
} else
__is.setstate(std::ios_base::failbit);
} else
__is.setstate(std::ios_base::failbit);
} else if (__c == _CharT(')')) {
__is.get();
__x = complex<_Tp>(__r, _Tp(0));
} else
__is.setstate(std::ios_base::failbit);
} else
__is.setstate(std::ios_base::failbit);
} else {
_Tp __r;
__is >> __r;
if (!__is.fail())
__x = complex<_Tp>(__r, _Tp(0));
else
__is.setstate(std::ios_base::failbit);
}
} else
__is.setstate(std::ios_base::failbit);
return __is;
}
template <class _Tp, class _CharT, class _Traits>
std::basic_ostream<_CharT, _Traits> &
operator<<(std::basic_ostream<_CharT, _Traits> &__os, const complex<_Tp> &__x) {
std::basic_ostringstream<_CharT, _Traits> __s;
__s.flags(__os.flags());
__s.imbue(__os.getloc());
__s.precision(__os.precision());
__s << '(' << __x.real() << ',' << __x.imag() << ')';
return __os << __s.str();
}
//} // close namespace cuda_complex
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator*(const complex<U> &complexNumber,
const V &scalar) -> complex<U> {
return complex<U>(real(complexNumber) * scalar, imag(complexNumber) * scalar);
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator*(const V &scalar,
const complex<U> &complexNumber)
-> complex<U> {
return complex<U>(real(complexNumber) * scalar, imag(complexNumber) * scalar);
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator+(const complex<U> &complexNumber,
const V &scalar) -> complex<U> {
return complex<U>(real(complexNumber) + scalar, imag(complexNumber));
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator+(const V &scalar,
const complex<U> &complexNumber)
-> complex<U> {
return complex<U>(real(complexNumber) + scalar, imag(complexNumber));
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator-(const complex<U> &complexNumber,
const V &scalar) -> complex<U> {
return complex<U>(real(complexNumber) - scalar, imag(complexNumber));
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator-(const V &scalar,
const complex<U> &complexNumber)
-> complex<U> {
return complex<U>(scalar - real(complexNumber), imag(complexNumber));
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator/(const complex<U> &complexNumber,
const V scalar) -> complex<U> {
return complex<U>(real(complexNumber) / scalar, imag(complexNumber) / scalar);
}
template <class U, class V>
CUDA_CALLABLE_MEMBER auto operator/(const V scalar,
const complex<U> &complexNumber)
-> complex<U> {
return complex<U>(scalar, 0) / complexNumber;
}
using ComplexDouble = complex<double>;
using ComplexFloat = complex<float>;
#endif // CUDA_COMPLEX_HPP
}
#ifndef OPENCL_STDINT
#define OPENCL_STDINT
typedef unsigned int uint_t;
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long int int64_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long int uint64_t;
#endif
from typing import List, Union
import sympy
import sympy as sp
from sympy.codegen import Assignment
from sympy.codegen.rewriting import ReplaceOptim, optimize
from pystencils.astnodes import Block, Node, SympyAssignment
from pystencils.backends.cbackend import CustomCodeNode
from pystencils.functions import DivFunc
from pystencils.simp import AssignmentCollection
class NodeCollection:
def __init__(self, assignments: List[Union[Node, Assignment]]):
self.all_assignments = assignments
if all((isinstance(a, Assignment) for a in assignments)):
self.is_Nodes = False
self.is_Assignments = True
elif all((isinstance(n, Node) for n in assignments)):
self.is_Nodes = True
self.is_Assignments = False
else:
raise ValueError(f'The list "{assignments}" is mixed. Pass either a list of "pystencils.Assignments" '
f'or a list of "pystencils.astnodes.Node')
self.simplification_hints = {}
@staticmethod
def from_assignment_collection(assignment_collection: AssignmentCollection):
nodes = list()
for assignemt in assignment_collection.all_assignments:
if isinstance(assignemt, Assignment):
nodes.append(SympyAssignment(assignemt.lhs, assignemt.rhs))
elif isinstance(assignemt, Node):
nodes.append(assignemt)
else:
raise ValueError(f"Unknown node in the AssignmentCollection: {assignemt}")
return NodeCollection(nodes)
def evaluate_terms(self):
evaluate_constant_terms = ReplaceOptim(
lambda e: hasattr(e, 'is_constant') and e.is_constant and not e.is_integer,
lambda p: p.evalf())
evaluate_pow = ReplaceOptim(
lambda e: e.is_Pow and e.exp.is_Integer and abs(e.exp) <= 8,
lambda p: (
sp.UnevaluatedExpr(sp.Mul(*([p.base] * +p.exp), evaluate=False)) if p.exp > 0 else
DivFunc(sp.Integer(1), sp.Mul(*([p.base] * -p.exp), evaluate=False))
))
sympy_optimisations = [evaluate_constant_terms, evaluate_pow]
if self.is_Nodes:
def visitor(node):
if isinstance(node, CustomCodeNode):
return node
elif isinstance(node, Block):
return node.func([visitor(child) for child in node.args])
elif isinstance(node, Node):
return node.func(*[visitor(child) for child in node.args])
elif isinstance(node, sympy.Basic):
return optimize(node, sympy_optimisations)
else:
raise NotImplementedError(f'{node} {type(node)} has no valid visitor')
self.all_assignments = [visitor(assignment) for assignment in self.all_assignments]
else:
self.all_assignments = [Assignment(a.lhs, optimize(a.rhs, sympy_optimisations))
if hasattr(a, 'lhs')
else a for a in self.all_assignments]
import pytest
import pystencils as ps
import numpy as np
# This test aims to trigger deprication warnings. Thus the warnings should not be displayed in the warning summary.
import pystencils.config
def test_create_kernel_backwards_compatibility():
size = (30, 20)
src_field_string = np.random.rand(*size)
src_field_enum = np.copy(src_field_string)
src_field_config = np.copy(src_field_string)
dst_field_string = np.zeros(size)
dst_field_enum = np.zeros(size)
dst_field_config = np.zeros(size)
f = ps.Field.create_from_numpy_array("f", src_field_enum)
d = ps.Field.create_from_numpy_array("d", dst_field_enum)
jacobi = ps.Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
ast_enum = ps.create_kernel(jacobi, target=ps.Target.CPU).compile()
with pytest.warns(DeprecationWarning):
ast_string = ps.create_kernel(jacobi, target='cpu').compile()
# noinspection PyTypeChecker
with pytest.warns(DeprecationWarning):
ast_config = ps.create_kernel(jacobi, config=pystencils.config.CreateKernelConfig(target='cpu')).compile()
ast_enum(f=src_field_enum, d=dst_field_enum)
ast_string(f=src_field_string, d=dst_field_string)
ast_config(f=src_field_config, d=dst_field_config)
error = np.sum(np.abs(dst_field_enum - dst_field_string))
np.testing.assert_almost_equal(error, 0.0)
error = np.sum(np.abs(dst_field_enum - dst_field_config))
np.testing.assert_almost_equal(error, 0.0)
import numpy as np
import pystencils as ps
from pystencils import Assignment, Field, CreateKernelConfig, create_kernel, Target
def test_indexed_kernel():
arr = np.zeros((3, 4))
dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
index_arr = np.zeros((3,), dtype=dtype)
index_arr[0] = (0, 2, 3.0)
index_arr[1] = (1, 3, 42.0)
index_arr[2] = (2, 1, 5.0)
indexed_field = Field.create_from_numpy_array('index', index_arr)
normal_field = Field.create_from_numpy_array('f', arr)
update_rule = Assignment(normal_field[0, 0], indexed_field('value'))
config = CreateKernelConfig(index_fields=[indexed_field])
ast = create_kernel([update_rule], config=config)
kernel = ast.compile()
kernel(f=arr, index=index_arr)
code = ps.get_code_str(kernel)
for i in range(index_arr.shape[0]):
np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)
def test_indexed_cuda_kernel():
try:
import pycuda
except ImportError:
pycuda = None
if pycuda:
import pycuda.gpuarray as gpuarray
arr = np.zeros((3, 4))
dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
index_arr = np.zeros((3,), dtype=dtype)
index_arr[0] = (0, 2, 3.0)
index_arr[1] = (1, 3, 42.0)
index_arr[2] = (2, 1, 5.0)
indexed_field = Field.create_from_numpy_array('index', index_arr)
normal_field = Field.create_from_numpy_array('f', arr)
update_rule = Assignment(normal_field[0, 0], indexed_field('value'))
config = CreateKernelConfig(target=Target.GPU, index_fields=[indexed_field])
ast = create_kernel([update_rule], config=config)
kernel = ast.compile()
gpu_arr = gpuarray.to_gpu(arr)
gpu_index_arr = gpuarray.to_gpu(index_arr)
kernel(f=gpu_arr, index=gpu_index_arr)
gpu_arr.get(arr)
for i in range(index_arr.shape[0]):
np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)
else:
print("Did not run test on GPU since no pycuda is available")
%% Cell type:code id: tags:
``` python
from pystencils.session import *
```
%% Cell type:code id: tags:
``` python
dh = ps.create_data_handling(domain_size=(256, 256), periodicity=True)
c_field = dh.add_array('c')
dh.fill("c", 0.0, ghost_layers=True)
```
%% Cell type:code id: tags:
``` python
for x in range(129):
for y in range(258):
dh.cpu_arrays['c'][x, y] = 1.0
```
%% Cell type:code id: tags:
``` python
plt.scalar_field(dh.cpu_arrays["c"])
```
%% Output
<matplotlib.image.AxesImage at 0x7fcb7d253710>
%% Cell type:code id: tags:
``` python
ur = ps.Assignment(c_field[0, 0], c_field[1, 0])
ast = ps.create_kernel(ur, target=dh.default_target, cpu_openmp=True)
kernel = ast.compile()
```
%% Cell type:code id: tags:
``` python
c_sync = dh.synchronization_function_cpu(['c'])
```
%% Cell type:code id: tags:
``` python
def timeloop(steps=10):
for i in range(steps):
c_sync()
dh.run_kernel(kernel)
return dh.gather_array('c')
```
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('video')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('image_update')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
%% Cell type:code id: tags:
``` python
def grid_update_function(image):
for i in range(40):
c_sync()
dh.run_kernel(kernel)
return dh.gather_array('c')
```
%% Cell type:code id: tags:
``` python
animation = ps.jupyter.make_imshow_animation(dh.cpu_arrays["c"], grid_update_function, frames=300)
```
%% Output
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode("video")
ps.jupyter.set_display_mode("window")
ps.jupyter.set_display_mode("image_update")
ps.jupyter.activate_ipython()
```
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('pycuda')
```
%% Cell type:code id: tags:
``` python
from pystencils.session import *
sp.init_printing()
frac = sp.Rational
```
%% Cell type:markdown id: tags:
# Phase-field simulation of dentritic solidification in 3D
This notebook tests the model presented in the dentritic growth tutorial in 3D.
%% Cell type:code id: tags:
``` python
target = ps.Target.GPU
gpu = target == ps.Target.GPU
domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
φ_field = dh.add_array('phi', latex_name='φ')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
```
%% Cell type:code id: tags:
``` python
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
φ = φ_field.center
T = t_field.center
d = ps.fd.Diff
def f(φ, m):
return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
bulk_free_energy_density = f(φ, m)
interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
```
%% Cell type:markdown id: tags:
Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
%% Cell type:code id: tags:
``` python
n = sp.Matrix([d(φ, i) for i in range(3)])
nLen = sp.sqrt(sum(n_i**2 for n_i in n))
n = n / nLen
nVal = sum(n_i**4 for n_i in n)
σ = δ * nVal
εVal = εb * (1 + σ)
εVal
```
%% Output
$\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
⎛ ⎛ 4
⎜ ⎜ D(φ[0,0,0])
\bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
⎜ ⎜ 2
⎜ ⎜⎛ 2 2 2⎞ ⎛
⎝ ⎝⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]
4 4
D(φ[0,0,0]) D(φ[0,0,0])
────────────────────────────────── + ─────────────────────────────────────────
2
2 2 2⎞ ⎛ 2 2
) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]
⎞ ⎞
⎟ ⎟
────⎟ + 1⎟
2⎟ ⎟
2⎞ ⎟ ⎟
) ⎠ ⎠ ⎠
%% Cell type:code id: tags:
``` python
def m_func(temperature):
return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
```
%% Cell type:code id: tags:
``` python
substitutions = {m: m_func(T),
ε: εVal}
fe_i = interface_free_energy_density.subs(substitutions)
fe_b = bulk_free_energy_density.subs(substitutions)
μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
```
%% Cell type:code id: tags:
``` python
dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
```
%% Cell type:code id: tags:
``` python
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.3,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
sp.pi: sp.pi.evalf()
}
parameters
```
%% Output
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.3, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
%% Cell type:code id: tags:
``` python
dφ_dt = - dF_dφ / τ
assignments = [
ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
]
φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperatureEqs = [
ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
]
```
%% Cell type:code id: tags:
``` python
temperatureEqs
```
%% Output
$\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
[T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
_B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
%% Cell type:code id: tags:
``` python
φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
```
%% Cell type:code id: tags:
``` python
def time_loop(steps):
φ_sync = dh.synchronization_function(['phi'], target=target)
temperature_sync = dh.synchronization_function(['T'], target=target)
dh.all_to_gpu()
for t in range(steps):
φ_sync()
dh.run_kernel(φ_kernel)
temperature_sync()
dh.run_kernel(temperatureKernel)
dh.all_to_cpu()
def init(nucleus_size=np.sqrt(5)):
for b in dh.iterate():
x, y, z = b.cell_index_arrays
x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
b['phi'].fill(0)
b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
b['T'].fill(0.0)
def plot(slice_obj=ps.make_slice[:, :, 0.5]):
plt.subplot(1, 3, 1)
plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
plt.title("φ")
plt.colorbar()
plt.subplot(1, 3, 2)
plt.title("T")
plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
plt.colorbar()
plt.subplot(1, 3, 3)
plt.title("∂φ")
plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
plt.colorbar()
```
%% Cell type:code id: tags:
``` python
init()
plot()
print(dh)
```
%% Output
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phidelta| ( 0, 0)| ( 0, 0)
%% Cell type:code id: tags:
``` python
if 'is_test_run' in globals():
time_loop(2)
assert np.isfinite(dh.max('phi'))
assert np.isfinite(dh.max('T'))
assert np.isfinite(dh.max('phidelta'))
else:
from time import perf_counter
vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
last = perf_counter()
for i in range(300):
time_loop(100)
vtk_writer(i)
print("Step ", i, perf_counter() - last, dh.max('phi'))
last = perf_counter()
```
import pytest
import pystencils
from sympy import oo
@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
@pytest.mark.parametrize('negative', (False, 'Negative'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_infinity(type, negative, target):
x = pystencils.fields(f'x: {type}[1d]')
if negative:
assignment = pystencils.Assignment(x.center, -oo)
else:
assignment = pystencils.Assignment(x.center, oo)
ast = pystencils.create_kernel(assignment, data_type=type, target=target)
if target == pystencils.Target.GPU:
pytest.importorskip('pycuda')
ast.compile()
print(ast.compile().code)
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import pytest
import pystencils
from pystencils.backends.cbackend import CBackend
class UnsupportedNode(pystencils.astnodes.Node):
def __init__(self):
super().__init__()
def test_print_unsupported_node():
with pytest.raises(NotImplementedError, match='CBackend does not support node of type UnsupportedNode'):
CBackend()(UnsupportedNode())
import numpy as np
import sympy as sp
from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
from pystencils.simp import sympy_cse_on_assignment_list
def test_sliced_iteration():
size = (4, 4)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr)
dst_field = Field.create_from_numpy_array('dst', dst_arr)
a, b = sp.symbols("a b")
update_rule = Assignment(dst_field[0, 0],
(a * src_field[0, 1] + a * src_field[0, -1] +
b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(expected_result, dst_arr)
import pystencils as ps
from pystencils import TypedSymbol
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment
from pystencils.typing import create_type
from pystencils.transformations import filtered_tree_iteration, get_loop_hierarchy, get_loop_counter_symbol_hierarchy
def test_loop_information():
f, g = ps.fields("f, g: double[2D]")
update_rule = ps.Assignment(g[0, 0], f[0, 0])
ast = ps.create_kernel(update_rule)
inner_loops = [l for l in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
if l.is_innermost_loop]
loop_order = []
for i in get_loop_hierarchy(inner_loops[0].args[0]):
loop_order.append(i)
assert loop_order == [0, 1]
loop_symbols = get_loop_counter_symbol_hierarchy(inner_loops[0].args[0])
assert loop_symbols == [TypedSymbol("ctr_1", create_type("int"), nonnegative=True),
TypedSymbol("ctr_0", create_type("int"), nonnegative=True)]
[pytest] [pytest]
testpaths = src tests doc/notebooks
pythonpath = src
python_files = test_*.py *_test.py scenario_*.py python_files = test_*.py *_test.py scenario_*.py
norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
addopts = --doctest-modules --durations=20 --cov-config pytest.ini addopts = --doctest-modules --durations=20 --cov-config pytest.ini
...@@ -17,20 +19,21 @@ filterwarnings = ...@@ -17,20 +19,21 @@ filterwarnings =
[run] [run]
branch = True branch = True
source = pystencils source = src/pystencils
pystencils_tests tests
omit = doc/* omit = doc/*
pystencils_tests/* tests/*
setup.py setup.py
quicktest.py
conftest.py conftest.py
versioneer.py versioneer.py
pystencils/jupytersetup.py src/pystencils/jupytersetup.py
pystencils/cpu/msvc_detection.py src/pystencils/cpu/msvc_detection.py
pystencils/sympy_gmpy_bug_workaround.py src/pystencils/sympy_gmpy_bug_workaround.py
pystencils/cache.py src/pystencils/cache.py
pystencils/pacxx/benchmark.py src/pystencils/pacxx/benchmark.py
pystencils/_version.py src/pystencils/_version.py
venv/ venv/
[report] [report]
......
#!/usr/bin/env python3
from contextlib import redirect_stdout
import io
from tests.test_quicktests import (
test_basic_kernel,
test_basic_blocking_staggered,
test_basic_vectorization,
)
quick_tests = [
test_basic_kernel,
test_basic_blocking_staggered,
test_basic_vectorization,
]
if __name__ == "__main__":
print("Running pystencils quicktests")
for qt in quick_tests:
print(f" -> {qt.__name__}")
with redirect_stdout(io.StringIO()):
qt()
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
[versioneer]
VCS = git
style = pep440
versionfile_source = pystencils/_version.py
versionfile_build = pystencils/_version.py
tag_prefix = release/
parentdir_prefix = pystencils-
import distutils from setuptools import setup, __version__ as setuptools_version
import io
import os
from contextlib import redirect_stdout
from importlib import import_module
import setuptools if int(setuptools_version.split('.')[0]) < 61:
raise Exception(
"[ERROR] pystencils requires at least setuptools version 61 to install.\n"
"If this error occurs during an installation via pip, it is likely that there is a conflict between "
"versions of setuptools installed by pip and the system package manager. "
"In this case, it is recommended to install pystencils into a virtual environment instead."
)
import versioneer import versioneer
try:
import cython # noqa
USE_CYTHON = True
except ImportError:
USE_CYTHON = False
quick_tests = [
'test_quicktests.test_basic_kernel',
'test_quicktests.test_basic_blocking_staggered',
'test_quicktests.test_basic_vectorization',
]
class SimpleTestRunner(distutils.cmd.Command):
"""A custom command to run selected tests"""
description = 'run some quick tests'
user_options = []
@staticmethod
def _run_tests_in_module(test):
"""Short test runner function - to work also if py.test is not installed."""
test = f'pystencils_tests.{test}'
mod, function_name = test.rsplit('.', 1)
if isinstance(mod, str):
mod = import_module(mod)
func = getattr(mod, function_name)
print(f" -> {function_name} in {mod.__name__}")
with redirect_stdout(io.StringIO()):
func()
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
"""Run command."""
for test in quick_tests:
self._run_tests_in_module(test)
def readme():
with open('README.md') as f:
return f.read()
def cython_extensions(*extensions):
from distutils.extension import Extension
if USE_CYTHON:
ext = '.pyx'
result = [Extension(e, [os.path.join(*e.split(".")) + ext]) for e in extensions]
from Cython.Build import cythonize
result = cythonize(result, language_level=3)
return result
elif all([os.path.exists(os.path.join(*e.split(".")) + '.c') for e in extensions]):
ext = '.c'
result = [Extension(e, [os.path.join(*e.split(".")) + ext]) for e in extensions]
return result
else:
return None
def get_cmdclass(): def get_cmdclass():
cmdclass = {"quicktest": SimpleTestRunner} return versioneer.get_cmdclass()
cmdclass.update(versioneer.get_cmdclass())
return cmdclass
setuptools.setup(name='pystencils',
description='Speeding up stencil computations on CPUs and GPUs',
version=versioneer.get_version(),
long_description=readme(),
long_description_content_type="text/markdown",
author='Martin Bauer, Jan Hönig, Markus Holzer',
license='AGPLv3',
author_email='cs10-codegen@fau.de',
url='https://i10git.cs.fau.de/pycodegen/pystencils/',
packages=['pystencils'] + ['pystencils.' + s for s in setuptools.find_packages('pystencils')],
install_requires=['sympy>=1.6,<=1.10', 'numpy>=1.8.0', 'appdirs', 'joblib'],
package_data={'pystencils': ['include/*.h',
'backends/cuda_known_functions.txt',
'backends/opencl1.1_known_functions.txt',
'boundaries/createindexlistcython.c',
'boundaries/createindexlistcython.pyx']},
ext_modules=cython_extensions("pystencils.boundaries.createindexlistcython"),
classifiers=[
'Development Status :: 4 - Beta',
'Framework :: Jupyter',
'Topic :: Software Development :: Code Generators',
'Topic :: Scientific/Engineering :: Physics',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
],
project_urls={
"Bug Tracker": "https://i10git.cs.fau.de/pycodegen/pystencils/issues",
"Documentation": "http://pycodegen.pages.walberla.net/pystencils/",
"Source Code": "https://i10git.cs.fau.de/pycodegen/pystencils",
},
extras_require={
'gpu': ['pycuda'],
'alltrafos': ['islpy', 'py-cpuinfo'],
'bench_db': ['blitzdb', 'pymongo', 'pandas'],
'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk', 'rich', 'graphviz'],
'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx',
'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],
'use_cython': ['Cython']
},
tests_require=['pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'nbformat',
'nbconvert',
'ipython',
'randomgen>=1.18'],
python_requires=">=3.8", setup(
cmdclass=get_cmdclass() version=versioneer.get_version(),
) cmdclass=get_cmdclass(),
)
...@@ -2,18 +2,19 @@ ...@@ -2,18 +2,19 @@
from .enums import Backend, Target from .enums import Backend, Target
from . import fd from . import fd
from . import stencil as stencil from . import stencil as stencil
from .assignment import Assignment, assignment_from_stencil from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
from pystencils.typing.typed_sympy import TypedSymbol from .typing.typed_sympy import TypedSymbol
from .datahandling import create_data_handling
from .display_utils import get_code_obj, get_code_str, show_code, to_dot from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .field import Field, FieldType, fields from .field import Field, FieldType, fields
from .config import CreateKernelConfig from .config import CreateKernelConfig
from .cache import clear_cache
from .kernel_decorator import kernel, kernel_config from .kernel_decorator import kernel, kernel_config
from .kernelcreation import create_kernel, create_staggered_kernel from .kernelcreation import create_kernel, create_staggered_kernel
from .simp import AssignmentCollection from .simp import AssignmentCollection
from .slicing import make_slice from .slicing import make_slice
from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
from .sympyextensions import SymbolCreator from .sympyextensions import SymbolCreator
from .datahandling import create_data_handling
__all__ = ['Field', 'FieldType', 'fields', __all__ = ['Field', 'FieldType', 'fields',
'TypedSymbol', 'TypedSymbol',
...@@ -23,10 +24,11 @@ __all__ = ['Field', 'FieldType', 'fields', ...@@ -23,10 +24,11 @@ __all__ = ['Field', 'FieldType', 'fields',
'Target', 'Backend', 'Target', 'Backend',
'show_code', 'to_dot', 'get_code_obj', 'get_code_str', 'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
'AssignmentCollection', 'AssignmentCollection',
'Assignment', 'Assignment', 'AddAugmentedAssignment',
'assignment_from_stencil', 'assignment_from_stencil',
'SymbolCreator', 'SymbolCreator',
'create_data_handling', 'create_data_handling',
'clear_cache',
'kernel', 'kernel_config', 'kernel', 'kernel_config',
'x_', 'y_', 'z_', 'x_', 'y_', 'z_',
'x_staggered', 'y_staggered', 'z_staggered', 'x_staggered', 'y_staggered', 'z_staggered',
...@@ -34,7 +36,5 @@ __all__ = ['Field', 'FieldType', 'fields', ...@@ -34,7 +36,5 @@ __all__ = ['Field', 'FieldType', 'fields',
'fd', 'fd',
'stencil'] 'stencil']
from ._version import get_versions from . import _version
__version__ = _version.get_versions()['version']
__version__ = get_versions()['version']
del get_versions
...@@ -5,8 +5,9 @@ ...@@ -5,8 +5,9 @@
# directories (produced by setup.py build) will contain a much shorter file # directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number. # that just contains the computed version number.
# This file is released into the public domain. Generated by # This file is released into the public domain.
# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) # Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py.""" """Git implementation of _version.py."""
...@@ -15,9 +16,11 @@ import os ...@@ -15,9 +16,11 @@ import os
import re import re
import subprocess import subprocess
import sys import sys
from typing import Any, Callable, Dict, List, Optional, Tuple
import functools
def get_keywords(): def get_keywords() -> Dict[str, str]:
"""Get the keywords needed to look up the version information.""" """Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive. # these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must # setup.py/versioneer.py will grep for the variable names, so they must
...@@ -33,8 +36,15 @@ def get_keywords(): ...@@ -33,8 +36,15 @@ def get_keywords():
class VersioneerConfig: class VersioneerConfig:
"""Container for Versioneer configuration parameters.""" """Container for Versioneer configuration parameters."""
VCS: str
style: str
tag_prefix: str
parentdir_prefix: str
versionfile_source: str
verbose: bool
def get_config():
def get_config() -> VersioneerConfig:
"""Create, populate and return the VersioneerConfig() object.""" """Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates # these strings are filled in when 'setup.py versioneer' creates
# _version.py # _version.py
...@@ -43,7 +53,7 @@ def get_config(): ...@@ -43,7 +53,7 @@ def get_config():
cfg.style = "pep440" cfg.style = "pep440"
cfg.tag_prefix = "release/" cfg.tag_prefix = "release/"
cfg.parentdir_prefix = "pystencils-" cfg.parentdir_prefix = "pystencils-"
cfg.versionfile_source = "pystencils/_version.py" cfg.versionfile_source = "src/pystencils/_version.py"
cfg.verbose = False cfg.verbose = False
return cfg return cfg
...@@ -52,13 +62,13 @@ class NotThisMethod(Exception): ...@@ -52,13 +62,13 @@ class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario.""" """Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY = {} LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS = {} HANDLERS: Dict[str, Dict[str, Callable]] = {}
def register_vcs_handler(vcs, method): # decorator def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
"""Create decorator to mark a method as the handler of a VCS.""" """Create decorator to mark a method as the handler of a VCS."""
def decorate(f): def decorate(f: Callable) -> Callable:
"""Store f in HANDLERS[vcs][method].""" """Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS: if vcs not in HANDLERS:
HANDLERS[vcs] = {} HANDLERS[vcs] = {}
...@@ -67,22 +77,35 @@ def register_vcs_handler(vcs, method): # decorator ...@@ -67,22 +77,35 @@ def register_vcs_handler(vcs, method): # decorator
return decorate return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, def run_command(
env=None): commands: List[str],
args: List[str],
cwd: Optional[str] = None,
verbose: bool = False,
hide_stderr: bool = False,
env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
"""Call the given command(s).""" """Call the given command(s)."""
assert isinstance(commands, list) assert isinstance(commands, list)
p = None process = None
for c in commands:
popen_kwargs: Dict[str, Any] = {}
if sys.platform == "win32":
# This hides the console window if pythonw.exe is used
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
popen_kwargs["startupinfo"] = startupinfo
for command in commands:
try: try:
dispcmd = str([c] + args) dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git # remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, env=env, process = subprocess.Popen([command] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr stderr=(subprocess.PIPE if hide_stderr
else None)) else None), **popen_kwargs)
break break
except EnvironmentError: except OSError as e:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT: if e.errno == errno.ENOENT:
continue continue
if verbose: if verbose:
...@@ -93,16 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, ...@@ -93,16 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
if verbose: if verbose:
print("unable to find command, tried %s" % (commands,)) print("unable to find command, tried %s" % (commands,))
return None, None return None, None
stdout = p.communicate()[0].strip().decode() stdout = process.communicate()[0].strip().decode()
if p.returncode != 0: if process.returncode != 0:
if verbose: if verbose:
print("unable to run %s (error)" % dispcmd) print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout) print("stdout was %s" % stdout)
return None, p.returncode return None, process.returncode
return stdout, p.returncode return stdout, process.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose): def versions_from_parentdir(
parentdir_prefix: str,
root: str,
verbose: bool,
) -> Dict[str, Any]:
"""Try to determine the version from the parent directory name. """Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both Source tarballs conventionally unpack into a directory that includes both
...@@ -111,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): ...@@ -111,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
""" """
rootdirs = [] rootdirs = []
for i in range(3): for _ in range(3):
dirname = os.path.basename(root) dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix): if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):], return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None, "full-revisionid": None,
"dirty": False, "error": None, "date": None} "dirty": False, "error": None, "date": None}
else: rootdirs.append(root)
rootdirs.append(root) root = os.path.dirname(root) # up a level
root = os.path.dirname(root) # up a level
if verbose: if verbose:
print("Tried directories %s but none started with prefix %s" % print("Tried directories %s but none started with prefix %s" %
...@@ -128,39 +154,42 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): ...@@ -128,39 +154,42 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
@register_vcs_handler("git", "get_keywords") @register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs): def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
"""Extract version information from the given file.""" """Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these # the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py, # keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from # so we do it with a regexp instead. This function is not used from
# _version.py. # _version.py.
keywords = {} keywords: Dict[str, str] = {}
try: try:
f = open(versionfile_abs, "r") with open(versionfile_abs, "r") as fobj:
for line in f.readlines(): for line in fobj:
if line.strip().startswith("git_refnames ="): if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line) mo = re.search(r'=\s*"(.*)"', line)
if mo: if mo:
keywords["refnames"] = mo.group(1) keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="): if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line) mo = re.search(r'=\s*"(.*)"', line)
if mo: if mo:
keywords["full"] = mo.group(1) keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="): if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line) mo = re.search(r'=\s*"(.*)"', line)
if mo: if mo:
keywords["date"] = mo.group(1) keywords["date"] = mo.group(1)
f.close() except OSError:
except EnvironmentError:
pass pass
return keywords return keywords
@register_vcs_handler("git", "keywords") @register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose): def git_versions_from_keywords(
keywords: Dict[str, str],
tag_prefix: str,
verbose: bool,
) -> Dict[str, Any]:
"""Get version information from git keywords.""" """Get version information from git keywords."""
if not keywords: if "refnames" not in keywords:
raise NotThisMethod("no keywords at all, weird") raise NotThisMethod("Short version file found")
date = keywords.get("date") date = keywords.get("date")
if date is not None: if date is not None:
# Use only the last line. Previous lines may contain GPG signature # Use only the last line. Previous lines may contain GPG signature
...@@ -179,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): ...@@ -179,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose: if verbose:
print("keywords are unexpanded, not using") print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball") raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")]) refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those. # just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: " TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags: if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use # Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d # a heuristic: assume all version tags have a digit. The old git %d
...@@ -192,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): ...@@ -192,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we # between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and # filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master". # "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)]) tags = {r for r in refs if re.search(r'\d', r)}
if verbose: if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags)) print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose: if verbose:
...@@ -201,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): ...@@ -201,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# sorting will prefer e.g. "2.0" over "2.0rc1" # sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix): if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):] r = ref[len(tag_prefix):]
# Filter out refs that exactly match prefix or that don't start
# with a number once the prefix is stripped (mostly a concern
# when prefix is '')
if not re.match(r'\d', r):
continue
if verbose: if verbose:
print("picking %s" % r) print("picking %s" % r)
return {"version": r, return {"version": r,
...@@ -216,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): ...@@ -216,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
@register_vcs_handler("git", "pieces_from_vcs") @register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): def git_pieces_from_vcs(
tag_prefix: str,
root: str,
verbose: bool,
runner: Callable = run_command
) -> Dict[str, Any]:
"""Get version from 'git describe' in the root of the source tree. """Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not* This only gets called if the git-archive 'subst' keywords were *not*
...@@ -227,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): ...@@ -227,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32": if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"] GITS = ["git.cmd", "git.exe"]
out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, # GIT_DIR can interfere with correct operation of Versioneer.
hide_stderr=True) # It may be intended to be passed to the Versioneer-versioned project,
# but that should not change where we get our version from.
env = os.environ.copy()
env.pop("GIT_DIR", None)
runner = functools.partial(runner, env=env)
_, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=not verbose)
if rc != 0: if rc != 0:
if verbose: if verbose:
print("Directory %s not under git control" % root) print("Directory %s not under git control" % root)
...@@ -236,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): ...@@ -236,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM) # if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", describe_out, rc = runner(GITS, [
"--always", "--long", "describe", "--tags", "--dirty", "--always", "--long",
"--match", "%s*" % tag_prefix], "--match", f"{tag_prefix}[[:digit:]]*"
cwd=root) ], cwd=root)
# --long was added in git-1.5.5 # --long was added in git-1.5.5
if describe_out is None: if describe_out is None:
raise NotThisMethod("'git describe' failed") raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip() describe_out = describe_out.strip()
full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None: if full_out is None:
raise NotThisMethod("'git rev-parse' failed") raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip() full_out = full_out.strip()
pieces = {} pieces: Dict[str, Any] = {}
pieces["long"] = full_out pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None pieces["error"] = None
branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
cwd=root)
# --abbrev-ref was added in git-1.6.3
if rc != 0 or branch_name is None:
raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
branch_name = branch_name.strip()
if branch_name == "HEAD":
# If we aren't exactly on a branch, pick a branch which represents
# the current commit. If all else fails, we are on a branchless
# commit.
branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
# --contains was added in git-1.5.4
if rc != 0 or branches is None:
raise NotThisMethod("'git branch --contains' returned error")
branches = branches.split("\n")
# Remove the first line if we're running detached
if "(" in branches[0]:
branches.pop(0)
# Strip off the leading "* " from the list of branches.
branches = [branch[2:] for branch in branches]
if "master" in branches:
branch_name = "master"
elif not branches:
branch_name = None
else:
# Pick the first branch that is returned. Good or bad.
branch_name = branches[0]
pieces["branch"] = branch_name
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens. # TAG might have hyphens.
git_describe = describe_out git_describe = describe_out
...@@ -270,7 +349,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): ...@@ -270,7 +349,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# TAG-NUM-gHEX # TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo: if not mo:
# unparseable. Maybe git-describe is misbehaving? # unparsable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'" pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out) % describe_out)
return pieces return pieces
...@@ -295,13 +374,11 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): ...@@ -295,13 +374,11 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else: else:
# HEX: no tags # HEX: no tags
pieces["closest-tag"] = None pieces["closest-tag"] = None
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
cwd=root) pieces["distance"] = len(out.split()) # total number of commits
pieces["distance"] = int(count_out) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords() # commit date: see ISO-8601 comment in git_versions_from_keywords()
date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature # Use only the last line. Previous lines may contain GPG signature
# information. # information.
date = date.splitlines()[-1] date = date.splitlines()[-1]
...@@ -310,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): ...@@ -310,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
return pieces return pieces
def plus_or_dot(pieces): def plus_or_dot(pieces: Dict[str, Any]) -> str:
"""Return a + if we don't already have one, else return a .""" """Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""): if "+" in pieces.get("closest-tag", ""):
return "." return "."
return "+" return "+"
def render_pep440(pieces): def render_pep440(pieces: Dict[str, Any]) -> str:
"""Build up version string, with post-release "local version identifier". """Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
...@@ -342,23 +419,71 @@ def render_pep440(pieces): ...@@ -342,23 +419,71 @@ def render_pep440(pieces):
return rendered return rendered
def render_pep440_pre(pieces): def render_pep440_branch(pieces: Dict[str, Any]) -> str:
"""TAG[.post0.devDISTANCE] -- No -dirty. """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
The ".dev0" means not master branch. Note that .dev0 sorts backwards
(a feature branch will appear "older" than the master branch).
Exceptions: Exceptions:
1: no tags. 0.post0.devDISTANCE 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
""" """
if pieces["closest-tag"]: if pieces["closest-tag"]:
rendered = pieces["closest-tag"] rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0"
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += "+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
"""Split pep440 version string at the post-release segment.
Returns the release segments before the post-release and the
post-release version number (or -1 if no post-release segment is present).
"""
vc = str.split(ver, ".post")
return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
def render_pep440_pre(pieces: Dict[str, Any]) -> str:
"""TAG[.postN.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post0.devDISTANCE
"""
if pieces["closest-tag"]:
if pieces["distance"]: if pieces["distance"]:
rendered += ".post0.dev%d" % pieces["distance"] # update the post release segment
tag_version, post_version = pep440_split_post(pieces["closest-tag"])
rendered = tag_version
if post_version is not None:
rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
else:
rendered += ".post0.dev%d" % (pieces["distance"])
else:
# no commits, use the tag as the version
rendered = pieces["closest-tag"]
else: else:
# exception #1 # exception #1
rendered = "0.post0.dev%d" % pieces["distance"] rendered = "0.post0.dev%d" % pieces["distance"]
return rendered return rendered
def render_pep440_post(pieces): def render_pep440_post(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX] . """TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards The ".dev0" means dirty. Note that .dev0 sorts backwards
...@@ -385,7 +510,36 @@ def render_pep440_post(pieces): ...@@ -385,7 +510,36 @@ def render_pep440_post(pieces):
return rendered return rendered
def render_pep440_old(pieces): def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
The ".dev0" means not master branch.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_old(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]] . """TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty. The ".dev0" means dirty.
...@@ -407,7 +561,7 @@ def render_pep440_old(pieces): ...@@ -407,7 +561,7 @@ def render_pep440_old(pieces):
return rendered return rendered
def render_git_describe(pieces): def render_git_describe(pieces: Dict[str, Any]) -> str:
"""TAG[-DISTANCE-gHEX][-dirty]. """TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'. Like 'git describe --tags --dirty --always'.
...@@ -427,7 +581,7 @@ def render_git_describe(pieces): ...@@ -427,7 +581,7 @@ def render_git_describe(pieces):
return rendered return rendered
def render_git_describe_long(pieces): def render_git_describe_long(pieces: Dict[str, Any]) -> str:
"""TAG-DISTANCE-gHEX[-dirty]. """TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'. Like 'git describe --tags --dirty --always -long'.
...@@ -447,7 +601,7 @@ def render_git_describe_long(pieces): ...@@ -447,7 +601,7 @@ def render_git_describe_long(pieces):
return rendered return rendered
def render(pieces, style): def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
"""Render the given version pieces into the requested style.""" """Render the given version pieces into the requested style."""
if pieces["error"]: if pieces["error"]:
return {"version": "unknown", return {"version": "unknown",
...@@ -461,10 +615,14 @@ def render(pieces, style): ...@@ -461,10 +615,14 @@ def render(pieces, style):
if style == "pep440": if style == "pep440":
rendered = render_pep440(pieces) rendered = render_pep440(pieces)
elif style == "pep440-branch":
rendered = render_pep440_branch(pieces)
elif style == "pep440-pre": elif style == "pep440-pre":
rendered = render_pep440_pre(pieces) rendered = render_pep440_pre(pieces)
elif style == "pep440-post": elif style == "pep440-post":
rendered = render_pep440_post(pieces) rendered = render_pep440_post(pieces)
elif style == "pep440-post-branch":
rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old": elif style == "pep440-old":
rendered = render_pep440_old(pieces) rendered = render_pep440_old(pieces)
elif style == "git-describe": elif style == "git-describe":
...@@ -479,7 +637,7 @@ def render(pieces, style): ...@@ -479,7 +637,7 @@ def render(pieces, style):
"date": pieces.get("date")} "date": pieces.get("date")}
def get_versions(): def get_versions() -> Dict[str, Any]:
"""Get version information or return default if unable to do so.""" """Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some # __file__, we can work backwards from there to the root. Some
...@@ -500,7 +658,7 @@ def get_versions(): ...@@ -500,7 +658,7 @@ def get_versions():
# versionfile_source is the relative path from the top of the source # versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert # tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__. # this to find the root from __file__.
for i in cfg.versionfile_source.split('/'): for _ in cfg.versionfile_source.split('/'):
root = os.path.dirname(root) root = os.path.dirname(root)
except NameError: except NameError:
return {"version": "0+unknown", "full-revisionid": None, return {"version": "0+unknown", "full-revisionid": None,
......
import numpy as np import numpy as np
from pystencils.typing import numpy_name_to_c
def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True): def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
...@@ -21,26 +20,26 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o ...@@ -21,26 +20,26 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o
from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size, from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size,
get_vector_instruction_set) get_vector_instruction_set)
type_name = numpy_name_to_c(np.dtype(dtype).name)
instruction_sets = get_supported_instruction_sets() instruction_sets = get_supported_instruction_sets()
if instruction_sets is None: if instruction_sets is None:
byte_alignment = 64 byte_alignment = 64
elif byte_alignment == 'cacheline': elif byte_alignment == 'cacheline':
cacheline_sizes = [get_cacheline_size(is_name) for is_name in instruction_sets] cacheline_sizes = [get_cacheline_size(is_name) for is_name in instruction_sets]
if all([s is None for s in cacheline_sizes]): if all([s is None for s in cacheline_sizes]) or \
widths = [get_vector_instruction_set(type_name, is_name)['width'] * np.dtype(dtype).itemsize max([s for s in cacheline_sizes if s is not None]) > 0x100000:
widths = [get_vector_instruction_set(dtype, is_name)['width'] * np.dtype(dtype).itemsize
for is_name in instruction_sets for is_name in instruction_sets
if type(get_vector_instruction_set(type_name, is_name)['width']) is int] if type(get_vector_instruction_set(dtype, is_name)['width']) is int]
byte_alignment = 64 if all([s is None for s in widths]) else max(widths) byte_alignment = 64 if all([s is None for s in widths]) else max(widths)
else: else:
byte_alignment = max([s for s in cacheline_sizes if s is not None]) byte_alignment = max([s for s in cacheline_sizes if s is not None])
elif not any([type(get_vector_instruction_set(type_name, is_name)['width']) is int elif not any([type(get_vector_instruction_set(dtype, is_name)['width']) is int
for is_name in instruction_sets]): for is_name in instruction_sets]):
byte_alignment = 64 byte_alignment = 64
else: else:
byte_alignment = max([get_vector_instruction_set(type_name, is_name)['width'] * np.dtype(dtype).itemsize byte_alignment = max([get_vector_instruction_set(dtype, is_name)['width'] * np.dtype(dtype).itemsize
for is_name in instruction_sets for is_name in instruction_sets
if type(get_vector_instruction_set(type_name, is_name)['width']) is int]) if type(get_vector_instruction_set(dtype, is_name)['width']) is int])
if (not align_inner_coordinate) or (not hasattr(shape, '__len__')): if (not align_inner_coordinate) or (not hasattr(shape, '__len__')):
size = np.prod(shape) size = np.prod(shape)
d = np.dtype(dtype) d = np.dtype(dtype)
...@@ -78,7 +77,7 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o ...@@ -78,7 +77,7 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o
return tmp return tmp
def aligned_zeros(shape, byte_alignment=True, dtype=float, byte_offset=0, order='C', align_inner_coordinate=True): def aligned_zeros(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset, arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset,
order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate) order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate)
x = np.zeros((), arr.dtype) x = np.zeros((), arr.dtype)
...@@ -86,7 +85,7 @@ def aligned_zeros(shape, byte_alignment=True, dtype=float, byte_offset=0, order= ...@@ -86,7 +85,7 @@ def aligned_zeros(shape, byte_alignment=True, dtype=float, byte_offset=0, order=
return arr return arr
def aligned_ones(shape, byte_alignment=True, dtype=float, byte_offset=0, order='C', align_inner_coordinate=True): def aligned_ones(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset, arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset,
order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate) order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate)
x = np.ones((), arr.dtype) x = np.ones((), arr.dtype)
......
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from sympy.codegen.ast import Assignment from sympy.codegen.ast import Assignment, AugmentedAssignment, AddAugmentedAssignment
from sympy.printing.latex import LatexPrinter from sympy.printing.latex import LatexPrinter
__all__ = ['Assignment', 'assignment_from_stencil'] __all__ = ['Assignment', 'AugmentedAssignment', 'AddAugmentedAssignment', 'assignment_from_stencil']
def print_assignment_latex(printer, expr): def print_assignment_latex(printer, expr):
binop = f"{expr.binop}=" if isinstance(expr, AugmentedAssignment) else ''
"""sympy cannot print Assignments as Latex. Thus, this function is added to the sympy Latex printer""" """sympy cannot print Assignments as Latex. Thus, this function is added to the sympy Latex printer"""
printed_lhs = printer.doprint(expr.lhs) printed_lhs = printer.doprint(expr.lhs)
printed_rhs = printer.doprint(expr.rhs) printed_rhs = printer.doprint(expr.rhs)
return fr"{printed_lhs} \leftarrow {printed_rhs}" return fr"{printed_lhs} \leftarrow_{{{binop}}} {printed_rhs}"
def assignment_str(assignment): def assignment_str(assignment):
return fr"{assignment.lhs}{assignment.rhs}" op = f"{assignment.binop}=" if isinstance(assignment, AugmentedAssignment) else ''
return fr"{assignment.lhs} {op} {assignment.rhs}"
_old_new = sp.codegen.ast.Assignment.__new__ _old_new = sp.codegen.ast.Assignment.__new__
...@@ -32,6 +34,9 @@ Assignment.__str__ = assignment_str ...@@ -32,6 +34,9 @@ Assignment.__str__ = assignment_str
Assignment.__new__ = _Assignment__new__ Assignment.__new__ = _Assignment__new__
LatexPrinter._print_Assignment = print_assignment_latex LatexPrinter._print_Assignment = print_assignment_latex
AugmentedAssignment.__str__ = assignment_str
LatexPrinter._print_AugmentedAssignment = print_assignment_latex
sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self)) sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self))
......
...@@ -5,12 +5,12 @@ from typing import Any, List, Optional, Sequence, Set, Union ...@@ -5,12 +5,12 @@ from typing import Any, List, Optional, Sequence, Set, Union
import sympy as sp import sympy as sp
import pystencils from pystencils.assignment import Assignment
from pystencils.typing.utilities import create_type, get_next_parent_of_type
from pystencils.enums import Target, Backend from pystencils.enums import Target, Backend
from pystencils.field import Field from pystencils.field import Field
from pystencils.typing.typed_sympy import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol
from pystencils.sympyextensions import fast_subs from pystencils.sympyextensions import fast_subs
from pystencils.typing import (create_type, get_next_parent_of_type,
FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol, CFunction)
NodeOrExpr = Union['Node', sp.Expr] NodeOrExpr = Union['Node', sp.Expr]
...@@ -193,6 +193,10 @@ class KernelFunction(Node): ...@@ -193,6 +193,10 @@ class KernelFunction(Node):
# function that compiles the node to a Python callable, is set by the backends # function that compiles the node to a Python callable, is set by the backends
self._compile_function = compile_function self._compile_function = compile_function
self.assignments = assignments self.assignments = assignments
# If nontemporal stores are activated together with the Neon instruction set it results in cacheline zeroing
# For cacheline zeroing the information of the field size for each field is needed. Thus, in this case
# all field sizes are kernel parameters and not just the common field size used for the loops
self.use_all_written_field_sizes = False
@property @property
def target(self): def target(self):
...@@ -233,7 +237,8 @@ class KernelFunction(Node): ...@@ -233,7 +237,8 @@ class KernelFunction(Node):
@property @property
def fields_written(self) -> Set[Field]: def fields_written(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment) assignments = self.atoms(SympyAssignment)
return {a.lhs.field for a in assignments if isinstance(a.lhs, ResolvedFieldAccess)} return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
@property @property
def fields_read(self) -> Set[Field]: def fields_read(self) -> Set[Field]:
...@@ -247,6 +252,11 @@ class KernelFunction(Node): ...@@ -247,6 +252,11 @@ class KernelFunction(Node):
This function is expensive, cache the result where possible! This function is expensive, cache the result where possible!
""" """
field_map = {f.name: f for f in self.fields_accessed} field_map = {f.name: f for f in self.fields_accessed}
sizes = set()
if self.use_all_written_field_sizes:
sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
def get_fields(symbol): def get_fields(symbol):
if hasattr(symbol, 'field_name'): if hasattr(symbol, 'field_name'):
...@@ -256,9 +266,13 @@ class KernelFunction(Node): ...@@ -256,9 +266,13 @@ class KernelFunction(Node):
return () return ()
argument_symbols = self._body.undefined_symbols - self.global_variables argument_symbols = self._body.undefined_symbols - self.global_variables
argument_symbols.update(sizes)
parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols] parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
if hasattr(self, 'indexing'): if hasattr(self, 'indexing'):
parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()] parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
# Exclude paramters of type CFunction. These parameters will result in a C function call that will be handled
# by including a respective header file in the compute kernel. Hence, it is not a free parameter.
parameters = [p for p in parameters if not isinstance(p.symbol, CFunction)]
parameters.sort(key=lambda p: p.symbol.name) parameters.sort(key=lambda p: p.symbol.name)
return parameters return parameters
...@@ -292,7 +306,7 @@ class SkipIteration(Node): ...@@ -292,7 +306,7 @@ class SkipIteration(Node):
class Block(Node): class Block(Node):
def __init__(self, nodes: List[Node]): def __init__(self, nodes: Union[Node, List[Node]]):
super(Block, self).__init__() super(Block, self).__init__()
if not isinstance(nodes, list): if not isinstance(nodes, list):
nodes = [nodes] nodes = [nodes]
...@@ -334,14 +348,6 @@ class Block(Node): ...@@ -334,14 +348,6 @@ class Block(Node):
assert self._nodes.count(insert_before) == 1 assert self._nodes.count(insert_before) == 1
idx = self._nodes.index(insert_before) idx = self._nodes.index(insert_before)
# move all assignment (definitions to the top)
if isinstance(new_node, SympyAssignment) and new_node.is_declaration:
while idx > 0:
pn = self._nodes[idx - 1]
if isinstance(pn, LoopOverCoordinate) or isinstance(pn, Conditional):
idx -= 1
else:
break
if not if_not_exists or self._nodes[idx] != new_node: if not if_not_exists or self._nodes[idx] != new_node:
self._nodes.insert(idx, new_node) self._nodes.insert(idx, new_node)
...@@ -350,14 +356,6 @@ class Block(Node): ...@@ -350,14 +356,6 @@ class Block(Node):
assert self._nodes.count(insert_after) == 1 assert self._nodes.count(insert_after) == 1
idx = self._nodes.index(insert_after) + 1 idx = self._nodes.index(insert_after) + 1
# move all assignment (definitions to the top)
if isinstance(new_node, SympyAssignment) and new_node.is_declaration:
while idx > 0:
pn = self._nodes[idx - 1]
if isinstance(pn, LoopOverCoordinate) or isinstance(pn, Conditional):
idx -= 1
else:
break
if not if_not_exists or not (self._nodes[idx - 1] == new_node if not if_not_exists or not (self._nodes[idx - 1] == new_node
or (idx < len(self._nodes) and self._nodes[idx] == new_node)): or (idx < len(self._nodes) and self._nodes[idx] == new_node)):
self._nodes.insert(idx, new_node) self._nodes.insert(idx, new_node)
...@@ -392,7 +390,7 @@ class Block(Node): ...@@ -392,7 +390,7 @@ class Block(Node):
def symbols_defined(self): def symbols_defined(self):
result = set() result = set()
for a in self.args: for a in self.args:
if isinstance(a, pystencils.Assignment): if isinstance(a, Assignment):
result.update(a.free_symbols) result.update(a.free_symbols)
else: else:
result.update(a.symbols_defined) result.update(a.symbols_defined)
...@@ -403,7 +401,7 @@ class Block(Node): ...@@ -403,7 +401,7 @@ class Block(Node):
result = set() result = set()
defined_symbols = set() defined_symbols = set()
for a in self.args: for a in self.args:
if isinstance(a, pystencils.Assignment): if isinstance(a, Assignment):
result.update(a.free_symbols) result.update(a.free_symbols)
defined_symbols.update({a.lhs}) defined_symbols.update({a.lhs})
else: else:
...@@ -433,7 +431,7 @@ class LoopOverCoordinate(Node): ...@@ -433,7 +431,7 @@ class LoopOverCoordinate(Node):
LOOP_COUNTER_NAME_PREFIX = "ctr" LOOP_COUNTER_NAME_PREFIX = "ctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr" BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False): def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False, custom_loop_ctr=None):
super(LoopOverCoordinate, self).__init__(parent=None) super(LoopOverCoordinate, self).__init__(parent=None)
self.body = body self.body = body
body.parent = self body.parent = self
...@@ -444,11 +442,12 @@ class LoopOverCoordinate(Node): ...@@ -444,11 +442,12 @@ class LoopOverCoordinate(Node):
self.body.parent = self self.body.parent = self
self.prefix_lines = [] self.prefix_lines = []
self.is_block_loop = is_block_loop self.is_block_loop = is_block_loop
self.custom_loop_ctr = custom_loop_ctr
def new_loop_with_different_body(self, new_body): def new_loop_with_different_body(self, new_body):
result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop, result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop,
self.step, self.is_block_loop) self.step, self.is_block_loop, self.custom_loop_ctr)
result.prefix_lines = [l for l in self.prefix_lines] result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines]
return result return result
def subs(self, subs_dict): def subs(self, subs_dict):
...@@ -510,10 +509,13 @@ class LoopOverCoordinate(Node): ...@@ -510,10 +509,13 @@ class LoopOverCoordinate(Node):
@property @property
def loop_counter_name(self): def loop_counter_name(self):
if self.is_block_loop: if self.custom_loop_ctr:
return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over) return self.custom_loop_ctr.name
else: else:
return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over) if self.is_block_loop:
return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over)
else:
return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over)
@staticmethod @staticmethod
def is_loop_counter_symbol(symbol): def is_loop_counter_symbol(symbol):
...@@ -537,10 +539,13 @@ class LoopOverCoordinate(Node): ...@@ -537,10 +539,13 @@ class LoopOverCoordinate(Node):
@property @property
def loop_counter_symbol(self): def loop_counter_symbol(self):
if self.is_block_loop: if self.custom_loop_ctr:
return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over) return self.custom_loop_ctr
else: else:
return self.get_loop_counter_symbol(self.coordinate_to_loop_over) if self.is_block_loop:
return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over)
else:
return self.get_loop_counter_symbol(self.coordinate_to_loop_over)
@property @property
def is_outermost_loop(self): def is_outermost_loop(self):
...@@ -566,10 +571,10 @@ class SympyAssignment(Node): ...@@ -566,10 +571,10 @@ class SympyAssignment(Node):
def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False): def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False):
super(SympyAssignment, self).__init__(parent=None) super(SympyAssignment, self).__init__(parent=None)
self._lhs_symbol = sp.sympify(lhs_symbol) self._lhs_symbol = sp.sympify(lhs_symbol)
self.rhs = sp.sympify(rhs_expr) self._rhs = sp.sympify(rhs_expr)
self._is_const = is_const self._is_const = is_const
self._is_declaration = self.__is_declaration() self._is_declaration = self.__is_declaration()
self.use_auto = use_auto self._use_auto = use_auto
def __is_declaration(self): def __is_declaration(self):
from pystencils.typing import CastFunc from pystencils.typing import CastFunc
...@@ -583,15 +588,28 @@ class SympyAssignment(Node): ...@@ -583,15 +588,28 @@ class SympyAssignment(Node):
def lhs(self): def lhs(self):
return self._lhs_symbol return self._lhs_symbol
@property
def rhs(self):
return self._rhs
@lhs.setter @lhs.setter
def lhs(self, new_value): def lhs(self, new_value):
self._lhs_symbol = new_value self._lhs_symbol = new_value
self._is_declaration = self.__is_declaration() self._is_declaration = self.__is_declaration()
@rhs.setter
def rhs(self, new_rhs_expr):
self._rhs = new_rhs_expr
def subs(self, subs_dict): def subs(self, subs_dict):
self.lhs = fast_subs(self.lhs, subs_dict) self.lhs = fast_subs(self.lhs, subs_dict)
self.rhs = fast_subs(self.rhs, subs_dict) self.rhs = fast_subs(self.rhs, subs_dict)
def fast_subs(self, subs_dict, skip=None):
self.lhs = fast_subs(self.lhs, subs_dict, skip)
self.rhs = fast_subs(self.rhs, subs_dict, skip)
return self
def optimize(self, optimizations): def optimize(self, optimizations):
try: try:
from sympy.codegen.rewriting import optimize from sympy.codegen.rewriting import optimize
...@@ -601,7 +619,7 @@ class SympyAssignment(Node): ...@@ -601,7 +619,7 @@ class SympyAssignment(Node):
@property @property
def args(self): def args(self):
return [self._lhs_symbol, self.rhs, sp.sympify(self._is_const)] return [self._lhs_symbol, self.rhs]
@property @property
def symbols_defined(self): def symbols_defined(self):
...@@ -619,7 +637,9 @@ class SympyAssignment(Node): ...@@ -619,7 +637,9 @@ class SympyAssignment(Node):
for i in range(len(symbol.offsets)): for i in range(len(symbol.offsets)):
loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i)) loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i))
result.update(loop_counters) result.update(loop_counters)
result.update(self._lhs_symbol.atoms(sp.Symbol)) result.update(self._lhs_symbol.atoms(sp.Symbol))
return result return result
@property @property
...@@ -630,6 +650,10 @@ class SympyAssignment(Node): ...@@ -630,6 +650,10 @@ class SympyAssignment(Node):
def is_const(self): def is_const(self):
return self._is_const return self._is_const
@property
def use_auto(self):
return self._use_auto
def replace(self, child, replacement): def replace(self, child, replacement):
if child == self.lhs: if child == self.lhs:
replacement.parent = self replacement.parent = self
...@@ -652,7 +676,7 @@ class SympyAssignment(Node): ...@@ -652,7 +676,7 @@ class SympyAssignment(Node):
return hash((self.lhs, self.rhs)) return hash((self.lhs, self.rhs))
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and (self.lhs, self.rhs) == (other.lhs, other.rhs) return type(self) is type(other) and (self.lhs, self.rhs) == (other.lhs, other.rhs)
class ResolvedFieldAccess(sp.Indexed): class ResolvedFieldAccess(sp.Indexed):
......