import re
from collections import namedtuple
from typing import List, Set

import numpy as np
import sympy as sp
from sympy.core import S
from sympy.logic.boolalg import BooleanFalse, BooleanTrue

from pystencils.astnodes import KernelFunction, Node
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.data_types import (
    PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
    reinterpret_cast_func, vector_memory_access)
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.integer_functions import (
    bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor,
    int_div, int_power_of_2, modulo_ceil)

try:
    from sympy.printing.ccode import C99CodePrinter as CCodePrinter
except ImportError:
    from sympy.printing.ccode import CCodePrinter  # for sympy versions < 1.1

__all__ = ['generate_c', 'CustomCodeNode', 'PrintNode', 'get_headers', 'CustomSympyPrinter']
26

27
28
29

# A header name must already carry its include delimiters: it has to start with `<` or `"`
# and end with `>` or `"` (e.g. `<iostream>` or `"my_header.h"`).
HEADER_REGEX = re.compile(r'^[<"].*[">]$')

# NOTE(review): not referenced in the visible part of this module; presumably toggles a
# kerncraft-specific printing mode that avoids ternary operators - confirm against its users.
KERNCRAFT_NO_TERNARY_MODE = False


def generate_c(ast_node: Node,
               signature_only: bool = False,
               dialect='c',
               custom_backend=None,
               with_globals=True) -> str:
    """Prints an abstract syntax tree node as C or CUDA code.

    This function does not need to distinguish for most AST nodes between C, C++ or CUDA code, it just prints 'C-like'
    code as encoded in the abstract syntax tree (AST). The AST is built differently for C or CUDA by calling different
    create_kernel functions.

    As a side effect the symbols defined by all required global declarations are recorded in
    ``ast_node.global_variables`` (the attribute is created if it does not exist yet).

    Args:
        ast_node: root node of the abstract syntax tree to print
        signature_only: passed on to the backend; the C backend then prints only the signature of kernel functions
        dialect: 'c', 'cuda' or 'opencl'; ignored if ``custom_backend`` is given
        custom_backend: optional callable that is used instead of the default backend of the dialect,
                        it is called with a node and has to return the printed code
        with_globals: if True, the global declarations required by the AST are printed in front of the code
                      of a kernel function (only when ``signature_only`` is False)

    Returns:
        C-like code for the ast node and its descendants

    Raises:
        ValueError: if no custom backend is given and ``dialect`` is not one of the supported values
    """
    global_declarations = get_global_declarations(ast_node)
    for d in global_declarations:
        if hasattr(ast_node, "global_variables"):
            ast_node.global_variables.update(d.symbols_defined)
        else:
            # Store a copy: keeping a reference to the declaration's own set would make the
            # following update() calls modify that declaration's defined symbols.
            ast_node.global_variables = set(d.symbols_defined)

    if custom_backend:
        printer = custom_backend
    elif dialect == 'c':
        # Only some nodes carry an instruction set; everything else is printed without one
        instruction_set = getattr(ast_node, 'instruction_set', None)
        printer = CBackend(signature_only=signature_only,
                           vector_instruction_set=instruction_set)
    elif dialect == 'cuda':
        from pystencils.backends.cuda_backend import CudaBackend
        printer = CudaBackend(signature_only=signature_only)
    elif dialect == 'opencl':
        from pystencils.backends.opencl_backend import OpenClBackend
        printer = OpenClBackend(signature_only=signature_only)
    else:
        raise ValueError("Unknown dialect: " + str(dialect))

    code = printer(ast_node)
    if not signature_only and isinstance(ast_node, KernelFunction):
        if with_globals and global_declarations:
            code = "\n" + code
            # Each declaration is put in front of what was printed so far
            for declaration in global_declarations:
                code = printer(declaration) + "\n" + code

    return code


def get_global_declarations(ast):
    """Collect the global declarations required by a node and all of its descendants.

    Every visited object may provide an iterable ``required_global_declarations`` and an iterable ``args``
    holding its children; objects without these attributes contribute nothing and are not descended into.

    Args:
        ast: root object of the traversal

    Returns:
        list of the distinct declarations found, sorted by their string representation
    """
    global_declarations = []

    def visit_node(sub_ast):
        if hasattr(sub_ast, "required_global_declarations"):
            global_declarations.extend(sub_ast.required_global_declarations)

        if hasattr(sub_ast, "args"):
            for node in sub_ast.args:
                visit_node(node)

    visit_node(ast)

    # set() removes duplicates, sorting by str gives a reproducible order
    return sorted(set(global_declarations), key=str)


def get_headers(ast_node: Node) -> List[str]:
    """Return the header files necessary to compile the printed C-like code.

    Headers are gathered from the instruction set of a kernel function, from the ``headers`` attribute of the
    node (if present), recursively from all arguments that are sympy expressions or nodes, and from the
    global declarations required by the node.

    Args:
        ast_node: node for which the headers are collected

    Returns:
        sorted list of the distinct header names, each including its delimiters, i.e. ``"..."`` or ``<...>``
    """
    headers = set()

    if isinstance(ast_node, KernelFunction) and ast_node.instruction_set:
        headers.update(ast_node.instruction_set['headers'])

    if hasattr(ast_node, 'headers'):
        headers.update(ast_node.headers)
    for a in ast_node.args:
        if isinstance(a, (sp.Expr, Node)):
            headers.update(get_headers(a))

    for g in get_global_declarations(ast_node):
        if isinstance(g, Node):
            headers.update(get_headers(g))

    for h in headers:
        assert HEADER_REGEX.match(h), f'header /{h}/ does not follow the pattern /"..."/ or /<...>/'

    return sorted(headers)
122
123


124
125
126
# --------------------------------------- Backend Specific Nodes -------------------------------------------------------


127
class CustomCodeNode(Node):
    """AST node that is printed as a fixed, user supplied piece of code.

    Two custom code nodes compare equal, and hash equally, if their code is the same.
    """

    def __init__(self, code, symbols_read, symbols_defined, parent=None):
        """
        Args:
            code: code to print for this node, a newline is put in front of it
            symbols_read: iterable of the symbols the code reads
            symbols_defined: iterable of the symbols the code defines
            parent: parent node, passed on to the ``Node`` constructor
        """
        super(CustomCodeNode, self).__init__(parent=parent)
        self._code = "\n" + code
        self._symbols_read = set(symbols_read)
        self._symbols_defined = set(symbols_defined)
        self.headers = []

    def get_code(self, dialect, vector_instruction_set):
        """Return the code of this node; both arguments are ignored by this implementation."""
        return self._code

    @property
    def args(self):
        """A custom code node has no children."""
        return []

    @property
    def symbols_defined(self):
        """Set of symbols defined by the code."""
        return self._symbols_defined

    @property
    def undefined_symbols(self):
        """Symbols that are read by the code but not defined by it."""
        return self._symbols_read - self._symbols_defined

    def __eq__(self, other):
        # Was misspelled `__eq___`, so it was never used as the equality operator and nodes
        # compared by identity although __hash__ already hashed the code.
        if not isinstance(other, CustomCodeNode):
            return NotImplemented
        return self._code == other._code

    def __hash__(self):
        return hash(self._code)

156

157
class PrintNode(CustomCodeNode):
    """Custom code node that writes the name and the value of a symbol to ``std::cout``."""

    # noinspection SpellCheckingInspection
    def __init__(self, symbol_to_print):
        """
        Args:
            symbol_to_print: symbol whose ``name`` is used in the printed statement, it is registered
                             as a symbol that is read by this node
        """
        code = f'\nstd::cout << "{symbol_to_print.name}  =  " << {symbol_to_print.name} << std::endl; \n'
        super(PrintNode, self).__init__(code, symbols_read=[symbol_to_print], symbols_defined=set())
        # std::cout requires the iostream header
        self.headers.append("<iostream>")


# ------------------------------------------- Printer ------------------------------------------------------------------


# noinspection PyPep8Naming
class CBackend:
    """Printer that turns AST nodes into C-like source code.

    A node is printed by the first method ``_print_<ClassName>`` that exists for a class in the method
    resolution order of the node's type. Sympy expressions inside of nodes are printed with ``sympy_printer``.
    """

    def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect='c'):
        """
        Args:
            sympy_printer: printer for sympy expressions; if None a ``VectorizedCustomSympyPrinter`` is created
                           when a vector instruction set is given, otherwise a ``CustomSympyPrinter``
            signature_only: if True, only the signature of kernel functions is printed
            vector_instruction_set: instruction set used to print vectorized stores, or None
            dialect: name of the dialect, 'cuda' enables launch bounds in kernel function signatures
        """
        if sympy_printer is None:
            if vector_instruction_set is not None:
                self.sympy_printer = VectorizedCustomSympyPrinter(vector_instruction_set)
            else:
                self.sympy_printer = CustomSympyPrinter()
        else:
            self.sympy_printer = sympy_printer

        self._vector_instruction_set = vector_instruction_set
        self._indent = "   "
        self._dialect = dialect
        self._signatureOnly = signature_only

    def __call__(self, node):
        """Print ``node`` and return the code as string.

        While printing, ``VectorType.instruction_set`` is set to the instruction set of this backend;
        the previous value is restored afterwards, also if printing fails.
        """
        prev_is = VectorType.instruction_set
        VectorType.instruction_set = self._vector_instruction_set
        try:
            return str(self._print(node))
        finally:
            VectorType.instruction_set = prev_is

    def _print(self, node):
        """Dispatch to the print method of the node's type; strings are returned unchanged.

        Raises:
            NotImplementedError: if no print method exists for any class in the MRO of the node's type
        """
        if isinstance(node, str):
            return node
        for cls in type(node).__mro__:
            method_name = "_print_" + cls.__name__
            if hasattr(self, method_name):
                return getattr(self, method_name)(node)
        raise NotImplementedError(self.__class__.__name__ + " does not support node of type " + node.__class__.__name__)

    def _print_Type(self, node):
        return str(node)

    def _print_KernelFunction(self, node):
        """Print the signature of a kernel function, followed by its body unless only the signature is requested."""
        function_arguments = [f"{self._print(s.symbol.dtype)} {s.symbol.name}" for s in node.get_parameters()]
        launch_bounds = ""
        if self._dialect == 'cuda':
            max_threads = node.indexing.max_threads_per_block()
            if max_threads:
                launch_bounds = f"__launch_bounds__({max_threads}) "
        func_declaration = "FUNC_PREFIX %svoid %s(%s)" % (launch_bounds, node.function_name,
                                                          ", ".join(function_arguments))
        if self._signatureOnly:
            return func_declaration

        body = self._print(node.body)
        return func_declaration + "\n" + body

    def _print_Block(self, node):
        """Print all children of the node, indented and enclosed in curly braces."""
        block_contents = "\n".join([self._print(child) for child in node.args])
        # splitlines(True) keeps the line ends, so joining with the indent indents every line
        return "{\n%s\n}" % (self._indent + self._indent.join(block_contents.splitlines(True)))

    def _print_PragmaBlock(self, node):
        return f"{node.pragma_line}\n{self._print_Block(node)}"

    def _print_LoopOverCoordinate(self, node):
        """Print a for loop with an ``int`` counter, preceded by the prefix lines of the loop node."""
        counter_symbol = node.loop_counter_name
        start = f"int {counter_symbol} = {self.sympy_printer.doprint(node.start)}"
        condition = f"{counter_symbol} < {self.sympy_printer.doprint(node.stop)}"
        update = f"{counter_symbol} += {self.sympy_printer.doprint(node.step)}"
        loop_str = f"for ({start}; {condition}; {update})"

        prefix = "\n".join(node.prefix_lines)
        if prefix:
            prefix += "\n"
        return f"{prefix}{loop_str}\n{self._print(node.body)}"

    def _print_SympyAssignment(self, node):
        """Print a declaration, a vector store or a plain assignment.

        Declarations are printed with ``auto`` or with the (optionally const) data type of the left hand side.
        An assignment whose left hand side is a ``cast_func`` of vector type is printed with the store
        instruction of the vector instruction set, everything else as ``lhs = rhs;``.
        """
        if node.is_declaration:
            if node.use_auto:
                data_type = 'auto '
            else:
                if node.is_const:
                    prefix = 'const '
                else:
                    prefix = ''
                # const-ness is expressed by the prefix only
                data_type = prefix + self._print(node.lhs.dtype).replace(' const', '') + " "

            return "%s%s = %s;" % (data_type,
                                   self.sympy_printer.doprint(node.lhs),
                                   self.sympy_printer.doprint(node.rhs))
        else:
            lhs_type = get_type_of_expression(node.lhs)
            printed_mask = ""
            if type(lhs_type) is VectorType and isinstance(node.lhs, cast_func):
                arg, data_type, aligned, nontemporal, mask = node.lhs.args
                instr = 'storeU'
                if aligned:
                    instr = 'stream' if nontemporal else 'storeA'
                if mask != True:  # NOQA
                    instr = 'maskStore' if aligned else 'maskStoreU'
                    printed_mask = self.sympy_printer.doprint(mask)
                    if self._vector_instruction_set['dataTypePrefix']['double'] == '__mm256d':
                        printed_mask = f"_mm256_castpd_si256({printed_mask})"

                # the stored value has to be a vector, scalar right hand sides are cast
                rhs_type = get_type_of_expression(node.rhs)
                if type(rhs_type) is not VectorType:
                    rhs = cast_func(node.rhs, VectorType(rhs_type))
                else:
                    rhs = node.rhs

                return self._vector_instruction_set[instr].format("&" + self.sympy_printer.doprint(arg),
                                                                  self.sympy_printer.doprint(rhs),
                                                                  printed_mask) + ';'
            else:
                return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};"

    def _print_TemporaryMemoryAllocation(self, node):
        """Print an ``aligned_alloc`` call; the pointer is shifted by the offset of the node."""
        align = 64  # has to be the same value as in _print_TemporaryMemoryFree
        np_dtype = node.symbol.dtype.base_type.numpy_dtype
        required_size = np_dtype.itemsize * node.size + align
        size = modulo_ceil(required_size, align)
        code = "{dtype} {name}=({dtype})aligned_alloc({align}, {size}) + {offset};"
        return code.format(dtype=node.symbol.dtype,
                           name=self.sympy_printer.doprint(node.symbol.name),
                           size=self.sympy_printer.doprint(size),
                           offset=int(node.offset(align)),
                           align=align)

    def _print_TemporaryMemoryFree(self, node):
        """Print the ``free`` call, the offset added at allocation is subtracted again."""
        align = 64  # has to be the same value as in _print_TemporaryMemoryAllocation
        return "free(%s - %d);" % (self.sympy_printer.doprint(node.symbol.name), node.offset(align))

    def _print_SkipIteration(self, _):
        return "continue;"

    def _print_CustomCodeNode(self, node):
        return node.get_code(self._dialect, self._vector_instruction_set)

    def _print_SourceCodeComment(self, node):
        return f"/* {node.text} */"

    def _print_EmptyLine(self, node):
        return ""

    def _print_Conditional(self, node):
        """Print an if/else statement.

        If the condition is the constant true or false, only the block that is executed is printed;
        a constant false condition without false block is printed as empty string.

        Raises:
            ValueError: if the condition is of vector type
        """
        if type(node.condition_expr) is BooleanTrue:
            return self._print_Block(node.true_block)
        elif type(node.condition_expr) is BooleanFalse:
            if not node.false_block:
                # nothing is executed, there is no block that could be printed
                return ""
            return self._print_Block(node.false_block)
        cond_type = get_type_of_expression(node.condition_expr)
        if isinstance(cond_type, VectorType):
            raise ValueError("Problem with Conditional inside vectorized loop - use vec_any or vec_all")

        condition_expr = self.sympy_printer.doprint(node.condition_expr)
        true_block = self._print_Block(node.true_block)
        result = f"if ({condition_expr})\n{true_block} "
        if node.false_block:
            false_block = self._print_Block(node.false_block)
            result += f"else {false_block}"
        return result


# ------------------------------------------ Helper function & classes -------------------------------------------------


# noinspection PyPep8Naming
class CustomSympyPrinter(CCodePrinter):
    """C code printer for sympy expressions with support for the custom functions used in this module."""

    def __init__(self):
        super(CustomSympyPrinter, self).__init__()
        self._float_type = create_type("float32")

    def _print_Pow(self, expr):
        """Don't use std::pow function, for small integer exponents, write as multiplication"""
        if not expr.free_symbols:
            # constant expression, print its numerical value
            return self._typed_number(expr.evalf(), get_type_of_expression(expr))

        if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
            return f"({self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False))})"
        elif expr.exp.is_integer and expr.exp.is_number and -8 < expr.exp < 0:
            return f"1 / ({self._print(sp.Mul(*([expr.base] * -expr.exp), evaluate=False))})"
        else:
            return super(CustomSympyPrinter, self)._print_Pow(expr)

    def _print_Rational(self, expr):
        """Evaluate all rationals i.e. print 0.25 instead of 1.0/4.0"""
        res = str(expr.evalf().num)
        return res

    def _print_Equality(self, expr):
        """Equality operator is not printable in default printer"""
        return '((' + self._print(expr.lhs) + ") == (" + self._print(expr.rhs) + '))'

    def _print_Piecewise(self, expr):
        """Print piecewise in one line (remove newlines)"""
        result = super(CustomSympyPrinter, self)._print_Piecewise(expr)
        return result.replace("\n", "")

    def _print_Abs(self, expr):
        """Print ``abs`` for integer arguments and ``fabs`` for all others."""
        if expr.args[0].is_integer:
            return f'abs({self._print(expr.args[0])})'
        else:
            return f'fabs({self._print(expr.args[0])})'

    def _print_Type(self, node):
        return str(node)

    def _print_Function(self, expr):
        """Print the custom functions of this package; unknown functions are printed as ``name(args)``.

        Expressions that provide a ``to_c`` method are printed by calling it with the print function.
        """
        infix_functions = {
            bitwise_xor: '^',
            bit_shift_right: '>>',
            bit_shift_left: '<<',
            bitwise_or: '|',
            bitwise_and: '&',
        }
        if hasattr(expr, 'to_c'):
            return expr.to_c(self._print)
        if isinstance(expr, reinterpret_cast_func):
            arg, data_type = expr.args
            return f"*(({self._print(PointerType(data_type, restrict=False))})(& {self._print(arg)}))"
        elif isinstance(expr, address_of):
            assert len(expr.args) == 1, "address_of must only have one argument"
            return f"&({self._print(expr.args[0])})"
        elif isinstance(expr, cast_func):
            arg, data_type = expr.args
            if isinstance(arg, sp.Number) and arg.is_finite:
                # finite numbers are printed as literal of the target type instead of a cast
                return self._typed_number(arg, data_type)
            else:
                return f"(({data_type})({self._print(arg)}))"
        elif isinstance(expr, fast_division):
            return f"({self._print(expr.args[0] / expr.args[1])})"
        elif isinstance(expr, fast_sqrt):
            return f"({self._print(sp.sqrt(expr.args[0]))})"
        elif isinstance(expr, (vec_any, vec_all)):
            return self._print(expr.args[0])
        elif isinstance(expr, fast_inv_sqrt):
            return f"({self._print(1 / sp.sqrt(expr.args[0]))})"
        elif isinstance(expr, sp.Abs):
            return f"abs({self._print(expr.args[0])})"
        elif isinstance(expr, sp.Max):
            return self._print(expr)
        elif isinstance(expr, sp.Mod):
            if expr.args[0].is_integer and expr.args[1].is_integer:
                return f"({self._print(expr.args[0])} % {self._print(expr.args[1])})"
            else:
                return f"fmod({self._print(expr.args[0])}, {self._print(expr.args[1])})"
        elif expr.func in infix_functions:
            return f"({self._print(expr.args[0])} {infix_functions[expr.func]} {self._print(expr.args[1])})"
        elif expr.func == int_power_of_2:
            return f"(1 << ({self._print(expr.args[0])}))"
        elif expr.func == int_div:
            return f"(({self._print(expr.args[0])}) / ({self._print(expr.args[1])}))"
        else:
            name = expr.name if hasattr(expr, 'name') else expr.__class__.__name__
            arg_str = ', '.join(self._print(a) for a in expr.args)
            return f'{name}({arg_str})'

    def _typed_number(self, number, dtype):
        """Print a number as literal of the given type.

        For float32 an ``f`` suffix is appended, for float32 and float64 a missing decimal point is added;
        for all other types the printed number is returned unchanged.

        Args:
            number: number to print
            dtype: type object providing the numpy type as ``numpy_dtype``
        """
        res = self._print(number)
        if dtype.numpy_dtype == np.float32:
            return res + '.0f' if '.' not in res else res + 'f'
        elif dtype.numpy_dtype == np.float64:
            return res + '.0' if '.' not in res else res
        else:
            return res

    def _print_Sum(self, expr):
        """Print a sum over the first limit of the expression as immediately invoked lambda containing a loop."""
        template = """[&]() {{
    {dtype} sum = ({dtype}) 0;
    for ( {iterator_dtype} {var} = {start}; {condition}; {var} += {increment} ) {{
        sum += {expr};
    }}
    return sum;
}}()"""
        var = expr.limits[0][0]
        start = expr.limits[0][1]
        end = expr.limits[0][2]
        code = template.format(
            dtype=get_type_of_expression(expr.args[0]),
            iterator_dtype='int',
            var=self._print(var),
            start=self._print(start),
            expr=self._print(expr.function),
            increment=str(1),
            # the upper limit is included in the iteration
            condition=self._print(var) + ' <= ' + self._print(end)  # if start < end else '>='
        )
        return code

    def _print_Product(self, expr):
        """Print a product over the first limit of the expression as immediately invoked lambda containing a loop."""
        template = """[&]() {{
    {dtype} product = ({dtype}) 1;
    for ( {iterator_dtype} {var} = {start}; {condition}; {var} += {increment} ) {{
        product *= {expr};
    }}
    return product;
}}()"""
        var = expr.limits[0][0]
        start = expr.limits[0][1]
        end = expr.limits[0][2]
        code = template.format(
            dtype=get_type_of_expression(expr.args[0]),
            iterator_dtype='int',
            var=self._print(var),
            start=self._print(start),
            expr=self._print(expr.function),
            increment=str(1),
            # the upper limit is included in the iteration
            condition=self._print(var) + ' <= ' + self._print(end)  # if start < end else '>='
        )
        return code

    def _print_ConditionalFieldAccess(self, node):
        """Print as piecewise: the out of bounds value if the out of bounds condition holds, else the access."""
        return self._print(sp.Piecewise((node.outofbounds_value, node.outofbounds_condition), (node.access, True)))

    def _print_Max(self, expr):
        """Print the maximum of the arguments as nested ternary expressions."""
        def inner_print_max(args):
            if len(args) == 1:
                return self._print(args[0])
            half = len(args) // 2
            a = inner_print_max(args[:half])
            b = inner_print_max(args[half:])
            return f"(({a} > {b}) ? {a} : {b})"
        return inner_print_max(expr.args)

    def _print_Min(self, expr):
        """Print the minimum of the arguments as nested ternary expressions."""
        def inner_print_min(args):
            if len(args) == 1:
                return self._print(args[0])
            half = len(args) // 2
            a = inner_print_min(args[:half])
            b = inner_print_min(args[half:])
            return f"(({a} < {b}) ? {a} : {b})"
        return inner_print_min(expr.args)

    def _print_re(self, expr):
        return f"real({self._print(expr.args[0])})"

    def _print_im(self, expr):
        return f"imag({self._print(expr.args[0])})"

    def _print_ImaginaryUnit(self, expr):
        return "complex<double>{0,1}"

    def _print_TypedImaginaryUnit(self, expr):
        """Print the imaginary unit as ``complex<float>`` or ``complex<double>`` literal.

        Raises:
            NotImplementedError: if the type of the expression is neither complex64 nor complex128
        """
        if expr.dtype.numpy_dtype == np.complex64:
            return "complex<float>{0,1}"
        elif expr.dtype.numpy_dtype == np.complex128:
            return "complex<double>{0,1}"
        else:
            raise NotImplementedError(
                "only complex64 and complex128 supported")

    def _print_Complex(self, expr):
        # NOTE(review): _typed_number reads `dtype.numpy_dtype`, which the numpy scalar type
        # np.complex64 does not seem to provide - this looks like it fails if it is ever reached; confirm.
        return self._typed_number(expr, np.complex64)


# noinspection PyPep8Naming
class VectorizedCustomSympyPrinter(CustomSympyPrinter):
    """Sympy printer that emits code from the format strings of a vector instruction set.

    The ``_print_*`` overrides that call ``_scalarFallback`` hand expressions
    whose type is not a ``VectorType`` over to the ``CustomSympyPrinter``
    implementation of the same method.
    """
    # (sign, term) pair used by ``_print_Add`` to order and combine its summands.
    SummandInfo = namedtuple("SummandInfo", ['sign', 'term'])

522
523
    def __init__(self, instruction_set):
        """Create a printer bound to one vector instruction set.

        Args:
            instruction_set: mapping that the print methods index by operation
                name (e.g. ``'+'``, ``'loadA'``, ``'width'``) to obtain format
                strings and settings
        """
        super().__init__()
        self.instruction_set = instruction_set
525

Martin Bauer's avatar
Martin Bauer committed
526
527
528
529
    def _scalarFallback(self, func_name, expr, *args, **kwargs):
        """Delegate printing of non-vector expressions to the parent printer.

        Args:
            func_name: name of the parent-class print method to invoke
            expr: expression to print
            *args: forwarded to the parent method
            **kwargs: forwarded to the parent method

        Returns:
            The parent's printed result if the type of ``expr`` is not exactly
            ``VectorType``; otherwise ``None``, signalling that the caller has
            to emit vector code itself.
        """
        expr_type = get_type_of_expression(expr)
        if type(expr_type) is VectorType:
            # Vector expressions must match the width of the active instruction set.
            assert self.instruction_set['width'] == expr_type.width
            return None

        scalar_printer = getattr(super(), func_name)
        return scalar_printer(expr, *args, **kwargs)

534
    def _print_Function(self, expr):
        """Print a function application, using vector instructions where they apply.

        Special handling exists for vector loads (``vector_memory_access``),
        casts to a ``VectorType`` (``cast_func``), ``fast_division``,
        ``fast_sqrt``, ``fast_inv_sqrt``, ``vec_any`` and ``vec_all``.
        Everything else is passed to the parent class implementation.

        Args:
            expr: the function expression to print

        Returns:
            The generated code as a string.
        """
        if isinstance(expr, vector_memory_access):
            # Only the address expression and the alignment flag are needed for
            # a load; the unpacking still enforces the five-argument layout.
            arg, _, aligned, _, _ = expr.args
            instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
            return instruction.format("& " + self._print(arg))
        elif isinstance(expr, cast_func):
            arg, data_type = expr.args
            if type(data_type) is VectorType:
                if isinstance(arg, sp.Tuple):
                    # One value per lane.
                    is_boolean = get_type_of_expression(arg[0]) == create_type("bool")
                    printed_args = [self._print(a) for a in arg]
                    instruction = 'makeVecBool' if is_boolean else 'makeVec'
                    return self.instruction_set[instruction].format(*printed_args)
                else:
                    # A single value broadcast to all lanes.
                    is_boolean = get_type_of_expression(arg) == create_type("bool")
                    instruction = 'makeVecConstBool' if is_boolean else 'makeVecConst'
                    return self.instruction_set[instruction].format(self._print(arg))
            # Casts to non-vector types are handled by the parent class below.
        elif expr.func == fast_division:
            result = self._scalarFallback('_print_Function', expr)
            if not result:
                result = self.instruction_set['/'].format(self._print(expr.args[0]), self._print(expr.args[1]))
            return result
        elif expr.func == fast_sqrt:
            return f"({self._print(sp.sqrt(expr.args[0]))})"
        elif expr.func == fast_inv_sqrt:
            result = self._scalarFallback('_print_Function', expr)
            if result:
                # Return the scalar result directly instead of falling through
                # and printing the same expression a second time via the parent.
                return result
            # A missing or empty 'rsqrt' entry means there is no dedicated
            # instruction; use the generic 1/sqrt form in that case.
            rsqrt_instruction = self.instruction_set.get('rsqrt')
            if rsqrt_instruction:
                return rsqrt_instruction.format(self._print(expr.args[0]))
            return f"({self._print(1 / sp.sqrt(expr.args[0]))})"
        elif isinstance(expr, vec_any):
            expr_type = get_type_of_expression(expr.args[0])
            if type(expr_type) is not VectorType:
                # Reduction over a scalar is the scalar itself.
                return self._print(expr.args[0])
            else:
                return self.instruction_set['any'].format(self._print(expr.args[0]))
        elif isinstance(expr, vec_all):
            expr_type = get_type_of_expression(expr.args[0])
            if type(expr_type) is not VectorType:
                # Reduction over a scalar is the scalar itself.
                return self._print(expr.args[0])
            else:
                return self.instruction_set['all'].format(self._print(expr.args[0]))

        return super()._print_Function(expr)

580
581
582
583
584
    def _print_And(self, expr):
        result = self._scalarFallback('_print_And', expr)
        if result:
            return result

Martin Bauer's avatar
Martin Bauer committed
585
586
587
588
        arg_strings = [self._print(a) for a in expr.args]
        assert len(arg_strings) > 0
        result = arg_strings[0]
        for item in arg_strings[1:]:
Martin Bauer's avatar
Martin Bauer committed
589
            result = self.instruction_set['&'].format(result, item)
590
591
592
593
594
595
596
        return result

    def _print_Or(self, expr):
        result = self._scalarFallback('_print_Or', expr)
        if result:
            return result

Martin Bauer's avatar
Martin Bauer committed
597
598
599
600
        arg_strings = [self._print(a) for a in expr.args]
        assert len(arg_strings) > 0
        result = arg_strings[0]
        for item in arg_strings[1:]:
Martin Bauer's avatar
Martin Bauer committed
601
            result = self.instruction_set['|'].format(result, item)
602
603
        return result

604
    def _print_Add(self, expr, order=None):
605
606
607
        result = self._scalarFallback('_print_Add', expr)
        if result:
            return result
608
609
610
611

        summands = []
        for term in expr.args:
            if term.func == sp.Mul:
Martin Bauer's avatar
Martin Bauer committed
612
                sign, t = self._print_Mul(term, inside_add=True)
613
614
615
616
617
618
619
620
621
622
623
624
625
            else:
                t = self._print(term)
                sign = 1
            summands.append(self.SummandInfo(sign, t))
        # Use positive terms first
        summands.sort(key=lambda e: e.sign, reverse=True)
        # if no positive term exists, prepend a zero
        if summands[0].sign == -1:
            summands.insert(0, self.SummandInfo(1, "0"))

        assert len(summands) >= 2
        processed = summands[0].term
        for summand in summands[1:]:
Martin Bauer's avatar
Martin Bauer committed
626
            func = self.instruction_set['-'] if summand.sign == -1 else self.instruction_set['+']
627
628
629
            processed = func.format(processed, summand.term)
        return processed

630
    def _print_Pow(self, expr):
631
632
633
        result = self._scalarFallback('_print_Pow', expr)
        if result:
            return result
634

635
        one = self.instruction_set['makeVecConst'].format(1.0)
636

637
638
        if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
            return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
639
        elif expr.exp == -1:
640
            one = self.instruction_set['makeVecConst'].format(1.0)
641
642
643
            return self.instruction_set['/'].format(one, self._print(expr.base))
        elif expr.exp == 0.5:
            return self.instruction_set['sqrt'].format(self._print(expr.base))
644
645
646
        elif expr.exp == -0.5:
            root = self.instruction_set['sqrt'].format(self._print(expr.base))
            return self.instruction_set['/'].format(one, root)
647
648
649
        elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0:
            return self.instruction_set['/'].format(one,
                                                    self._print(sp.Mul(*[expr.base] * (-expr.exp), evaluate=False)))
650
        else:
651
            raise ValueError("Generic exponential not supported: " + str(expr))
652

Martin Bauer's avatar
Martin Bauer committed
653
654
655
656
    def _print_Mul(self, expr, inside_add=False):
        # noinspection PyProtectedMember
        from sympy.core.mul import _keep_coeff

657
658
659
        result = self._scalarFallback('_print_Mul', expr)
        if result:
            return result
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687

        c, e = expr.as_coeff_Mul()
        if c < 0:
            expr = _keep_coeff(-c, e)
            sign = -1
        else:
            sign = 1

        a = []  # items in the numerator
        b = []  # items that are in the denominator (if any)

        # Gather args for numerator/denominator
        for item in expr.as_ordered_factors():
            if item.is_commutative and item.is_Pow and item.exp.is_Rational and item.exp.is_negative:
                if item.exp != -1:
                    b.append(sp.Pow(item.base, -item.exp, evaluate=False))
                else:
                    b.append(sp.Pow(item.base, -item.exp))
            else:
                a.append(item)

        a = a or [S.One]

        a_str = [self._print(x) for x in a]
        b_str = [self._print(x) for x in b]

        result = a_str[0]
        for item in a_str[1:]:
Martin Bauer's avatar
Martin Bauer committed
688
            result = self.instruction_set['*'].format(result, item)
689
690
691
692

        if len(b) > 0:
            denominator_str = b_str[0]
            for item in b_str[1:]:
Martin Bauer's avatar
Martin Bauer committed
693
694
                denominator_str = self.instruction_set['*'].format(denominator_str, item)
            result = self.instruction_set['/'].format(result, denominator_str)
695

Martin Bauer's avatar
Martin Bauer committed
696
        if inside_add:
697
698
699
            return sign, result
        else:
            if sign < 0:
Martin Bauer's avatar
Martin Bauer committed
700
                return self.instruction_set['*'].format(self._print(S.NegativeOne), result)
701
702
703
            else:
                return result

704
    def _print_Relational(self, expr):
705
706
707
        result = self._scalarFallback('_print_Relational', expr)
        if result:
            return result
Martin Bauer's avatar
Martin Bauer committed
708
        return self.instruction_set[expr.rel_op].format(self._print(expr.lhs), self._print(expr.rhs))
709
710

    def _print_Equality(self, expr):
711
712
713
        result = self._scalarFallback('_print_Equality', expr)
        if result:
            return result
Martin Bauer's avatar
Martin Bauer committed
714
        return self.instruction_set['=='].format(self._print(expr.lhs), self._print(expr.rhs))
715
716

    def _print_Piecewise(self, expr):
717
718
719
        result = self._scalarFallback('_print_Piecewise', expr)
        if result:
            return result
720

Martin Bauer's avatar
Martin Bauer committed
721
        if expr.args[-1].cond.args[0] is not sp.sympify(True):
722
723
724
725
726
727
728
729
730
            # We need the last conditional to be a True, otherwise the resulting
            # function may not return a result.
            raise ValueError("All Piecewise expressions must contain an "
                             "(expr, True) statement to be used as a default "
                             "condition. Without one, the generated "
                             "expression may not evaluate to anything under "
                             "some condition.")

        result = self._print(expr.args[-1][0])
Martin Bauer's avatar
Martin Bauer committed
731
        for true_expr, condition in reversed(expr.args[:-1]):
732
            if isinstance(condition, cast_func) and get_type_of_expression(condition.args[0]) == create_type("bool"):
733
734
735
736
737
                if not KERNCRAFT_NO_TERNARY_MODE:
                    result = "(({}) ? ({}) : ({}))".format(self._print(condition.args[0]), self._print(true_expr),
                                                           result)
                else:
                    print("Warning - skipping ternary op")
738
739
740
            else:
                # noinspection SpellCheckingInspection
                result = self.instruction_set['blendv'].format(result, self._print(true_expr), self._print(condition))
741
        return result