diff --git a/CHANGELOG.md b/CHANGELOG.md index 6435199777e8e7ddc29b034a546e15b4fb471e67..09aba46af66e2afe07472a283433dc8e92b35d02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,3 +4,4 @@ ### Removed * LLVM backend because it was not used much and not good integrated in pystencils. +* OpenCL backend because it was not used much and not good integrated in pystencils. diff --git a/README.md b/README.md index 0b09df94a64b3366749dac24b1f533de1402523d..62105e71da702e228e113205e6a3337f5d7dc413 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,6 @@ Without `[interactive]` you get a minimal version with very little dependencies. All options: - `gpu`: use this if an NVIDIA GPU is available and CUDA is installed -- `opencl`: basic OpenCL support (experimental) - `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl - `bench_db`: functionality to store benchmark result in object databases - `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc. diff --git a/doc/notebooks/02_tutorial_basic_kernels.ipynb b/doc/notebooks/02_tutorial_basic_kernels.ipynb index d5541ea0c619d9ffd746f1b638f07ab0052e6f9a..413572375e075500898e146cc4c46a5dbb058f8c 100644 --- a/doc/notebooks/02_tutorial_basic_kernels.ipynb +++ b/doc/notebooks/02_tutorial_basic_kernels.ipynb @@ -46,8 +46,9 @@ "outputs": [ { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxkAAAAnCAYAAABje4W/AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAULUlEQVR4Ae2d67XdtBLHd846BSShggsd5EIFhA4CqSDQASw+hW8s6CBQQRI6gFtBAh1ABwmng9z/z8fykbXtbUmW36O1vPXw6PUfeTyjh/e958+fPzmdTq91+e6bH3744Rc/wcLTISCsf1Lp7+oaPpH/ndJuiMt/JO+prkcKf6HrY4W/08X9vxU/45PS/PKge6u0v+SbCxAIsDLsA3wsul0ESo7toCyTKReGRYCVyZQLWNktQ8AQ2AcCknsv1JOvg948uKoTfhPBPe86U1yDjBYthIAw/11FvZT/M5fCGHwYCc49VTpGxUk+TMQA/Ka+6dNx/2Ndf+veG/muvP8q3qKr8x7eE0aG/eFHwT4BKDW2VY7JlIQhUgp3qjTsE4A3UkPAEFgUAckrdNPGjlBjqolyZ2Qs2rijVs5LRH1/LN9fZcCg+BNM6vsYDbiHuqCtDA6F7+sKjUGUZgzG3+Q795UCpLecaJ7U5bfSS0VU9relypqinLrvu8Q+B6+18yunT0fNU3hsm0yJHEiFcafW1WAfCUGLzGRKCw6LGAKHROD6kL1eWacljHmZsILxh8JfeM3DkHhVx9k25VYwTqJrwtxXnFUOjJYfiTun9Acu7HylsaT1j/x/vDTqwiDBGmX1Y9CJzq2QsNXrI8WdAXRSmJWUF7pa7RwsdGYCtW9x7HO6rHYfkl85WB01z9ixrfxjZUqvfOjjyaVxrXsmU+LleTL2fTy5lL4Hfl3qn90zBAyBcQhcjctuuccgIAGNks8Wqce6eKFzxsK9HE4K/6XrRhf3cc7guI21fzEQMFJu2sntmO6jnHK24w93R2EMGAwP7nENOuWB/p18zo/Qh9/lo7D77rXSmv74N5YOq12rwD4HB7X9cPzKwemoeQqO7TEyJUY+tFgUOa5NpnioCbMueZ6MvVdkdHDr/IruqBEaAoZANgJmZGRDVyajBDVK+j2VxgoGiv+39YvDr+BLRVh5uPETXbim52UTKvmOxPdR+jFoGqf8GDMYCs3KRnOzP0A5vqFCmO1HrKZUTmGXFmW4uHxz+Wrf4tjn9FXtPiS/crA6ap6xY1v5eWazZYryDsqHkDcx41o0JlPawJ3J8xzs20XGxfbAr7ieGpUhYAjkImBGRi5yI/NJQP+ky523OCkcbpXya2Alwz9n4d8j702d4PzwvlsJIR1DoDEOWoSREeXHkEABCY0S6meW3XeVUuAnLB1W+zeLfQ52W+dXTp+PmqfU2FY5TpY4vwWp7vfKlMTx1io3MmIy5Q6oljyfAfu7muNDq+NXfNON0hAwBMYgYEbGGPTG5eUlHa4o8DnhX7wX/Kl+aaDUD61ScAicFY+WU35mut6TWJfVqTS0Mg1H7veQUM/D4N4bxf1zJsHtRaJbxj4HsK3zK6fPR81TcmznypSU8ZbDJ5MpQq1Hnk+N/V74ldMPy2MIGAKJCFwn0ht5OQSeqShmodxZCErm/EV4UBoDg61SF1cfyKeLGXqMCrdCwgvnR6XdyMdRVmVwVLHyPxgY4UuOuql3TW6P2OfguxV+5fTtqHmKje0JZErXeMvhk8mUW9RS5Hkp7PfCr5x+WB5DwBBIRMCMjETASpHrBc5na7kuOtFhXPCHToNOtM3XnXqIMQB4QY91fWVQfriFakqjJqsfa8JebXmkTmAYDjmMyBDboTzu/qb55Tph/jACpce2ysuRKSnjbbhT5xQmU24xQd6GWIdxhx60vfJjYjm0On45UMw3BAyBaRHYnJFRC8NfBQuzOK8UD2f+p0Vs26XzkuFlM8oJc3cIHR6EhlIYh6b35TaqIdvK3Im9sASvSbeTGb+GB4owwtgzuTIMVUhxNq4Tx1tYXkzcZMotSsWwn1gOLcYve65jHqd90hjv8/laErur/GYskxNhqIv/cUBZHjqnsEwjV1or2KlpCPwkp3z3dYWz7fwfB/u/K6f7hPkjwNCgoD63feuW+IC/wiUL+xyoVJfxKxE4+KPL5EoGbsrSJVMuyoeeMRpbu8kUIcWYXQD7WB75dIvxC4x0jXqulR95yuflu8a5308LrwgB8Ws071fUnVmbUhK7q1lbXqgyAeCU24vnFApVt7diWIVoCUviujAivtdVhRX3/7Eb+q+VxsH0yinMJ2/5Az7OgUDLoXP2g4eOWfreL2OFxDuPn2Gf01/hbfzKAW4gj3A1uTKAUc/ts3EtLIfkQ5dMGRrXrnqTKQ6J2/N6oTxPxv6uuPhQhBxyhS3KrwLPNXIBjN+7Dpm/DQQK8H6wo9Shi7O1u3KlsLveKCoILbdlp3gX6gHzVj4zRXtzGBNsMWv2WqufrD408bDDNQ4P5DdGBjSK9+ap71dbs0QXrm6EVRwlfoZ9TsdrPHux133GrfErHVyTK+mYkaNzXF+SD11jdGhcU5FoTKYAxJ0rgv1dcfGhDfFr7HM9Nn88qBNSil8ownvVa/qQm4N3yKRKLvU1YqPpRbC72mjnmVmYchUDJXyXirEEDbix/Nua/RoaB6LPwdy9AIeKP8T9XOxzwDF+5aB2yhnjKRXtUq7kjmsboylDp5t2Zuy7G3E5dQ3vgLHPNfn3sBq/S/lzefhNLtMHqt/07bHPTdX561gIJMyYxf5M17s6Dw8dy0T8rwOHJp/qeqTwF7pQYJlpvdHFXka+t964+j7CB0We8ijridJZ5m25DlqUZOr7sUVYKKL6AHayVZJCzRxVjProPnfrf952qEy2MkQbdqJl1uS1/F0aa0Ng9d3PxL6vuEvpm+CX8DC5comLG7mXOa43MUbXzoI5sM/BQO2a/R2gOtE9fN3ior4gemidTsNXHL9T2o0u9ACUcmaoKRPd5rX8N/LP9BSlr9rV/ZlFr1Fdh5DppRl+CTfq0v1N6tlXMUCpczyI38vnAeQBwyj4U5f787en3FP8JJ8/mEOJ5QHFkbdxSufhJS9lufLIw97+1pJTD60rL1rhbSofCNT1U/6zAdLN3wb7lE6IvmUoRuTly1/FeRRR7+pJUrHP6dAW+KU2mlzJYe5K86SO6y2M0ZVCfdasGbA/qzMiYdZ3gDDo0i169QXR8+GYl/J/5lIYI6KiV/wPXeg31R/mKszk6Ze6tmhgoFfNotcIn0PIdOFZ1EXgRn2b1LMHjQx1HquUg72f00uc0pidZuD+rjBWvvt60EOFWd1wCiw0jXKqdOI8yBgX/gw34ZaVPUDLVwNulKeYU3lYif/T9ax02cUaWbigKfs5ZdmFYVikuLXhM3d7VJ/JlUVG3rSVTjmOpix7WlTmKX1t+MzZHtV1Sbc40xdEj96CruKfu0RvYQLUd+xL92n8e1VYZbALg/ImcSrb/whLUh3KO5teAw5q3O51xSQGRBAP4UYRomF8bVLPvo7AgG/H82nSRqlXmIGLY0WDh/sVETnS3QoGwDTh6u6tNc15gMbwqNOZgaAs32ERx9L6+ZLDag918XDQR7b4yMtyGE9hP7IKOnom4ci4wujDj3XMNF18IcQWZHSTI2ByJR5ikyvxWPVSmkzphWYPN7L0BY0JVjOY+GTlAoMidF26SUOjPGwJY4K0mTRVmLbg2IbFFxjdpGuV2PVzKY/usdLyQleoT3UV1aTVZc6p1+xapsMDgct4CN1DEnr4g4HrdvyE+Vx8CDfoNqtnX7tedvkCB6OBzr0M7gM0+xb9B8uB7wyOIEsV/Uq/rS00KoPysdJ42H2XQuvnSw6rDbzEsRJ5iD9X+Ca5kIQMKv9DAvkuSYXBvUsdq3nA982LOsM+D84hfqWUqrJMrqQAFklrY7t60ffKFeGDXDeZEjmepiYrKVPU1iR9QXVjGLD1CSW80l2I62oMAoX7dJMKmvp+tY2qStCP0jA63smvtlXJf6yLHR9dBowrJyYPk59sKW/a5+rs86HVNYteo3p2L9PVx04jT+ms4HC+rOJ5Hz+60mNxE101eSp/c3r2dVfHvbRP63A4O8wD0zIWFMdaa215qvNWnsDhgeXqMiZOut+Ul0Lr1zEmrDo5wP5WZTB7XvxF5LdN9fS+CH06C5dHwLAvj2lGiSZXMkAbymJjewihae4b7tPgGluq8I/WLfwylQ+FHUUcxY3wtwr7H0NxxkdLNxHNTV0OKxbVmY06jkeav7WcFZJqW7n8ZlLWo4/Ko7yUU51ble/qD4o5j4p2Lr3mMDL9HOVRKSm4UdHm9OwhI6NCTwO1eTgU5oHm4QutOtJitgo1ZVWFn07NnkeV/URpGDTv63uDtK5t8pmRwPGFCB50yqrCjkbxi050LG2xLJk0Y3CxULtpCOwMAT0fbGOMftH1dV9lNM83ZYpuVXJFbaI9zNB9pnCz5K0ws4pNvK9/Ll20JlccGOYbAj0I6DkZK1caeVJXcaZbqA4mQiv9QD76wUk+RgRKfLjDoMlf06FTUIebdGWVotGDFGZHBnIsbAeyEjkSpp8S89DOWD1LpLdOdcwmf1RX00eFVyfTQUTtGq0r1tAW8yJxo75Y/jd8qBvZjGXVlaVnK1/W+/BqAKVq65MK5+Fxjv1juOoegfo+NOEqBbcrJxoeNB6Spiyl0VkePlYQcCw9utWQKFoyKQ+DhnMjLFc91IWhQPhTXZQf7ZSP8yK0y5whYAgECOj54MXxr/zwgGRAeTG6erlS95PPViJHONhJv0/ykScI2ySnfCZXkhAz4iMhUD9fWXJFeaN1ixpTnt/WCoTK4J3PrD9lOYcuUSlrdfvYElMZGPLRY3xa8lQygkDgmDSlrC6XkueNCkBZTHZq79TyZ/UyHdCEQzFdMZkJ3RmicKvbzphbRM8WbozTrPfhdXe/b1NVMOcuGNTM7DPAP9LFA+MMAQUrR8dJwzC45Jj9+1V0MJqyXupy5ZPmP/gptP75EMBwxg5finKzDkqOdhgsjyP6E12gERoCe0BAz4R71ph15LpJ7Vddhnvu1ypXUApQOlA+fHmHguImRVK7bnIlFTGjPwQCtUxAoc+VKyn6wjPVw/udsxBOyUeuNasSNehsoWLCErqTfCYcnEPnQReKccgSV08MPTRdeZC11JvrJpM/wgb81i7TwY12ull+eDJWV8zlRZUvATfo4f1Senb2+/De8+fPeYk+VWejlv9FxwzmW/nhA6nk5Z3axZLnJ/LdQFq+UdYCQ2BnCOj5Qm6wxYAX32inclYpV9QuvjyDYK8OXNbxTf4h12gmWQGGwMQI6PkqKlemam7dzpbepDSUQA5aP1C4kYsKo5Pw5cOz7eQpeUT7SOUw4Ysyv3qndq5Spjvg1L4iuqLKYcxmHfx2bfH9NeOmtkW/D0X7r/r1n96VDBEwoL/XxWpA9cDUaaQzE7A6V7evsVQVh/FmbIhTYCGvUpTkf6qLWRgOveWs9CirubEIbJwnnFM4e2kOYaI8W5MrtNdfYWUl48ehfh7l/sbH8C7ZtHGeZMmVBRhZrbr49Qp3t+LJuzZ8r4bxKmtinmom269zDWH1YWsy/VS3uZSuiH7cGJWxPNkibupb8vvw6gIgGBi8UCsnQO4rwHmMVSmmaheGBBYT7qku36hwSnV186g/YKS+V9/ZVph/Y+frWQi9PxVueHxUfJbo95Z5orYjC95l4rYJuRL07T3x+lm5L79TYQjy7D4qHEyurIzLW+aJ2j5GrszKiVoGMP5DxwRE804VHWG2KVV6CX3UxcFz313M4xFSHysla3ObkOnCfRJdUeWyoh/+91sMjzaBW0dHkt6HVx0FuKQXCnDG4uv6oeDBwMDw9yU62sV8tYeH95V8znSwvw6G8yk64mZkCAQ5eNfa3iZ8wAbrm+Uvc/MjsGWeIBNy5cAm5Io3HHhOMMzZl03YDIw7cLY8hu96sa/QlnkyRq4swUVWLlqGRi0X+QM+tjWhg7AN3d/5AT39ZItN5SLyOFK2SSWvHrvME/qbkOnCeW264iZwC8ZN8vsw+UxGUKFFN4CAHi5WepglCveKYmAg7OwMi0CY0xlP5kQ7ry7xiGfmYf1yOslnEoPP1+bMWuU1YsW5hIPJlZXxx3gyH0OENasUfBETxSvJKQ9frIs2GESLLEL2bOI8RhIYRrwJBOoxGP0+FH11JuPSSsYmOm6NjEKAFSm3X7QrAwLM3LwIGE/mxTunNraHVjOOEpgoFAhYMzDukLQxfIfFWkLGk5k4IVkA1mx/aq1mDFVfyxLyprizFaqUzEZrCBRAIOt9eF2gYiti5QhIqPV9OYxDPCfdty0gM/PQeDIz4HnVsZzNPl62PbAFgrNM5moEbAyvbygYT+blifBmKyVbo/x/Ch9qBDIl2sgQLVs1WcXwz5sO1WH3DYHSCGS9D83IKM2GjZQngYWBwQxM8lLvRrq4uWYaT9bFMvEjWhFYV8uXa42N4eWw76vZeNKHTJl04ctZ1ejdAKJNXQ3lzOlNmdZaKYZAHgIag1nvQ9sulYf3HnJxHoOvXuQe4N0DBmvrg/FkbRyx9qQiYGM4FbHp6Y0nE2M8pREwZdkTw2LFGwInMzIOOAgktFj24oxG3zaqA6KybJeNJ8vib7WPR8DG8HgMS5dgPCmNqJVnCBgCKQiYkZGC1g5o9dJhfycHWO0rFSvhp/FkJYywZmQjYGM4G7rJMhpPJoPWCjYEDIFIBMzIiARqD2R66bjP1TYrGErjENrHe+jfFvtgPNki16zNPgI2hn001hE2nqyDD9YKQ+DoCLiD33yz+YMHBl9MSD2c5GW34NoQED856P2Z/PCgN4aH8XoBhhlPFgDdqiyKgI3honAWKcx4UgRGK8QQMAQSEJDcYRs+O2Va7t6HD75t0bpnkZ0gIOazUlH9G3pHlx7r/icd6ZY0IQLGkwnBtaJnQcDG8CwwJ1ViPEmCy4gNAUNgYgTcSsbE1VjxCyOAgYGhcWZlKs3+I2MZ5hhPlsHdai2HgI3hcliWKsl4UgpJK8cQMARGI/B/iUsndZCxSrQAAAAASUVORK5CYII=\n", "text/latex": [ - "$$\\left [ grad_{x} \\leftarrow \\frac{{{src}_{E}}}{2} - \\frac{{{src}_{W}}}{2}, \\quad grad_{y} \\leftarrow \\frac{{{src}_{N}}}{2} - \\frac{{{src}_{S}}}{2}, \\quad {{dst}_{C}} \\leftarrow grad_{x} + grad_{y}\\right ]$$" + "$\\displaystyle \\left[ grad_{x} \\leftarrow \\frac{{src}_{(1,0)}}{2} - \\frac{{src}_{(-1,0)}}{2}, \\ grad_{y} \\leftarrow \\frac{{src}_{(0,1)}}{2} - \\frac{{src}_{(0,-1)}}{2}, \\ {dst}_{(0,0)} \\leftarrow grad_{x} + grad_{y}\\right]$" ], "text/plain": [ "⎡ src_E src_W src_N src_S ⎤\n", @@ -87,8 +88,9 @@ "outputs": [ { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxkAAAAnCAYAAABje4W/AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAULUlEQVR4Ae2d67XdtBLHd846BSShggsd5EIFhA4CqSDQASw+hW8s6CBQQRI6gFtBAh1ABwmng9z/z8fykbXtbUmW36O1vPXw6PUfeTyjh/e958+fPzmdTq91+e6bH3744Rc/wcLTISCsf1Lp7+oaPpH/ndJuiMt/JO+prkcKf6HrY4W/08X9vxU/45PS/PKge6u0v+SbCxAIsDLsA3wsul0ESo7toCyTKReGRYCVyZQLWNktQ8AQ2AcCknsv1JOvg948uKoTfhPBPe86U1yDjBYthIAw/11FvZT/M5fCGHwYCc49VTpGxUk+TMQA/Ka+6dNx/2Ndf+veG/muvP8q3qKr8x7eE0aG/eFHwT4BKDW2VY7JlIQhUgp3qjTsE4A3UkPAEFgUAckrdNPGjlBjqolyZ2Qs2rijVs5LRH1/LN9fZcCg+BNM6vsYDbiHuqCtDA6F7+sKjUGUZgzG3+Q795UCpLecaJ7U5bfSS0VU9relypqinLrvu8Q+B6+18yunT0fNU3hsm0yJHEiFcafW1WAfCUGLzGRKCw6LGAKHROD6kL1eWacljHmZsILxh8JfeM3DkHhVx9k25VYwTqJrwtxXnFUOjJYfiTun9Acu7HylsaT1j/x/vDTqwiDBGmX1Y9CJzq2QsNXrI8WdAXRSmJWUF7pa7RwsdGYCtW9x7HO6rHYfkl85WB01z9ixrfxjZUqvfOjjyaVxrXsmU+LleTL2fTy5lL4Hfl3qn90zBAyBcQhcjctuuccgIAGNks8Wqce6eKFzxsK9HE4K/6XrRhf3cc7guI21fzEQMFJu2sntmO6jnHK24w93R2EMGAwP7nENOuWB/p18zo/Qh9/lo7D77rXSmv74N5YOq12rwD4HB7X9cPzKwemoeQqO7TEyJUY+tFgUOa5NpnioCbMueZ6MvVdkdHDr/IruqBEaAoZANgJmZGRDVyajBDVK+j2VxgoGiv+39YvDr+BLRVh5uPETXbim52UTKvmOxPdR+jFoGqf8GDMYCs3KRnOzP0A5vqFCmO1HrKZUTmGXFmW4uHxz+Wrf4tjn9FXtPiS/crA6ap6xY1v5eWazZYryDsqHkDcx41o0JlPawJ3J8xzs20XGxfbAr7ieGpUhYAjkImBGRi5yI/NJQP+ky523OCkcbpXya2Alwz9n4d8j702d4PzwvlsJIR1DoDEOWoSREeXHkEABCY0S6meW3XeVUuAnLB1W+zeLfQ52W+dXTp+PmqfU2FY5TpY4vwWp7vfKlMTx1io3MmIy5Q6oljyfAfu7muNDq+NXfNON0hAwBMYgYEbGGPTG5eUlHa4o8DnhX7wX/Kl+aaDUD61ScAicFY+WU35mut6TWJfVqTS0Mg1H7veQUM/D4N4bxf1zJsHtRaJbxj4HsK3zK6fPR81TcmznypSU8ZbDJ5MpQq1Hnk+N/V74ldMPy2MIGAKJCFwn0ht5OQSeqShmodxZCErm/EV4UBoDg61SF1cfyKeLGXqMCrdCwgvnR6XdyMdRVmVwVLHyPxgY4UuOuql3TW6P2OfguxV+5fTtqHmKje0JZErXeMvhk8mUW9RS5Hkp7PfCr5x+WB5DwBBIRMCMjETASpHrBc5na7kuOtFhXPCHToNOtM3XnXqIMQB4QY91fWVQfriFakqjJqsfa8JebXmkTmAYDjmMyBDboTzu/qb55Tph/jACpce2ysuRKSnjbbhT5xQmU24xQd6GWIdxhx60vfJjYjm0On45UMw3BAyBaRHYnJFRC8NfBQuzOK8UD2f+p0Vs26XzkuFlM8oJc3cIHR6EhlIYh6b35TaqIdvK3Im9sASvSbeTGb+GB4owwtgzuTIMVUhxNq4Tx1tYXkzcZMotSsWwn1gOLcYve65jHqd90hjv8/laErur/GYskxNhqIv/cUBZHjqnsEwjV1or2KlpCPwkp3z3dYWz7fwfB/u/K6f7hPkjwNCgoD63feuW+IC/wiUL+xyoVJfxKxE4+KPL5EoGbsrSJVMuyoeeMRpbu8kUIcWYXQD7WB75dIvxC4x0jXqulR95yuflu8a5308LrwgB8Ws071fUnVmbUhK7q1lbXqgyAeCU24vnFApVt7diWIVoCUviujAivtdVhRX3/7Eb+q+VxsH0yinMJ2/5Az7OgUDLoXP2g4eOWfreL2OFxDuPn2Gf01/hbfzKAW4gj3A1uTKAUc/ts3EtLIfkQ5dMGRrXrnqTKQ6J2/N6oTxPxv6uuPhQhBxyhS3KrwLPNXIBjN+7Dpm/DQQK8H6wo9Shi7O1u3KlsLveKCoILbdlp3gX6gHzVj4zRXtzGBNsMWv2WqufrD408bDDNQ4P5DdGBjSK9+ap71dbs0QXrm6EVRwlfoZ9TsdrPHux133GrfErHVyTK+mYkaNzXF+SD11jdGhcU5FoTKYAxJ0rgv1dcfGhDfFr7HM9Nn88qBNSil8ownvVa/qQm4N3yKRKLvU1YqPpRbC72mjnmVmYchUDJXyXirEEDbix/Nua/RoaB6LPwdy9AIeKP8T9XOxzwDF+5aB2yhnjKRXtUq7kjmsboylDp5t2Zuy7G3E5dQ3vgLHPNfn3sBq/S/lzefhNLtMHqt/07bHPTdX561gIJMyYxf5M17s6Dw8dy0T8rwOHJp/qeqTwF7pQYJlpvdHFXka+t964+j7CB0We8ijridJZ5m25DlqUZOr7sUVYKKL6AHayVZJCzRxVjProPnfrf952qEy2MkQbdqJl1uS1/F0aa0Ng9d3PxL6vuEvpm+CX8DC5comLG7mXOa43MUbXzoI5sM/BQO2a/R2gOtE9fN3ior4gemidTsNXHL9T2o0u9ACUcmaoKRPd5rX8N/LP9BSlr9rV/ZlFr1Fdh5DppRl+CTfq0v1N6tlXMUCpczyI38vnAeQBwyj4U5f787en3FP8JJ8/mEOJ5QHFkbdxSufhJS9lufLIw97+1pJTD60rL1rhbSofCNT1U/6zAdLN3wb7lE6IvmUoRuTly1/FeRRR7+pJUrHP6dAW+KU2mlzJYe5K86SO6y2M0ZVCfdasGbA/qzMiYdZ3gDDo0i169QXR8+GYl/J/5lIYI6KiV/wPXeg31R/mKszk6Ze6tmhgoFfNotcIn0PIdOFZ1EXgRn2b1LMHjQx1HquUg72f00uc0pidZuD+rjBWvvt60EOFWd1wCiw0jXKqdOI8yBgX/gw34ZaVPUDLVwNulKeYU3lYif/T9ax02cUaWbigKfs5ZdmFYVikuLXhM3d7VJ/JlUVG3rSVTjmOpix7WlTmKX1t+MzZHtV1Sbc40xdEj96CruKfu0RvYQLUd+xL92n8e1VYZbALg/ImcSrb/whLUh3KO5teAw5q3O51xSQGRBAP4UYRomF8bVLPvo7AgG/H82nSRqlXmIGLY0WDh/sVETnS3QoGwDTh6u6tNc15gMbwqNOZgaAs32ERx9L6+ZLDag918XDQR7b4yMtyGE9hP7IKOnom4ci4wujDj3XMNF18IcQWZHSTI2ByJR5ikyvxWPVSmkzphWYPN7L0BY0JVjOY+GTlAoMidF26SUOjPGwJY4K0mTRVmLbg2IbFFxjdpGuV2PVzKY/usdLyQleoT3UV1aTVZc6p1+xapsMDgct4CN1DEnr4g4HrdvyE+Vx8CDfoNqtnX7tedvkCB6OBzr0M7gM0+xb9B8uB7wyOIEsV/Uq/rS00KoPysdJ42H2XQuvnSw6rDbzEsRJ5iD9X+Ca5kIQMKv9DAvkuSYXBvUsdq3nA982LOsM+D84hfqWUqrJMrqQAFklrY7t60ffKFeGDXDeZEjmepiYrKVPU1iR9QXVjGLD1CSW80l2I62oMAoX7dJMKmvp+tY2qStCP0jA63smvtlXJf6yLHR9dBowrJyYPk59sKW/a5+rs86HVNYteo3p2L9PVx04jT+ms4HC+rOJ5Hz+60mNxE101eSp/c3r2dVfHvbRP63A4O8wD0zIWFMdaa215qvNWnsDhgeXqMiZOut+Ul0Lr1zEmrDo5wP5WZTB7XvxF5LdN9fS+CH06C5dHwLAvj2lGiSZXMkAbymJjewihae4b7tPgGluq8I/WLfwylQ+FHUUcxY3wtwr7H0NxxkdLNxHNTV0OKxbVmY06jkeav7WcFZJqW7n8ZlLWo4/Ko7yUU51ble/qD4o5j4p2Lr3mMDL9HOVRKSm4UdHm9OwhI6NCTwO1eTgU5oHm4QutOtJitgo1ZVWFn07NnkeV/URpGDTv63uDtK5t8pmRwPGFCB50yqrCjkbxi050LG2xLJk0Y3CxULtpCOwMAT0fbGOMftH1dV9lNM83ZYpuVXJFbaI9zNB9pnCz5K0ws4pNvK9/Ll20JlccGOYbAj0I6DkZK1caeVJXcaZbqA4mQiv9QD76wUk+RgRKfLjDoMlf06FTUIebdGWVotGDFGZHBnIsbAeyEjkSpp8S89DOWD1LpLdOdcwmf1RX00eFVyfTQUTtGq0r1tAW8yJxo75Y/jd8qBvZjGXVlaVnK1/W+/BqAKVq65MK5+Fxjv1juOoegfo+NOEqBbcrJxoeNB6Spiyl0VkePlYQcCw9utWQKFoyKQ+DhnMjLFc91IWhQPhTXZQf7ZSP8yK0y5whYAgECOj54MXxr/zwgGRAeTG6erlS95PPViJHONhJv0/ykScI2ySnfCZXkhAz4iMhUD9fWXJFeaN1ixpTnt/WCoTK4J3PrD9lOYcuUSlrdfvYElMZGPLRY3xa8lQygkDgmDSlrC6XkueNCkBZTHZq79TyZ/UyHdCEQzFdMZkJ3RmicKvbzphbRM8WbozTrPfhdXe/b1NVMOcuGNTM7DPAP9LFA+MMAQUrR8dJwzC45Jj9+1V0MJqyXupy5ZPmP/gptP75EMBwxg5finKzDkqOdhgsjyP6E12gERoCe0BAz4R71ph15LpJ7Vddhnvu1ypXUApQOlA+fHmHguImRVK7bnIlFTGjPwQCtUxAoc+VKyn6wjPVw/udsxBOyUeuNasSNehsoWLCErqTfCYcnEPnQReKccgSV08MPTRdeZC11JvrJpM/wgb81i7TwY12ull+eDJWV8zlRZUvATfo4f1Senb2+/De8+fPeYk+VWejlv9FxwzmW/nhA6nk5Z3axZLnJ/LdQFq+UdYCQ2BnCOj5Qm6wxYAX32inclYpV9QuvjyDYK8OXNbxTf4h12gmWQGGwMQI6PkqKlemam7dzpbepDSUQA5aP1C4kYsKo5Pw5cOz7eQpeUT7SOUw4Ysyv3qndq5Spjvg1L4iuqLKYcxmHfx2bfH9NeOmtkW/D0X7r/r1n96VDBEwoL/XxWpA9cDUaaQzE7A6V7evsVQVh/FmbIhTYCGvUpTkf6qLWRgOveWs9CirubEIbJwnnFM4e2kOYaI8W5MrtNdfYWUl48ehfh7l/sbH8C7ZtHGeZMmVBRhZrbr49Qp3t+LJuzZ8r4bxKmtinmom269zDWH1YWsy/VS3uZSuiH7cGJWxPNkibupb8vvw6gIgGBi8UCsnQO4rwHmMVSmmaheGBBYT7qku36hwSnV186g/YKS+V9/ZVph/Y+frWQi9PxVueHxUfJbo95Z5orYjC95l4rYJuRL07T3x+lm5L79TYQjy7D4qHEyurIzLW+aJ2j5GrszKiVoGMP5DxwRE804VHWG2KVV6CX3UxcFz313M4xFSHysla3ObkOnCfRJdUeWyoh/+91sMjzaBW0dHkt6HVx0FuKQXCnDG4uv6oeDBwMDw9yU62sV8tYeH95V8znSwvw6G8yk64mZkCAQ5eNfa3iZ8wAbrm+Uvc/MjsGWeIBNy5cAm5Io3HHhOMMzZl03YDIw7cLY8hu96sa/QlnkyRq4swUVWLlqGRi0X+QM+tjWhg7AN3d/5AT39ZItN5SLyOFK2SSWvHrvME/qbkOnCeW264iZwC8ZN8vsw+UxGUKFFN4CAHi5WepglCveKYmAg7OwMi0CY0xlP5kQ7ry7xiGfmYf1yOslnEoPP1+bMWuU1YsW5hIPJlZXxx3gyH0OENasUfBETxSvJKQ9frIs2GESLLEL2bOI8RhIYRrwJBOoxGP0+FH11JuPSSsYmOm6NjEKAFSm3X7QrAwLM3LwIGE/mxTunNraHVjOOEpgoFAhYMzDukLQxfIfFWkLGk5k4IVkA1mx/aq1mDFVfyxLyprizFaqUzEZrCBRAIOt9eF2gYiti5QhIqPV9OYxDPCfdty0gM/PQeDIz4HnVsZzNPl62PbAFgrNM5moEbAyvbygYT+blifBmKyVbo/x/Ch9qBDIl2sgQLVs1WcXwz5sO1WH3DYHSCGS9D83IKM2GjZQngYWBwQxM8lLvRrq4uWYaT9bFMvEjWhFYV8uXa42N4eWw76vZeNKHTJl04ctZ1ejdAKJNXQ3lzOlNmdZaKYZAHgIag1nvQ9sulYf3HnJxHoOvXuQe4N0DBmvrg/FkbRyx9qQiYGM4FbHp6Y0nE2M8pREwZdkTw2LFGwInMzIOOAgktFj24oxG3zaqA6KybJeNJ8vib7WPR8DG8HgMS5dgPCmNqJVnCBgCKQiYkZGC1g5o9dJhfycHWO0rFSvhp/FkJYywZmQjYGM4G7rJMhpPJoPWCjYEDIFIBMzIiARqD2R66bjP1TYrGErjENrHe+jfFvtgPNki16zNPgI2hn001hE2nqyDD9YKQ+DoCLiD33yz+YMHBl9MSD2c5GW34NoQED856P2Z/PCgN4aH8XoBhhlPFgDdqiyKgI3honAWKcx4UgRGK8QQMAQSEJDcYRs+O2Va7t6HD75t0bpnkZ0gIOazUlH9G3pHlx7r/icd6ZY0IQLGkwnBtaJnQcDG8CwwJ1ViPEmCy4gNAUNgYgTcSsbE1VjxCyOAgYGhcWZlKs3+I2MZ5hhPlsHdai2HgI3hcliWKsl4UgpJK8cQMARGI/B/iUsndZCxSrQAAAAASUVORK5CYII=\n", "text/latex": [ - "$$\\left [ grad_{x} \\leftarrow \\frac{{{src}_{E}}}{2} - \\frac{{{src}_{W}}}{2}, \\quad grad_{y} \\leftarrow \\frac{{{src}_{N}}}{2} - \\frac{{{src}_{S}}}{2}, \\quad {{dst}_{C}} \\leftarrow grad_{x} + grad_{y}\\right ]$$" + "$\\displaystyle \\left[ grad_{x} \\leftarrow \\frac{{src}_{(1,0)}}{2} - \\frac{{src}_{(-1,0)}}{2}, \\ grad_{y} \\leftarrow \\frac{{src}_{(0,1)}}{2} - \\frac{{src}_{(0,-1)}}{2}, \\ {dst}_{(0,0)} \\leftarrow grad_{x} + grad_{y}\\right]$" ], "text/plain": [ "⎡ src_E src_W src_N src_S ⎤\n", @@ -128,8 +130,9 @@ "outputs": [ { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAA/CAYAAAAyu3fMAAAACXBIWXMAAA7EAAAOxAGVKw4bAAARp0lEQVR4Ae2d7ZXdNBPHL/dsASGp4IEOIFQQ6ACyFRA6gJNv+bYHOoBUkEAHQAUJdEA6IGwHef4/RaMja/0iX9vX9s3MOVq9j8Zj6e/RWL770bt37w5O29HAs2fPPpE0vyq8Ufqb7UjmkrgGpmvgOJ2Fc5hLAwKYJ+L1j8JbhW/n4ut8XANb0cDVVgT50OUQ2HwvHfyo8IvS333o+vDrv0wNfORbqvVvrADmS0nxu8LfSn++vkQugWtgGQ34lmoZvY7l+nPs4NuosZrz9rvSgAPOyrdLFs1nEgFHMdbN3yuL48O7BhbVgAPOouqtYn4dW72oau2NXAM71oA7jde/eVg30Jv30eX/lSXH27h7Clh1f1z+FfsVmgYccEwTO4rjNoyzOp8rfTtGdLXnTRj0r8KnCj+q7Gxgp7H+0pg3Cq8V/lL+KwUHHSnjUkj3s3OO+ZZqJ3dZN/Gewq8KOJifK5hlVH0F6stif6X4B4WflP5B4XelR/OqHjRrqHGCZaP4NxUDlMQAj9OFaED3tneOuYWzkxutG8kCDSePlebMDs7malKffLGHfvBUYNEDYl9VMzu9IWMEx7jGxaryk9Sn63JzPWvmmFs4m7ttiwnE4m57C/ZK5V9qsuBTOQdxitppRQ1wrxXYzvIQmpMG55gDzpzq3jYvDhe2LXbz31Dv9AFoQECDtfxI4VOl/1HAYp6DBueYb6nmUPPGeWhC1Vgv95e6DI3P9u+pQgA15cNYim2LiHw4GvmODMKZjb8qOJMV0496fE04nAHJLxQo/0b1BprKtpPa0B9HOQR//FgsvINi5ON4wmdK48RmHPxb1LMgf1HcIJXl/Gj3WmVtFmSj31YykhWZuUb08L0Cumd7fRPrlKwn9blX0fq+WzgVWrqAJgYmTLIuqpkwXX17yzUZef0NuAAgf5CO+YNiFjuORt6W/RQD35KFhaCYNvSxTz4ACwi/E30DiFHQRerLZyMvFAf+SvOGD8AwulYdi++gGL7fKbbv2fJ21H+iwOLE+W78kK3RTvndULwO9Mp1sdX6WWHsfKiaYw44u5kWiwv6YPER2gdg8f+mCV5aKQAAIASoGLElxN9k7T9W+o71YY2JVY+1Qp/c+oA3IGf1ZlmxaGgbwEdpFl3JH/BifKwBo8dKUN4gtflagfEXIfGeaysU5BM/PhwGeLiWP5UGeOaU/8HVIppwplvTQJvvxmS0J5NtN6x88ViTGTBhQuO4bpDqsIoou1YwsMBCS8CkevJVpLYsIsANayl/IweovIxMkMcsm4PapTT1ymP9IO8NeSOVf2xpi1WGQ5bfNMrlNSsIXT9QnQGbdbsTqw3yAWhYXWblhXbKY2EBCg057zAZWSB+gCmgivXI1pZr+FZxn76r5thxpCzefIcayCYKk7ckK0sLo2ywYN6enn0TGRDIqW9i5+1COi4WzhyxeAAMfDK28A9KA2wcD6AeMvB5n2v+ZeEDWH3yHlSPThsHGlUGAP2rmK0i8nD+6Y5VpPJEqufaw3EGxXafUn1MAAjpesrKmfJdYyf2ksF00tbWyt444CSVXXwC/4kt8Pxi78fMGqd9DeRsQuZyWdraWN4mtuUHYy0GFvlHaohlw3XiJC3HxMeERdLKP7anTy9IqB4CAAC3nChLOhY/0mzf2u5J6Kc6wBBwKnUQ6vmjeuNTXk9qMzYhnmwF2XJiOeFvAzxb9VLwRpa260lz7Fh08OzlaoDtxMOWy8NMD0/5lrpFizSJ2SoxkfMtThhTdWZxIPdJJB74gMw/c1C63E7lfBkv98vkdfS1BWdxWW/yUg6Q5ODCIrynUAIHvEoLTkWjKYDO6F5FB8n8RAF9cT+CY1/5UuaiVyM7OMcccBr62k3GHLz25EiCa4JwqOudQnCKWoXyOD/fKv46K2MRsE341soWjhmPkNMjZR5LrnLhYRHgo0gLV/m2/jmvMg0INCyNeP04R2+tsdIAAmHIekGH4VW+9SVWf2QNW73IK/GO7cprjsWhz517aJUj4ldqewe0a/tLZnstjsOY7/PwF40BmjCU+gzOsataobzd+hrQDbWnvT1NzaGHPyC8TVGMP4LJ8rpFYqwZnvqcYcFxSfxIeSyNxUj8AZOnClhYACLXYa+Vsa6Qi22PLVQWP/kANoq5Xsx7ygEnFimvuTstEtVDACnWhvlBKEM/pZMVvmyncnCjbYPop4D+ABiznACTG5Xlstf6mbiOLjBqjD2QYWyuoZokL+OiF3SB4xmwmYN655gDzhwqPhMPTYo7T9e2obsmj8qZmOVia2Mxa5nGBdA6ZVc9ANkpl+oBgl4waBM4jjsIppF/1YJT26E3Syxk9JxTmbc62o62JKxzFtcCXOiia2BcLGDAs+q6s7F6k+LHtXbeSwecXvWdpZKb73Q5GgBAGvdUi9Cc0VghJQCW+VM0ESy02o4RFGYFmtqxj7UNvV3Yq5t5Pqc6bA8/x8SbUy7ndYIGtJi5j23bmxuV21b4oHak7QAjebaabNVOIcazLd4p/c/WxwFnnKp5cjWeXuO6t7bGv2F+l9YGXrg7DWDRNEBHeV5vc9iPbQwnhNli5s562vOWKHfq8xkFIIT/K6RjX2UbhMN4yJ/V6LBWxrdUa2le42ry2BNvtH9iRbF96GENABL4MRr+Ht3vRj5nozosIz7VyAGH7VlnH/qrfXgAKp7DFwTLRakBOBIaRfH2AmKPl94cqI4n8bXCB/NFra51aTITOn/SLT2m819YA1ornPfhHAtWSTUQqC0PoLEPHwO3ha9qHvZpS6WL5QwCrxrtC1heXdqCYLRr1QW0VczZBl4Rmjc6b3dQ+cV9UYsC5iTpiFeSgDh6vJ2Tt/NaXwO6p6wN7u2YLTjrpnouqC1zyI5GrH/RFRIEwJHg7B9xiOaOS8AlHB6L9eaUwslJWzP1UGg4A6LYCPC6yC9q7QKnxNIdZjOgjQVZ6m4Ka++7IQ1ka6RKqhPmwkv1GWsRVcmyVKPwr34lNIADoCA8lg0mYTIFleZJHF7tKf1OaZC7daGonIUE8rIf7UVr1dOuceBKZQBY69exKm8l9TELi+3gna9wVT/qi1rai4/5V/IxAVvo7fuo8ZcDbJ1nTaxllJXr5uvbXTj6THaPXQNTNZD+t3hcCHjPjdhamRUTypRnEWK9dIKJ2vyn+teK8Zx3kuoBlueK0yJVGmCzhQ6oDZ4VUBsWL68UeQtwUEx/LIc0fizjA7TG9dB+DKk/lglmbxhrTF9rq77ohyeTbUetymPXwMVr4GhXqAXAIl31i1qNj5XQ+3WsyZvFWDfJrFR/0mz5sNoCZWWA3Nr0PwnwUDLxy2pJxrWF8vFdA+fQwFGTnnMB5p85KL3KF7WnXKxkZcECImn7F/ncKsZayikAUV6wRloyc+aG700gQGcLIPheGv/rGlhYA0fxZwuCzyKRFgFbh3N/UZvGH5HoWqz4WMzfYuxeKZG2WVa4YvxIYyP/nyvK4EO7Bs6qgSuNxhkQtiDBFxJH5ylc+hiwJhoO3ti2EdFPAauJrY5ZTiysG5XdxsbwanO8xurJEWBTghFjM+4mCF0osH3kpwH4wSN3IG/izrgQS2rgShOdV+GEXlI7tiSDTlyYqO2QcxYwMPChy6nUxQP+5TZrSYA7VX4sSxz1gLMDzqla9H670cBxJUkBA0BhEgnY4APotFkuJYjSpgShseMzFmEWyuVXerI+ZhHKmbgGFtQAW6qzkxYXb6PaQKJXlrgonyrOLagbdcIPFQBGdaTTV7gZQ8azLV5WXJ8U7/Q2rL7XYMvXaoHMDxUG+UuG/MwRFifb12ogndpf461Gkp0XAc8VuJe7OFogmZGVA7R+yFNKOCqsRW1f1HLGhQX1VCGklc/PBnHznqgMp3YgpfGD9H2FG1sGh/EWty1mMQ1aOLpWJi6/lMfk5boBXn7tD70M0tT+gwPM2ECyGrAmriqzXwdke1y+FEjtNpbg3nBv7c3kxsQ7rzirWDjxEplQOKaTtaIJxZM65WO7FDHhlGl8UUulyjv7xPqwmCN/inZHkt0OOCbQVBmOZ/L4gnrfwE3tf06FSVZbpF3DVlt0XQzOVa5r4ZhJ50HZc8mxlXGOawnCjdDY9+LkqhZD7dl+DG49CoYGbkXxrrKcyC79UlwAr/t5yzhkIU3tz1jnomTBnmvAJcfRvbldkv+eeK8GOChJNwILh9foQ4sl1ylbreobqLZYBrv6oja/2CwN0LKVKMme9tT30dT+fbxnq9P9wk9zZzs12wDOaFUNXK06ugbXBMMfUQ04atv60WjPdeBcrAaoHj6rVVXqp9OnMbX/XBce5QBMzHmP05uHQbBYFWPZXMfxsNr4kBjiZ1PSVvJ9UZg7bL3MGvpCafxb+LYapDLase1m3AcKWNbhnJligBiZaMMLCAAcXpRjFRLTl3rb8qNr+gOOlPHQZOuEvMjDfKMP10UZfamHXyCl6Qtv2tr8v1Z58vUo3Sl3YLLDP1dbkFmKXQwQluR9Rt0ZmPTpySZtm1hT+7fxHFWm+8ACY/HxEa1ZZQelcXrzo258LAyo8IYRYGCBBlBQuo24XkApAIxi+POpSOMNZSz/U3X8v6UwrmIOW9KWMkCBmI9qAUDa2Pko+HPi/qXKqOdXBwLwZWUBbFR3UBk/usXHzZTZNcI7/MwLbSDlkZ23iw2/m/LpepXmejrlhs8e6bhHoV3mVg3w5J5CU/sPjQ3YNMAgdsASYPGxwMYQH8Ama1dp829hNeTEuFi5BgAHpQEpQM6sI9qzXQVgTEYcvYG/YoAeoLlWCBTLKE9WS6ziTVoaK5aVW+GHKkd+gCcngM6oVm5rv4vYAWcXt6nVd2OS348Jfguoi8oJn7er6Z+3H53WwgJMPlF4VXZWnQFFWsxlm478647ytIizcRsWRuzHuGydjAAPk+WgvuRzeqEMIMV1UA9YAUj4CAOpDLDDwukltcOq4p78pzQWHhYXvsncWmOcGrl7x9pa5Sa2VFtTytbk0UTk9TdipcWUyWhl5VM1NZnaPzE6PREWqbqXizjnONbCyft2pW1ctjUJGGJjrIkStACBVlJ/LB/kZ9uDVfaF8vgfnyjYt3BjfnMJX81TBYALHxJWHts3+I+VW132QQ44+7hPSMlT0SZiLrVZKNT30dT+fbyH6gwMDRzb2lubO3VahCxGFvlYMp5YEcH3MsCgDxDpii8HcLlRbBYlZYBEDX81C9YR9/FtvKZwXUoDiPiIAMKxcqvLPui4DzFdSmmAPT17/5J4UuI3GFosU/uX41bnJRtbFeRrOElhoDq2IRDyGQ1di7XrjbNxW7dr2di9fLJKwACw4PMKAxjK8P1wIp50DWHNNSwu9Wd7Bk98O6avueSukeksbRxwzqLm6YPECclTERM8kNL3lHiswE+MBKJM4Z1CY/+vPBN6sH9ks0T0SEwfS45y68R2gjdUuYVGug1cTS6z6izfFzMugGDAFtoqz7hmSVCGLgmdpD4AAX3wt4S+WRmvtHN+JZ+SN98ElmXkTQ+1cpfjbDqfftN401JesHCadDzZARFeqdpTs/WK4wRlodwqYNLj9LxROQshkfKtr5Vr+ydGMyc0PtYBWwjkh8izjbBFFgr5ozKzeFjEXCN+LMDqqQL6ggf92N7Ah20NoBLK1Ta9PVLaxlV1OovDuHzPRx/65jxfqLz1XqgcS4Z+qV7pYK0oDm+1VB9IeZPXwA55eTgAplh7ti1TMpwPYuuXdKF0p9x02CM54Kx81zSpqgFnZVF9eNfAZA0cJ3NwBq4B14BroFIDDjiVivJmrgHXwHQNOOBM1+FUDm8jgzGO0Kljen/XwCoacMBZRe2NQe1t0p1Xxo1WnnENXIAGHHBWvolyGtubDd6SOLkGLloDDjjbuL32DU3+c6rbkMylcA3MqAF/LT6jMqewkqXD1opzG3z30zhXM4Wv93UNbEkDbuFs525wspRDbvxOix0U2450LolrYAYNuIUzgxLnZCGwsYOA/IiT+XfmHMJ5uQZW04BbOKupvn1ggQxH8vkgs++7nPbOXuoa2LgG/g+UwvemfwkDYgAAAABJRU5ErkJggg==\n", "text/latex": [ - "$${{src}_{E}} + \\begin{cases} 1.0 & \\text{for}\\: {{src}_{N}} > 0 \\\\0.0 & \\text{otherwise} \\end{cases}$$" + "$\\displaystyle {src}_{(1,0)} + \\begin{cases} 1.0 & \\text{for}\\: {src}_{(0,1)} > 0 \\\\0.0 & \\text{otherwise} \\end{cases}$" ], "text/plain": [ " ⎛⎧1.0 for src_N > 0⎞\n", @@ -160,8 +163,9 @@ "outputs": [ { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbcAAAA/CAYAAABjA4bqAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAWrUlEQVR4Ae2d7ZUctRKGhz0bgDERXJMB2BHYZIC9EdhkAGd/mX8+kAEQwQIZgCPwQgaQAYsz4L6PViWrNf2hnu6Z6Z6pOqdH31LpbbVKVVL3fPT69etnm83mN10l/f3tt99+WkZ6+HAICP9Hau0XXdyL54dr2VtyBBwBR2D5CGhe/ENcftbC6fPLLPKLzI/3rgh78IAI6Ka9UnM/6Ppd18sDNu1NOQKOgCOwFgSYGx8WzAZlLQk3TaZMok4LQED34mux8Z2uH+X/agEsOQuOgCPgCCwOAc2Pf5ZMKe49cUm4lRk8fBwEdGMwEyPY/hwj2JSXMn9Fru8U/hW/3C/loJWzmmGF80AX5ubvlPa33EDyU/4fXQyMBwp/HxLO+Cdi4pie8Rjwrq8XgYv1sn6ynGOKhKpNkZqEKXMj90e5t7qudBkh0Ni3+4kI5UFoPdaV7NSKYwL/LaZR/lrXWZOwcEzPegR459eOgAu3Bd1BTagIHA6RoLVtqds9rFLmJ5Vhny4dPon1YW4mnXiEH/RSftPsmMRJM7M02txTMuWk9Ge6kkDM06b6Ve/XutAol0SrxnQMkAvFf0wXPK8jsIWAmyW3IDlqhGlcNyO5+Eb53+pCUGFu/FwXAisISLmYJVOdFq846IWudBJTaZglG4JVcUHoyjUBSLmNwggA2nwuP+U6KebFRIrwpNyvigtmUbnf66KeJe0vHgVTYVBNEdNTxb8aB8/oCLQhcNEW6XFHQ4BJH0p7YffB7t84wSHIPlYu9tLudCFEcmIfryGwskQ0pqH2EJim9YWiCiPwTNPMquv0YhrlgAxCDdMo4Zx+UTya59FJfHAfDo7pmI6Lx5PBn/uuC+2dcerkCMyCgAu3WWA8aiUcFGEPbaPJASGFuTFpWIpjouaASIpTOCeEngnVEK+8HC4xP2lbglF5MJ0G02bM2ukoHwL0kdxcuyNMfCD54Q8Ncwl0FEzHdFx4nQT+6gfvKbEg4/6z7+sCbsxAOOG8GgssqrlY+PygqzFPDXXdzZJDCC0/HUGE8DKt5538uTBCgPQJIUyS32QDh/xvdBmhIdiJQYsb6z5UASawnAiXgjMJuzzjEfxrwHQMLIvEP45Zxi7aPGOBcXo7pmOe9zQR0Hhg0fOGsUEP5TI3/CH3C11DliaKbFy4BRjW+6Mb3TAXlj1ROoIu7am1pDNQ+va62ibGspqhMIKMenIi/D6PkL/MUyQfJngsTNUuEzx7fV2E6bZLA+8qQ/xS8UdTZ3xu1C/GYec4JY/TeSCgscBCPSx6rMeKe68LQccec5WFx4WboeduFwKspEebijQQGZxBeMWBWa622NMq45iEz4FaMY149C00qrFRXWvB/1zuefW9W0tGxph4DQfZ5O9dZI/sE4ucsOgpyr1TOJysVnvlwrjI6prbFiAe0URAg4i9netmbFhpf6Y4hB7a1rXyYA41EwIaCGYFDrkYcaKSL68g0CjTWKUrjfraBrSiT4vU11ZMx/Qy4uX4jwHN886KgMYg2hSvDfH8s3XBvtgcH39gXLcJS1sMkx7mGrmddNmZ4gmOwAcEwka/Bm4yicmPIOLaGsxKQyt7+aF4Mjtt5c3yfKUys2gtWZ1L9m5hOobZNeMv3lnIsGBiktoozGIHNyx45KIRsO9pe72cAk4mWaVTjnQWUewPM+k90UU8iyibBBXsJuWjDr7KA9EGe89mbYBHXs35THHs85jJmPS/FN6afBWX10e+W8Wd9IJN/aOfmNLBDq2Ke4bgYb8sYCl/NanMg4rMYbwM5bsYyuDpjoAGHA8yDzkPeC1VDUAqU728uoAt/WxoR0zH4LNY/NV3NFcEGYul3/HH8EYuQgWtn1NyvP/IxaInTJ5yNwpT5nP8IoQSxPihbBCYRPSRynMi9kZuaEN+Xk1BOBldKS3sf8ql7nzxlefbKJ2Tv0zqWC+sPvhr5LOKT9WNfed+gAWHP9DkaoRVDomN2z7BWFWnC7ccVvd3IsDAVeJdZ4YiQfm3VrZFljzIZHXSK9y8s+Yfi6mVq3FXjD9CJr3gn/UVQYPAQ4AZMR6fKc7yf1zTb+VhkUa5fMxRP0J1E9NNa2SyJW8QdPIzsZZjG0EJD7mpjI8jEJ9I6V/GulPcsT3ih62CWUl18j4rQo7+v5UfIQfmc9EnNRVd1mTyPMtEQAPmv0Nzpjb30mRZr8If9TWkdCaZt7qqVnGxLjSEfELbakLpJ4PpVud6ItTvRqrCvfg3Ms8UUJsILiZBDg40SGloe8Rd6bJ7+F7+ZIJUOuFqUn4mX4Qpi6v8BB5j6udYETwlc7nyJT/pCqPVwTPm0USKz/ebyWefxkv8pswjPKoH3hCcaJKmvfbWoHymQWKC/URhE9Qb+dE0ET6NfvVWWJmoOhH2CP1ncjEr03c+/dd3n1iwdBELDchMyfehjt/LjniPXgECGiQHn4CWAkt8QKoe7jE8nzOmY3DaU15b3fdNfgibnPomwzxf8usesyeMJQKthYl3Q1hXmPTlBuEpN6Qp2QQdWUtC0CAcO3lWGgKJfbvGIaqyoqGwytN344k6B0llEKr/yKW/G7looez35sIcwYNWnITeYMW7ZRjkWTxwSIXa2/JaXNUC4WI3Hr2UI+AIOAKzI2CTlk1ibQ1YHkt7b54xLhO5LhaHTPLs/YUj5kUdCCMEYWsbiodProb5saiDIJrT5D1ltYf2ipAqMaCNLqLt/CAYfgScLSQ28lscfZmNVC9mWEy9aIVYTRDwrVgWjcJP4i9LM80t9SdL2/JebsV4hCOQIaDByIC/jlE24IZMC1kN7q1BwHEOWgWTN5MfAiffv9oo3jQWzIg7k+phsmfSDQdR5DJRonm1maNps8FH3rDKmJbROmErHSFigmN2s1/OS5tfbfO88vyWwhB+0QLz+MCn4jr7q7QqUrtoi2iB1Fl9ejWrnHtsptQseoOlxsZIHt/qd+HWCotHZghgrkgPpvysQFmN2Sm1LKt7JyBwjjgz8Zb0VBF2CMH21sjDZIfpkAnTiPJtdVh6m4vAamhRqpPTuhyCYNIPJD+CgWtIK+NwCRpe45CJysPvTawn1au4Q1IXNphyTQsyftjn3FpUWGKNq75i5mWuQEDyzyQ79VvluBdo1ixCgrCVS18wATM+qsiFWxVMZ53plQZWesdISPDQEserAfnkc9YgzdD5s8GZsSO8sAbwwW++pMJK3Y7RszJnhc7kZpMjQoZwEGxyEVBMosS/UJiJGkFSo3W8VF40qvCJJ/khNLC0gLuPCnVjksyFaUz64FBOFwsTngs7YclEHN7zUjy8IkyWROAFjzmBNXiOIvWPesAS/DiYMteilzEArk/kcoAE96nC1XPOaoWbOskD8pMubsjPCpeDU9FOMyAArrcz1ONV9CNwNjjHCQptp5WUjrms83lWOgKnV+i0VqzI2PbgBBnbqJqolbfvIAaTP4KjQSrD/IVAHCKEZ24+HMqfp2+1GxPhqaxztAAWX9SDFQchVIVVbH/QUX3w3jkGBitQhjULNwYoqi+28iHTQQ0WS8jDYFkUCd+GuUXMMeBY0Q5OEIvqyMKZcZwXfoN2Zw8hsvVcx+cHM+DeSG3YYRgUgPJ5LcPkKQVeL2+qHwE0q1DrbXBk4sXI/IvKLnBR+aGdVnH3RRf1i7kAKgfefeyRf4U3q032J2Y/gn/kri2qecd5UbdjEjO6lzzLCI69k9rCxFtqg2/UsM2TG6Xjt5fec57g0cyqefxq/asWbkKdlY+tTma/CRoIr3QxoR+KaAv7/6gV1CGYE08Mfh6cnTeKD8Hn2ttwnNd+B1v5Z46aLOCoQxfPIPuVwa8whziMaIM5iwVoIPl5dYAXtzEdkhdzMPuOJTGX1uxZluUWG16tWTIiyipkn1obJri+Fzhnu7EaeLa62md/duJXvPHQsKEfzCgxvJG7OCG8UwcXUiji6jgv5H7MyAYCibmkb29usLn4vHXWoXS0RD5BloQblSrcWSamB7Op8p3U8zxKuEXQ7PQKuCDpOXkU9mXkonlc6ar6krbyM2ly4wGVEzHUx/HP8Da9/Ila8iIEaA+1e3ZSewibvWmFLQyDA9S2qrpPOcJvxJ2j00y6psVOflCP0JVFN+k4L/r2TGJO95b36HjfC21rrwJE9e+y4DfhO6mfSyt8UcuQQAOAa7lMcggfBBEnZfJTT1ekK24jlwmRkz5MhJBN3iGgeG4C5anP6qQM6nNjA7Yjr9U3u6YT26f+gwgatcdRWgQHeLFJuyTiHtm9ws/1aoF8LgmzXXhxnHdBbSVl9LwwD/J8N+a2PbCPAK2eQ5SXuce++7gHdo5X5WVN0wIANRd7bfoYqOLQarhR4aSi/GhhtiH5UH5AtpM05Eun7mI53m1BqOUrGfwNbWkgb/Xb6qq3itQeQoZXDA7yFQ61B7amGSWMqpg9QCbxl+75AZo72yYc59O/9brHzHd7FW6qf+wcwmtU1cJwTXepSripQ0z2nLBJIMhvJirbhOSm2f5U0EIMCOU17c2i0Io42VPeCDQEq2+XvFZmJ1f8wBdCnH6ympGzEzGIy35sVRTbY+WEyWIw/1YFHuEIOAKrQkDPeZpDl8D40viZE5NB4abOI6gQXDdFwwiidLJP+cLxdbnEQybo7kPN3xcKNsyJKkcbaH/lO2tj8jZbGRkSDwgltE+EMW/D73sgIthYOblgG3mvPLsj4Ag4An0IDAo3FX4cKyjfveLkXENAxXzswTVMizE+OJrIEWJcbUJso/RU55i81obKoHlBmETRxDD7Bb/SchOoordJefiu2a1S3ura9/tc/6Mdtcd+yy4fGFUxJ0fAEXAEHIESgTEHSpJg0GSMcEJDKwUU9RNfo4mk+igkQlia9seJSbQ4o6q8KoNgw3zKgZeHuvjWGX4EtJlR5e0nlYEPyiIc90aqH83XBCh/yw6uTo6AI+AIOAITEagRbsG8qIk3FzbswUEN02PMQ742oRcKKM97edDOUn2KQ7tC+KAxQfzvj2l/VXnvi30wkyqca4ccDqkRuLGacNqT/UD4OgQ9VSPwi7bo5Ag4Ao6AIzARgUHhFoURWlV4w11htJk7XSZ8chYQWMQn02KemPkxXT5RPv4gkPrQzGjjIXFyOT1oVJ1XZfMDKkmDVHxpUrW6h1z7i/ShfJPSxR8CHw2T9wMPJVAn8eyFHQFHwBFYMgIfvX79OpgXNanyr7RVpLzsEd3KLU9BVpXfdybxhRbIXlY4xi537y9PTu0TPKoODrOwOLBXKKZW6+UdAUfAETgrBDR//qsOv+zV3JQJTYIj8ZjMAhEnD1euXd0nHvFXfCHA6BTEV1Lyfbrez8+EEkf+Ee/wiwZHPxLeR2bLm3cEHAFHYJUIXA5wfa10NLtAcdJlv40j87ua+mJt8zoIB10cq8esGfb8op+GFi/cIhrsOYI3B2CGTLuhiPpoh17+UUT1qdBQWD9Ty1s97joCh0ZAY5dFNvMRVo9V/KejeIZXLF/Mofk2iqKc5kRgSLiZdvZKN+ITNYxGwU2pmnjnZLSmLvGVm0kXyeNAP9DcoCrNTf3lIXkjNxyWkUs5Tl2GAzlU1EdTy/fV7WmOwJwIaKyy599YpCrMApt/qcCc/3DO9vZYF8KN59ROSe+xqfOuule4adAgINYoJE7+rure8AI4X3lJp0Dl59UCwixKOKDTSVPLd1bsCY7AzAhorJpA6Ko534LoyrOIePWFjyjz5X5byC6Cr1Nk4uIUO3UmfeIUaZtp+J3i+aeGIe1vavkzgdm7uQAETuoEsQu2w4woF26HwXkfrbA3d9dSsa1iSe+jqeX76vY0R2AWBCQI2FezfeVZ6vRKzgOBXrPkeUCwvl5WaGV0qnMPYmr59SHmHC8RgTgOEVzsmUEciOJ0dtgKkYvGxslnCGsE/yQC3cifzPH3UeFw1CP5Tct7Iv875eP90QYpjnzs39FuOEuguLBfL5dFHzyR540uFovURTzWDlzKkk4afp41yiOIieOvbTA/wi/8YIIkH/0ijrLhP97kBlJeylI3ec3qcqX4tDcnfyffoRL/aSBw2Qh5YC0ImODiQegie0Da0qeWb6vT4xyBagTiZM5E3zj8pPjfSNP1vS4EGB9SQAghDPIDY2VbjHcEYBBmchEWHK6ivFkzNjH+rdI4iBLi5fIxCfIShwDC5bUihC152MPmFDb18+1ZvsxEOp/oC0I2iwuCTWkbxfG9WE5uE2c8UDcHwRIpDO8cmGnskyuc+is//enkO1XmnoTARfK559QQYEU6haaWn9K2lz19BBBsDcETu4yGw0TPZD6GHqtMOlovv+1How3lRLu8NmDCZiN/29eBMPkjzIxHDoGE+uWyqESoXekKFOOIR7vLif+cTG3FhHI74bHi4R8hl5OdVieulu+8/Fn7Xbit8/aXD0feC9PKeO+ti6aW76rX4x2BQQQ0iSO4Hul6V2ZW2p8xLgmOMk9H+LYjPgmMrN2G5hTL0S7mRyMElfGyUVnCOd0ogIZJP0jH/Ijw4xRzIMUhWMM7tzGq1VE+tEWeyX/lR3NFk+RjDrkWSjs1fLe2cY6RbpZc4V3XoOfIP5ynBzfrhsWVq8WUZWr5VJF7HIHdEAgCQUVLgZHXNlZzy8t2+a1dTINJCMXMaEmlgETgtJLKo9HBP6ZDtE2+lcs7wK908a8maHaYXEmrIfbWrnUhJNnzQ3vFBEr9Y/lWEScXbusdA6z2bNDnvTDNjfQ+mlq+r25PcwT6ELCFly3E2vJanq00TfhM/LVCIy9vdaIdIXyGqE/4Upa9NwTZG7lmKSEOgVRTv7IFrY/n+C72KfRLfoQve3oI3bF8q4jThUOwWgSwwWOrL4kVIHb+oQdzavmyXQ87AlUIaGxi7mN8Ng5QUFhpmPIgxqfR0Fi2fL1u1u5VW8as7bbktjgED4KJT4CZMCOOvbry300U3UloqQ1NUuUxcVIne3GG11x8dzJySgku3FZ6N+PgZ7WHGSOQ/A/keaHr5X1MmCz4isl/uhr2eoV5eAbLWz3uOgIzI/BU9b3QOCzNj5jkOCmZWx7wty3kjCWzVli4z6VdhI8J0ZBXYdo1DYk4niWuTlIZhA5l0r+OZHEc48/rK+sp675W/jKOsOFQy3fZztmGd/rLm7NFa88d1+BmtYqw4gixrQQ7W40PAw/le12YRdgQ51uTPHSJFG49Sl1bPlXkHkdgRgQ0/tB6MMMxfiHCmOJsQg+R/CjONDkEBmOcfWcEo+1TUQflMBFSD6ZBBFiIV950ilF+a1fJ6V032uXj65ShLM+h1Xmj+NbnUfFoaJRL6fIHLUxuOF2p9EAKG78mWOGXhSiCGy3WTJvyhvfvMJ8mLOTv5JsCTvcICKfwlzcu3BY0InRTRgm3BbHurDgCjoAjsAgETLhdLIIbZ8IRcAQcAUfAEZgRARduM4LpVTkCjoAj4AgsAwEXbsu4D8bFXfSM2SC3su46Ao6AI+AIRARcuC1rKNiJxq0j0sti07lxBBwBR2DZCLhwW9D90Uaona7ipJaTI+AIOAKOwI4IuHDbEbg9FrPvyXHE2MkRcAQcAUdgBwRcuO0A2j6LSHvjvR/eU9vly+j7ZM3rdgQcAUdgNQi4cFvmreJrBLysyn9M2Qufy+TUuXIEHAFHYIEIpJe4W3jjrftPW+I96kAICH97qZs/O7T9uAO17s04Ao6AI7BsBDQvcgiPL7+U9PxSMbe62k7n2bH0spCHD4SAbhyf4eLG+asBB8Lcm3EEHIFVIcDny9rmx9v/AwbVQyLFDNTSAAAAAElFTkSuQmCC\n", "text/latex": [ - "$$\\left [ grad_{x} \\leftarrow \\begin{cases} \\frac{{{src}_{E}}}{2} - \\frac{{{src}_{W}}}{2} & \\text{for}\\: {{src}_{W}} > 0 \\\\0.0 & \\text{otherwise} \\end{cases}\\right ]$$" + "$\\displaystyle \\left[ grad_{x} \\leftarrow \\begin{cases} \\frac{{src}_{(1,0)}}{2} - \\frac{{src}_{(-1,0)}}{2} & \\text{for}\\: {src}_{(-1,0)} > 0 \\\\0.0 & \\text{otherwise} \\end{cases}\\right]$" ], "text/plain": [ "⎡ ⎧src_E src_W ⎤\n", @@ -200,10 +204,10 @@ { "data": { "text/html": [ - "<div>Subexpressions:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$$a \\leftarrow {{src}_{N}} + {{src}_{W}}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$b \\leftarrow 2 {{src}_{E}} + {{src}_{S}}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$c \\leftarrow - {{src}_{C}} + 2 {{src}_{E}} + {{src}_{N}} + {{src}_{S}} + {{src}_{W}}$$</td> </tr> </table><div>Main Assignments:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$${{dst}_{C}} \\leftarrow a + b + c$$</td> </tr> </table>" + "<div>Subexpressions:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$$a \\leftarrow {src}_{(0,1)} + {src}_{(-1,0)}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$b \\leftarrow 2 {src}_{(1,0)} + {src}_{(0,-1)}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$c \\leftarrow - {src}_{(0,0)} + 2 {src}_{(1,0)} + {src}_{(0,1)} + {src}_{(0,-1)} + {src}_{(-1,0)}$$</td> </tr> </table><div>Main Assignments:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$${dst}_{(0,0)} \\leftarrow a + b + c$$</td> </tr> </table>" ], "text/plain": [ - "Equation Collection for dst_C" + "AssignmentCollection: dst_C, <- f(src_W, src_S, src_N, src_C, src_E)" ] }, "execution_count": 7, @@ -232,7 +236,13 @@ { "data": { "text/plain": [ - "{'adds': 8, 'muls': 2, 'divs': 0}" + "{'adds': 8,\n", + " 'muls': 2,\n", + " 'divs': 0,\n", + " 'sqrts': 0,\n", + " 'fast_sqrts': 0,\n", + " 'fast_inv_sqrts': 0,\n", + " 'fast_div': 0}" ] }, "execution_count": 8, @@ -261,10 +271,10 @@ { "data": { "text/html": [ - "<div>Subexpressions:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$$a \\leftarrow {{src}_{N}} + {{src}_{W}}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$b \\leftarrow 2 {{src}_{E}} + {{src}_{S}}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$c \\leftarrow - {{src}_{C}} + a + b$$</td> </tr> </table><div>Main Assignments:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$${{dst}_{C}} \\leftarrow a + b + c$$</td> </tr> </table>" + "<div>Subexpressions:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$$a \\leftarrow {src}_{(0,1)} + {src}_{(-1,0)}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$b \\leftarrow 2 {src}_{(1,0)} + {src}_{(0,-1)}$$</td> </tr> <tr style=\"border:none\"> <td style=\"border:none\">$$c \\leftarrow - {src}_{(0,0)} + a + b$$</td> </tr> </table><div>Main Assignments:</div><table style=\"border:none; width: 100%; \"><tr style=\"border:none\"> <td style=\"border:none\">$${dst}_{(0,0)} \\leftarrow a + b + c$$</td> </tr> </table>" ], "text/plain": [ - "Equation Collection for dst_C" + "AssignmentCollection: dst_C, <- f(src_W, src_S, src_N, src_C, src_E)" ] }, "execution_count": 9, @@ -285,7 +295,13 @@ { "data": { "text/plain": [ - "{'adds': 6, 'muls': 1, 'divs': 0}" + "{'adds': 6,\n", + " 'muls': 1,\n", + " 'divs': 0,\n", + " 'sqrts': 0,\n", + " 'fast_sqrts': 0,\n", + " 'fast_inv_sqrts': 0,\n", + " 'fast_div': 0}" ] }, "execution_count": 10, @@ -314,8 +330,13 @@ { "data": { "text/html": [ - "<style>.highlight .hll { background-color: #ffffcc }\n", - ".highlight { background: #f8f8f8; }\n", + "<style>pre { line-height: 125%; }\n", + "td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n", + "span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n", + "td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n", + "span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n", + ".highlight .hll { background-color: #ffffcc }\n", + ".highlight { background: #f8f8f8; }\n", ".highlight .c { color: #408080; font-style: italic } /* Comment */\n", ".highlight .err { border: 1px solid #FF0000 } /* Error */\n", ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n", @@ -394,40 +415,39 @@ { "data": { "text/html": [ - "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span> <span class=\"kt\">void</span> <span class=\"nf\">kernel</span><span class=\"p\">(</span><span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"n\">fd_dst</span><span class=\"p\">,</span> <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"k\">const</span> <span class=\"n\">fd_src</span><span class=\"p\">)</span>\n", - "<span class=\"p\">{</span>\n", - " <span class=\"k\">for</span> <span class=\"p\">(</span><span class=\"kt\">int</span> <span class=\"n\">ctr_0</span> <span class=\"o\">=</span> <span class=\"mi\">2</span><span class=\"p\">;</span> <span class=\"n\">ctr_0</span> <span class=\"o\"><</span> <span class=\"mi\">18</span><span class=\"p\">;</span> <span class=\"n\">ctr_0</span> <span class=\"o\">+=</span> <span class=\"mi\">1</span><span class=\"p\">)</span>\n", - " <span class=\"p\">{</span>\n", - " <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"n\">fd_dst_C</span> <span class=\"o\">=</span> <span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span> <span class=\"o\">+</span> <span class=\"n\">fd_dst</span><span class=\"p\">;</span>\n", - " <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"k\">const</span> <span class=\"n\">fd_src_2E</span> <span class=\"o\">=</span> <span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span> <span class=\"o\">+</span> <span class=\"n\">fd_src</span> <span class=\"o\">+</span> <span class=\"mi\">60</span><span class=\"p\">;</span>\n", - " <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"k\">const</span> <span class=\"n\">fd_src_W</span> <span class=\"o\">=</span> <span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span> <span class=\"o\">+</span> <span class=\"n\">fd_src</span> <span class=\"o\">-</span> <span class=\"mi\">30</span><span class=\"p\">;</span>\n", - " <span class=\"k\">for</span> <span class=\"p\">(</span><span class=\"kt\">int</span> <span class=\"n\">ctr_1</span> <span class=\"o\">=</span> <span class=\"mi\">2</span><span class=\"p\">;</span> <span class=\"n\">ctr_1</span> <span class=\"o\"><</span> <span class=\"mi\">28</span><span class=\"p\">;</span> <span class=\"n\">ctr_1</span> <span class=\"o\">+=</span> <span class=\"mi\">1</span><span class=\"p\">)</span>\n", - " <span class=\"p\">{</span>\n", - " <span class=\"n\">fd_dst_C</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"n\">fd_src_2E</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span> <span class=\"o\">+</span> <span class=\"n\">fd_src_W</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">];</span>\n", - " <span class=\"p\">}</span>\n", - " <span class=\"p\">}</span>\n", - "<span class=\"p\">}</span>\n", + "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_src</span><span class=\"p\">)</span><span class=\"w\"></span>\n", + "<span class=\"p\">{</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"k\">for</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mi\">2</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\"><</span><span class=\"w\"> </span><span class=\"mi\">18</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"p\">)</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">{</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_dst_00</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span><span class=\"p\">;</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_src_02</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_src</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">60</span><span class=\"p\">;</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_src_0m1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_src</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"p\">;</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"k\">for</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mi\">2</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\"><</span><span class=\"w\"> </span><span class=\"mi\">28</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"p\">)</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">{</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"n\">_data_dst_00</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_src_02</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">_data_src_0m1</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">];</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">}</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">}</span><span class=\"w\"></span>\n", + "<span class=\"p\">}</span><span class=\"w\"></span>\n", "</pre></div>\n" ], "text/plain": [ - "FUNC_PREFIX void kernel(double * RESTRICT fd_dst, double * RESTRICT const fd_src)\n", + "FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)\n", "{\n", - " for (int ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)\n", + " for (int64_t ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)\n", " {\n", - " double * RESTRICT fd_dst_C = 30*ctr_0 + fd_dst;\n", - " double * RESTRICT const fd_src_2E = 30*ctr_0 + fd_src + 60;\n", - " double * RESTRICT const fd_src_W = 30*ctr_0 + fd_src - 30;\n", - " for (int ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)\n", + " double * RESTRICT _data_dst_00 = _data_dst + 30*ctr_0;\n", + " double * RESTRICT _data_src_02 = _data_src + 30*ctr_0 + 60;\n", + " double * RESTRICT _data_src_0m1 = _data_src + 30*ctr_0 - 30;\n", + " for (int64_t ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)\n", " {\n", - " fd_dst_C[ctr_1] = fd_src_2E[ctr_1] + fd_src_W[ctr_1];\n", + " _data_dst_00[ctr_1] = _data_src_02[ctr_1] + _data_src_0m1[ctr_1];\n", " }\n", " }\n", "}" ] }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ @@ -451,8 +471,13 @@ { "data": { "text/html": [ - "<style>.highlight .hll { background-color: #ffffcc }\n", - ".highlight { background: #f8f8f8; }\n", + "<style>pre { line-height: 125%; }\n", + "td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n", + "span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n", + "td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n", + "span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n", + ".highlight .hll { background-color: #ffffcc }\n", + ".highlight { background: #f8f8f8; }\n", ".highlight .c { color: #408080; font-style: italic } /* Comment */\n", ".highlight .err { border: 1px solid #FF0000 } /* Error */\n", ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n", @@ -531,40 +556,39 @@ { "data": { "text/html": [ - "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span> <span class=\"kt\">void</span> <span class=\"nf\">kernel</span><span class=\"p\">(</span><span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"n\">fd_dst</span><span class=\"p\">,</span> <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"k\">const</span> <span class=\"n\">fd_src</span><span class=\"p\">)</span>\n", - "<span class=\"p\">{</span>\n", - " <span class=\"k\">for</span> <span class=\"p\">(</span><span class=\"kt\">int</span> <span class=\"n\">ctr_0</span> <span class=\"o\">=</span> <span class=\"mi\">0</span><span class=\"p\">;</span> <span class=\"n\">ctr_0</span> <span class=\"o\"><</span> <span class=\"mi\">18</span><span class=\"p\">;</span> <span class=\"n\">ctr_0</span> <span class=\"o\">+=</span> <span class=\"mi\">1</span><span class=\"p\">)</span>\n", - " <span class=\"p\">{</span>\n", - " <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"n\">fd_dst_C</span> <span class=\"o\">=</span> <span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span> <span class=\"o\">+</span> <span class=\"n\">fd_dst</span><span class=\"p\">;</span>\n", - " <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"k\">const</span> <span class=\"n\">fd_src_2E</span> <span class=\"o\">=</span> <span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span> <span class=\"o\">+</span> <span class=\"n\">fd_src</span> <span class=\"o\">+</span> <span class=\"mi\">60</span><span class=\"p\">;</span>\n", - " <span class=\"kt\">double</span> <span class=\"o\">*</span> <span class=\"n\">RESTRICT</span> <span class=\"k\">const</span> <span class=\"n\">fd_src_W</span> <span class=\"o\">=</span> <span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span> <span class=\"o\">+</span> <span class=\"n\">fd_src</span> <span class=\"o\">-</span> <span class=\"mi\">30</span><span class=\"p\">;</span>\n", - " <span class=\"k\">for</span> <span class=\"p\">(</span><span class=\"kt\">int</span> <span class=\"n\">ctr_1</span> <span class=\"o\">=</span> <span class=\"mi\">1</span><span class=\"p\">;</span> <span class=\"n\">ctr_1</span> <span class=\"o\"><</span> <span class=\"mi\">30</span><span class=\"p\">;</span> <span class=\"n\">ctr_1</span> <span class=\"o\">+=</span> <span class=\"mi\">1</span><span class=\"p\">)</span>\n", - " <span class=\"p\">{</span>\n", - " <span class=\"n\">fd_dst_C</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"n\">fd_src_2E</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span> <span class=\"o\">+</span> <span class=\"n\">fd_src_W</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">];</span>\n", - " <span class=\"p\">}</span>\n", - " <span class=\"p\">}</span>\n", - "<span class=\"p\">}</span>\n", + "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_src</span><span class=\"p\">)</span><span class=\"w\"></span>\n", + "<span class=\"p\">{</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"k\">for</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mi\">0</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\"><</span><span class=\"w\"> </span><span class=\"mi\">18</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"p\">)</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">{</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_dst_00</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_dst</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span><span class=\"p\">;</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_src_02</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_src</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">60</span><span class=\"p\">;</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_src_0m1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_src</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"o\">*</span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">-</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"p\">;</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"k\">for</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\"><</span><span class=\"w\"> </span><span class=\"mi\">30</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_1</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"p\">)</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">{</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"n\">_data_dst_00</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_src_02</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">_data_src_0m1</span><span class=\"p\">[</span><span class=\"n\">ctr_1</span><span class=\"p\">];</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">}</span><span class=\"w\"></span>\n", + "<span class=\"w\"> </span><span class=\"p\">}</span><span class=\"w\"></span>\n", + "<span class=\"p\">}</span><span class=\"w\"></span>\n", "</pre></div>\n" ], "text/plain": [ - "FUNC_PREFIX void kernel(double * RESTRICT fd_dst, double * RESTRICT const fd_src)\n", + "FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)\n", "{\n", - " for (int ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)\n", + " for (int64_t ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)\n", " {\n", - " double * RESTRICT fd_dst_C = 30*ctr_0 + fd_dst;\n", - " double * RESTRICT const fd_src_2E = 30*ctr_0 + fd_src + 60;\n", - " double * RESTRICT const fd_src_W = 30*ctr_0 + fd_src - 30;\n", - " for (int ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)\n", + " double * RESTRICT _data_dst_00 = _data_dst + 30*ctr_0;\n", + " double * RESTRICT _data_src_02 = _data_src + 30*ctr_0 + 60;\n", + " double * RESTRICT _data_src_0m1 = _data_src + 30*ctr_0 - 30;\n", + " for (int64_t ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)\n", " {\n", - " fd_dst_C[ctr_1] = fd_src_2E[ctr_1] + fd_src_W[ctr_1];\n", + " _data_dst_00[ctr_1] = _data_src_02[ctr_1] + _data_src_0m1[ctr_1];\n", " }\n", " }\n", "}" ] }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ @@ -703,7 +727,7 @@ { "data": { "text/plain": [ - "KernelFunction kernel([<double * RESTRICT fd_dst>, <double * RESTRICT const fd_src>])" + "KernelFunction kernel([_data_dst, _data_src])" ] }, "execution_count": 17, @@ -722,7 +746,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -736,7 +760,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index cbcc62f8654840f2da3370b8c2206c98dc8b9fa3..a17ee7269bbb01aef673dc22c6432698d0c3cd5b 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -47,7 +47,7 @@ def generate_c(ast_node: Node, Args: ast_node: ast representation of kernel signature_only: generate signature without function body - dialect: `Backend`: 'C', 'CUDA' or 'OPENCL' + dialect: `Backend`: 'C' or 'CUDA' custom_backend: use own custom printer for code generation with_globals: enable usage of global variables Returns: @@ -71,9 +71,6 @@ def generate_c(ast_node: Node, elif dialect == Backend.CUDA: from pystencils.backends.cuda_backend import CudaBackend printer = CudaBackend(signature_only=signature_only) - elif dialect == Backend.OPENCL: - from pystencils.backends.opencl_backend import OpenClBackend - printer = OpenClBackend(signature_only=signature_only) else: raise ValueError(f'Unknown {dialect=}') code = printer(ast_node) diff --git a/pystencils/backends/opencl1.1_known_functions.txt b/pystencils/backends/opencl1.1_known_functions.txt deleted file mode 100644 index abeab29043e2c76b2b2df8e9024c00e7ac34de0f..0000000000000000000000000000000000000000 --- a/pystencils/backends/opencl1.1_known_functions.txt +++ /dev/null @@ -1,100 +0,0 @@ -acos -acosh -acospi -asin -asinh -asinpi -atan -atan2 -atanh -atanpi -atan2pi -cbrt -ceil -copysign -cos -cosh -cospi -erfc -erf -exp -exp2 -exp10 -expm1 -fabs -fdim -floor -fma -fmax -fmax -fmin45 -fmin -fmod -fract -frexp -hypot -ilogb -ldexp -lgamma -lgamma_r -log -log2 -log10 -log1p -logb -mad -maxmag -minmag -modf -nextafter -pow -pown -powr -remquo -intn -remquo -rint -rootn -rootn -round -rsqrt -sin -sincos -sinh -sinpi -sqrt -tan -tanh -tanpi -tgamma -trunc - - -half_cos -half_divide -half_exp -half_exp2 -half_exp10 -half_log -half_log2 -half_log10 -half_powr -half_recip -half_rsqrt -half_sin -half_sqrt -half_tan -native_cos -native_divide -native_exp -native_exp2 -native_exp10 -native_log -native_log2 -native_log10 -native_powr -native_recip -native_rsqrt -native_sin -native_sqrt -native_tan diff --git a/pystencils/backends/opencl_backend.py b/pystencils/backends/opencl_backend.py deleted file mode 100644 index c2d71e4d202549e21cc877d0a159792422ee66bd..0000000000000000000000000000000000000000 --- a/pystencils/backends/opencl_backend.py +++ /dev/null @@ -1,108 +0,0 @@ -from os.path import dirname, join - -import pystencils.data_types -from pystencils.astnodes import Node -from pystencils.backends.cbackend import CustomSympyPrinter, generate_c -from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter -from pystencils.enums import Backend -from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt - -with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f: - lines = f.readlines() - OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l} - - -def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str: - """Prints an abstract syntax tree node (made for `Target` 'GPU') as OpenCL code. # TODO Backend instead of Target? - - Args: - ast_node: ast representation of kernel - signature_only: generate signature without function body - custom_backend: use own custom printer for code generation - with_globals: enable usage of global variables - - Returns: - OpenCL code for the ast node and its descendants - """ - return generate_c(ast_node, signature_only, dialect=Backend.OPENCL, - custom_backend=custom_backend, with_globals=with_globals) - - -class OpenClBackend(CudaBackend): - - def __init__(self, - sympy_printer=None, - signature_only=False): - if not sympy_printer: - sympy_printer = OpenClSympyPrinter() - - super().__init__(sympy_printer, signature_only) - self._dialect = Backend.OPENCL - - def _print_Type(self, node): - code = super()._print_Type(node) - if isinstance(node, pystencils.data_types.PointerType): - return "__global " + code - else: - return code - - def _print_ThreadBlockSynchronization(self, node): - raise NotImplementedError() - - def _print_TextureDeclaration(self, node): - raise NotImplementedError() - - -class OpenClSympyPrinter(CudaSympyPrinter): - language = "OpenCL" - - DIMENSION_MAPPING = { - 'x': '0', - 'y': '1', - 'z': '2' - } - INDEXING_FUNCTION_MAPPING = { - 'blockIdx': 'get_group_id', - 'threadIdx': 'get_local_id', - 'blockDim': 'get_local_size', - 'gridDim': 'get_global_size' - } - - def __init__(self): - CustomSympyPrinter.__init__(self) - self.known_functions = OPENCL_KNOWN_FUNCTIONS - - def _print_Type(self, node): - code = super()._print_Type(node) - if isinstance(node, pystencils.data_types.PointerType): - return "__global " + code - else: - return code - - def _print_ThreadIndexingSymbol(self, node): - symbol_name: str = node.name - function_name, dimension = tuple(symbol_name.split(".")) - dimension = self.DIMENSION_MAPPING[dimension] - function_name = self.INDEXING_FUNCTION_MAPPING[function_name] - return f"(int64_t) {function_name}({dimension})" - - def _print_TextureAccess(self, node): - raise NotImplementedError() - - # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter - # since built-in math functions are generic. - # In CUDA, you have to differentiate between `sin` and `sinf` - try: - _print_math_func = CustomSympyPrinter._print_math_func - except AttributeError: - pass - _print_Pow = CustomSympyPrinter._print_Pow - - def _print_Function(self, expr): - if isinstance(expr, fast_division): - return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args) - elif isinstance(expr, fast_sqrt): - return f"native_sqrt({tuple(self._print(a) for a in expr.args)})" - elif isinstance(expr, fast_inv_sqrt): - return f"native_rsqrt({tuple(self._print(a) for a in expr.args)})" - return CustomSympyPrinter._print_Function(self, expr) diff --git a/pystencils/datahandling/__init__.py b/pystencils/datahandling/__init__.py index 139ac4e7d4127e37818399374f5bb0c76d8f1980..7f142428cf14b62813a7b9b1b245ffae021c1fb8 100644 --- a/pystencils/datahandling/__init__.py +++ b/pystencils/datahandling/__init__.py @@ -23,8 +23,7 @@ def create_data_handling(domain_size: Tuple[int, ...], default_layout: str = 'SoA', default_target: Target = Target.CPU, parallel: bool = False, - default_ghost_layers: int = 1, - opencl_queue=None) -> DataHandling: + default_ghost_layers: int = 1) -> DataHandling: """Creates a data handling instance. Args: @@ -43,7 +42,6 @@ def create_data_handling(domain_size: Tuple[int, ...], default_target = new_target if parallel: - assert not opencl_queue, "OpenCL is only supported for SerialDataHandling" if wlb is None: raise ValueError("Cannot create parallel data handling because walberla module is not available") @@ -71,8 +69,7 @@ def create_data_handling(domain_size: Tuple[int, ...], periodicity=periodicity, default_target=default_target, default_layout=default_layout, - default_ghost_layers=default_ghost_layers, - opencl_queue=opencl_queue) + default_ghost_layers=default_ghost_layers) __all__ = ['create_data_handling'] diff --git a/pystencils/datahandling/datahandling_interface.py b/pystencils/datahandling/datahandling_interface.py index d06b6f7328d7b00986530ce48453f2c378e6639d..d6dc7b4ea75de2108e8818c4f3764659cfed3f0d 100644 --- a/pystencils/datahandling/datahandling_interface.py +++ b/pystencils/datahandling/datahandling_interface.py @@ -17,8 +17,8 @@ class DataHandling(ABC): 'gather' function that has collects (parts of the) distributed data on a single process. """ - _GPU_LIKE_TARGETS = [Target.GPU, Target.OPENCL] - _GPU_LIKE_BACKENDS = [Backend.CUDA, Backend.OPENCL] + _GPU_LIKE_TARGETS = [Target.GPU] + _GPU_LIKE_BACKENDS = [Backend.CUDA] # ---------------------------- Adding and accessing data ----------------------------------------------------------- @property diff --git a/pystencils/datahandling/pyopencl.py b/pystencils/datahandling/pyopencl.py deleted file mode 100644 index 2466c80cdae1ce6cd143c941e1b5df6983d669af..0000000000000000000000000000000000000000 --- a/pystencils/datahandling/pyopencl.py +++ /dev/null @@ -1,49 +0,0 @@ -try: - import pyopencl.array as gpuarray -except ImportError: - gpuarray = None - -import numpy as np - -import pystencils - - -class PyOpenClArrayHandler: - - def __init__(self, queue): - if not queue: - from pystencils.opencl.opencljit import get_global_cl_queue - queue = get_global_cl_queue() - assert queue, "OpenCL queue missing!\n" \ - "Use `import pystencils.opencl.autoinit` if you want it to be automatically created" - self.queue = queue - - def zeros(self, shape, dtype=np.float64, order='C'): - cpu_array = np.zeros(shape=shape, dtype=dtype, order=order) - return self.to_gpu(cpu_array) - - def ones(self, shape, dtype=np.float64, order='C'): - cpu_array = np.ones(shape=shape, dtype=dtype, order=order) - return self.to_gpu(cpu_array) - - def empty(self, shape, dtype=np.float64, layout=None): - if layout: - cpu_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout) - return self.to_gpu(cpu_array) - else: - return gpuarray.empty(self.queue, shape, dtype) - - def to_gpu(self, array): - return gpuarray.to_device(self.queue, array) - - def upload(self, gpuarray, numpy_array): - gpuarray.set(numpy_array, self.queue) - - def download(self, gpuarray, numpy_array): - gpuarray.get(self.queue, numpy_array) - - def randn(self, shape, dtype=np.float64): - cpu_array = np.random.randn(*shape).astype(dtype) - return self.from_numpy(cpu_array) - - from_numpy = to_gpu diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py index 2950c8558ad060744eb0a1ec2a81f44cd2367e13..319411fef297e4b74ecb73f0b76ee32419c81aa0 100644 --- a/pystencils/datahandling/serial_datahandling.py +++ b/pystencils/datahandling/serial_datahandling.py @@ -7,7 +7,6 @@ import numpy as np from pystencils.datahandling.blockiteration import SerialBlock from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler -from pystencils.datahandling.pyopencl import PyOpenClArrayHandler from pystencils.enums import Target from pystencils.field import ( Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple, @@ -24,8 +23,6 @@ class SerialDataHandling(DataHandling): default_layout: str = 'SoA', periodicity: Union[bool, Sequence[bool]] = False, default_target: Target = Target.CPU, - opencl_queue=None, - opencl_ctx=None, array_handler=None) -> None: """ Creates a data handling for single node simulations. @@ -48,17 +45,12 @@ class SerialDataHandling(DataHandling): self.custom_data_cpu = DotDict() self.custom_data_gpu = DotDict() self._custom_data_transfer_functions = {} - self._opencl_queue = opencl_queue - self._opencl_ctx = opencl_ctx if not array_handler: try: self.array_handler = PyCudaArrayHandler() except Exception: self.array_handler = PyCudaNotAvailableHandler() - - if default_target == Target.OPENCL or opencl_queue: - self.array_handler = PyOpenClArrayHandler(opencl_queue) else: self.array_handler = array_handler @@ -280,8 +272,6 @@ class SerialDataHandling(DataHandling): def synchronization_function(self, names, stencil=None, target=None, functor=None, **_): if target is None: target = self.default_target - if target == Target.OPENCL: # TODO potential misuse between Target and Backend - target = Target.GPU assert target in (Target.CPU, Target.GPU) if not hasattr(names, '__len__') or type(names) is str: names = [names] @@ -324,16 +314,13 @@ class SerialDataHandling(DataHandling): else: if functor is None: from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor - target = Target.GPU if not isinstance(self.array_handler, - PyOpenClArrayHandler) else Target.OPENCL + target = Target.GPU result.append(functor(filtered_stencil, self._domainSize, index_dimensions=self.fields[name].index_dimensions, index_dim_shape=values_per_cell, dtype=self.fields[name].dtype.numpy_dtype, ghost_layers=gls, - target=target, - opencl_queue=self._opencl_queue, - opencl_ctx=self._opencl_ctx)) + target=target)) if target == Target.CPU: def result_functor(): diff --git a/pystencils/display_utils.py b/pystencils/display_utils.py index 22492dd3af901b30b2c559602f5ca4eaea451f7b..3250765c83bafb46de7b878669dbe485a64dd91e 100644 --- a/pystencils/display_utils.py +++ b/pystencils/display_utils.py @@ -46,7 +46,7 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None) if isinstance(ast, KernelWrapper): ast = ast.ast - if ast.backend not in {Backend.C, Backend.CUDA, Backend.OPENCL}: + if ast.backend not in {Backend.C, Backend.CUDA}: raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}') dialect = ast.backend diff --git a/pystencils/enums.py b/pystencils/enums.py index 77d59a3ef4efb2aa1b7569d3d0c6daf21f638b75..a52a049a9101b20dd8f1f347f5cd2d72b09a8016 100644 --- a/pystencils/enums.py +++ b/pystencils/enums.py @@ -13,10 +13,6 @@ class Target(Enum): """ Target GPU architecture. """ - OPENCL = auto() - """ - Target all architectures OpenCL covers (Thus both, Target and Backend) - """ class Backend(Enum): @@ -32,7 +28,3 @@ class Backend(Enum): """ Use the CUDA backend to generate code for NVIDIA GPUs. """ - OPENCL = auto() - """ - Use the OpenCL backend to generate code for OpenCL. - """ diff --git a/pystencils/gpucuda/periodicity.py b/pystencils/gpucuda/periodicity.py index e5083af4ac0781121901cca0edcb1d8273df5894..5a402c606fee50024c049efc3ad50ab40f417515 100644 --- a/pystencils/gpucuda/periodicity.py +++ b/pystencils/gpucuda/periodicity.py @@ -2,7 +2,6 @@ import numpy as np from itertools import product import pystencils.gpucuda -import pystencils.opencl from pystencils import Assignment, Field from pystencils.gpucuda.kernelcreation import create_cuda_kernel from pystencils.enums import Target @@ -33,19 +32,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in # TODO: type flot is dangerous here def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1, - thickness=None, dtype=float, target=Target.GPU, opencl_queue=None, opencl_ctx=None): - assert target in {Target.GPU, Target.OPENCL} + thickness=None, dtype=float, target=Target.GPU): + assert target in {Target.GPU} src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness) kernels = [] for src_slice, dst_slice in src_dst_slice_tuples: ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype) - if target == pystencils.Target.GPU: - kernels.append(pystencils.gpucuda.make_python_function(ast)) - else: - ast._target = pystencils.Target.OPENCL - ast._backend = pystencils.Backend.OPENCL - kernels.append(pystencils.opencl.make_python_function(ast, opencl_queue, opencl_ctx)) + kernels.append(pystencils.gpucuda.make_python_function(ast)) def functor(pdfs, **_): for kernel in kernels: diff --git a/pystencils/gpucuda/texture_utils.py b/pystencils/gpucuda/texture_utils.py deleted file mode 100644 index 0b383507fb8bd57ff1ca73aa4ec00db0481d2717..0000000000000000000000000000000000000000 --- a/pystencils/gpucuda/texture_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> -# -# Distributed under terms of the GPLv3 license. - -""" - -""" -from typing import Union - -import numpy as np - -try: - import pycuda.driver as cuda - from pycuda import gpuarray - import pycuda -except Exception: - pass - - -def ndarray_to_tex(tex_ref, # type: Union[cuda.TextureReference, cuda.SurfaceReference] - ndarray, - address_mode=None, - filter_mode=None, - use_normalized_coordinates=False, - read_as_integer=False): - - if isinstance(address_mode, str): - address_mode = getattr(pycuda.driver.address_mode, address_mode.upper()) - if address_mode is None: - address_mode = cuda.address_mode.BORDER - if filter_mode is None: - filter_mode = cuda.filter_mode.LINEAR - - if isinstance(ndarray, np.ndarray): - cu_array = cuda.np_to_array(ndarray, 'C') - elif isinstance(ndarray, gpuarray.GPUArray): - cu_array = cuda.gpuarray_to_array(ndarray, 'C') - else: - raise TypeError( - 'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray') - - tex_ref.set_array(cu_array) - - tex_ref.set_address_mode(0, address_mode) - if ndarray.ndim >= 2: - tex_ref.set_address_mode(1, address_mode) - if ndarray.ndim >= 3: - tex_ref.set_address_mode(2, address_mode) - tex_ref.set_filter_mode(filter_mode) - - if not use_normalized_coordinates: - tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES) - - if not read_as_integer: - tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER) diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py index a42d2d2e1b59e695aa78173488404ac06a6bc52b..e3b9ed90730a917a5e7d09f12759d596d23af858 100644 --- a/pystencils/kernelcreation.py +++ b/pystencils/kernelcreation.py @@ -1,4 +1,3 @@ -import functools import itertools import warnings from dataclasses import dataclass, field @@ -107,14 +106,6 @@ class CreateKernelConfig: """ If set to `True`, auto can be used in the generated code for data types. This makes the type system more robust. """ - opencl_queue: Any = None - """ - OpenCL queue if OpenCL target is used. - """ - opencl_ctx: Any = None - """ - OpenCL context if OpenCL target is used. - """ index_fields: List[Field] = None """ List of index fields, i.e. 1D fields with struct data type. If not `None`, `create_index_kernel` @@ -139,8 +130,6 @@ class CreateKernelConfig: self.backend = Backend.C elif self.target == Target.GPU: self.backend = Backend.CUDA - elif self.target == Target.OPENCL: - self.backend = Backend.OPENCL else: raise NotImplementedError(f'Target {self.target} has no default backend') @@ -278,20 +267,14 @@ def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelC raise ValueError("Blocking cannot be combined with cacheline-zeroing") else: raise ValueError("Invalid value for cpu_vectorize_info") - elif config.target == Target.GPU or config.target == Target.OPENCL: - if config.backend == Backend.CUDA or config.backend == Backend.OPENCL: + elif config.target == Target.GPU: + if config.backend == Backend.CUDA: from pystencils.gpucuda import create_cuda_kernel ast = create_cuda_kernel(assignments, function_name=config.function_name, type_info=config.data_type, indexing_creator=indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params), iteration_slice=config.iteration_slice, ghost_layers=config.ghost_layers, skip_independence_check=config.skip_independence_check) - if config.backend == Backend.OPENCL: - from pystencils.opencl.opencljit import make_python_function - ast._backend = config.backend - ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx) - ast._target = config.target - ast._backend = config.backend if not ast: raise NotImplementedError( @@ -353,8 +336,8 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel coordinate_names=config.coordinate_names) if config.cpu_openmp: add_openmp(ast, num_threads=config.cpu_openmp) - elif config.target == Target.GPU or config.target == Target.OPENCL: - if config.backend == Backend.CUDA or config.backend == Backend.OPENCL: + elif config.target == Target.GPU: + if config.backend == Backend.CUDA: from pystencils.gpucuda import created_indexed_cuda_kernel idx_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params) ast = created_indexed_cuda_kernel(assignments, @@ -362,12 +345,6 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel type_info=config.data_type, coordinate_names=config.coordinate_names, indexing_creator=idx_creator) - if config.backend == Backend.OPENCL: - from pystencils.opencl.opencljit import make_python_function - ast._backend = config.backend - ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx) - ast._target = config.target - ast._backend = config.backend if not ast: raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}') diff --git a/pystencils/opencl/__init__.py b/pystencils/opencl/__init__.py deleted file mode 100644 index e405a2f4e1b22a285cecb833da7764f88762d1ae..0000000000000000000000000000000000000000 --- a/pystencils/opencl/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" - -""" - -from pystencils.opencl.opencljit import ( - clear_global_ctx, init_globally, init_globally_with_context, make_python_function) - -__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function'] diff --git a/pystencils/opencl/autoinit.py b/pystencils/opencl/autoinit.py deleted file mode 100644 index 3d20169b640a83895edd9366a279fc6f2e13f6b4..0000000000000000000000000000000000000000 --- a/pystencils/opencl/autoinit.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Automatically initializes OpenCL context using any device. - -Use `pystencils.opencl.{init_globally_with_context,init_globally}` if you want to use a specific device. -""" - -from pystencils.opencl.opencljit import ( - clear_global_ctx, init_globally, init_globally_with_context, make_python_function) - -__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function'] - -try: - init_globally() -except Exception as e: - import warnings - warnings.warn(str(e)) diff --git a/pystencils/opencl/opencljit.py b/pystencils/opencl/opencljit.py deleted file mode 100644 index 5fb14a5adbfa6374d5cff0b22197d3f93736550f..0000000000000000000000000000000000000000 --- a/pystencils/opencl/opencljit.py +++ /dev/null @@ -1,142 +0,0 @@ -import numpy as np - -from pystencils.backends.cbackend import get_headers -from pystencils.backends.opencl_backend import generate_opencl -from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments -from pystencils.include import get_pystencils_include_path -from pystencils.kernel_wrapper import KernelWrapper - -USE_FAST_MATH = True - - -_global_cl_ctx = None -_global_cl_queue = None - - -def get_global_cl_queue(): - return _global_cl_queue - - -def get_global_cl_ctx(): - return _global_cl_ctx - - -def init_globally(device_index=0): - import pyopencl as cl - global _global_cl_ctx - global _global_cl_queue - _global_cl_ctx = cl.create_some_context(device_index) - _global_cl_queue = cl.CommandQueue(_global_cl_ctx) - - -def init_globally_with_context(opencl_ctx, opencl_queue): - global _global_cl_ctx - global _global_cl_queue - _global_cl_ctx = opencl_ctx - _global_cl_queue = opencl_queue - - -def clear_global_ctx(): - global _global_cl_ctx - global _global_cl_queue - _global_cl_ctx = None - _global_cl_queue = None - - -def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None): - """ - Creates a **OpenCL** kernel function from an abstract syntax tree which - was created for the ``target='Target.GPU'`` e.g. by :func:`pystencils.gpucuda.create_cuda_kernel` - or :func:`pystencils.gpucuda.created_indexed_cuda_kernel` - - Args: - opencl_queue: a valid :class:`pyopencl.CommandQueue` - opencl_ctx: a valid :class:`pyopencl.Context` - kernel_function_node: the abstract syntax tree - argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the - returned kernel functor. - - Returns: - compiled kernel as Python function - """ - import pyopencl as cl - - if not opencl_ctx: - opencl_ctx = _global_cl_ctx - if not opencl_queue: - opencl_queue = _global_cl_queue - - assert opencl_ctx, "No valid OpenCL context!\n" \ - "Use `import pystencils.opencl.autoinit` if you want it to be automatically created" - assert opencl_queue, "No valid OpenCL queue!\n" \ - "Use `import pystencils.opencl.autoinit` if you want it to be automatically created" - - if argument_dict is None: - argument_dict = {} - - # check if double precision is supported and required - if any([d.double_fp_config == 0 for d in opencl_ctx.devices]): - for param in kernel_function_node.get_parameters(): - if param.symbol.dtype.base_type: - if param.symbol.dtype.base_type.numpy_dtype == np.float64: - raise ValueError('OpenCL device does not support double precision') - else: - if param.symbol.dtype.numpy_dtype == np.float64: - raise ValueError('OpenCL device does not support double precision') - - # Changing of kernel name necessary since compilation with default name "kernel" is not possible (OpenCL keyword!) - kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name - header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node)) - includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) - - code = includes + "\n" - code += "#define FUNC_PREFIX __kernel\n" - code += "#define RESTRICT restrict\n\n" - code += str(generate_opencl(kernel_function_node, custom_backend=custom_backend)) - options = [] - if USE_FAST_MATH: - options.append("-cl-unsafe-math-optimizations") - options.append("-cl-mad-enable") - options.append("-cl-fast-relaxed-math") - options.append("-cl-finite-math-only") - options.append("-I") - options.append(get_pystencils_include_path()) - mod = cl.Program(opencl_ctx, code).build(options=options) - func = getattr(mod, kernel_function_node.function_name) - - parameters = kernel_function_node.get_parameters() - - cache = {} - cache_values = [] - - def wrapper(**kwargs): - key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v)) - for k, v in kwargs.items())) - try: - args, block_and_thread_numbers = cache[key] - func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args) - except KeyError: - full_arguments = argument_dict.copy() - full_arguments.update(kwargs) - assert not any(isinstance(a, np.ndarray) - for a in full_arguments.values()), 'Calling a OpenCL kernel with a Numpy array!' - assert not any('pycuda' in str(type(a)) - for a in full_arguments.values()), 'Calling a OpenCL kernel with a PyCUDA array!' - shape = _check_arguments(parameters, full_arguments) - - indexing = kernel_function_node.indexing - block_and_thread_numbers = indexing.call_parameters(shape) - block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block']) - block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'], - block_and_thread_numbers['grid'])) - - args = _build_numpy_argument_list(parameters, full_arguments) - args = [a.data if hasattr(a, 'data') else a for a in args] - cache[key] = (args, block_and_thread_numbers) - cache_values.append(kwargs) # keep objects alive such that ids remain unique - func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args) - - wrapper.ast = kernel_function_node - wrapper.parameters = kernel_function_node.get_parameters() - wrapper = KernelWrapper(wrapper, parameters, kernel_function_node) - return wrapper diff --git a/pystencils/rng.py b/pystencils/rng.py index 513792a9631d7a4bb585d63a4eb481fcd241871a..7c4f894f9871e350fd9a5f531708d123dcb7be2b 100644 --- a/pystencils/rng.py +++ b/pystencils/rng.py @@ -5,7 +5,6 @@ import sympy as sp from pystencils.data_types import TypedSymbol, cast_func from pystencils.astnodes import LoopOverCoordinate from pystencils.backends.cbackend import CustomCodeNode -from pystencils.enums import Backend from pystencils.sympyextensions import fast_subs @@ -54,8 +53,7 @@ class RNGBase(CustomCodeNode): else: code += f"{vector_instruction_set[r.dtype.base_name] if vector_instruction_set else r.dtype} " + \ f"{r.name};\n" - args = [print_arg(a) for a in self.args] + \ - [('&' if dialect == Backend.OPENCL else '') + r.name for r in self.result_symbols] + args = [print_arg(a) for a in self.args] + ['' + r.name for r in self.result_symbols] code += (self._name + "(" + ", ".join(args) + ");\n") return code diff --git a/pystencils_tests/test_create_kernel_config.py b/pystencils_tests/test_create_kernel_config.py index 836569a9382626b808a1bfb9f75c94ba3722e9ac..86a1c0ca8b2d726e3a5cb1681800842d9c1a0408 100644 --- a/pystencils_tests/test_create_kernel_config.py +++ b/pystencils_tests/test_create_kernel_config.py @@ -10,9 +10,6 @@ def test_create_kernel_config(): c = ps.CreateKernelConfig(target=ps.Target.GPU) assert c.backend == ps.Backend.CUDA - c = ps.CreateKernelConfig(target=ps.Target.OPENCL) - assert c.backend == ps.Backend.OPENCL - c = ps.CreateKernelConfig(backend=ps.Backend.CUDA) assert c.target == ps.Target.CPU assert c.backend == ps.Backend.CUDA diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py index 7004415fb98e25460462fd0d830bcc45af8c6365..be695d078384e678d93f2116b5932379035e878a 100644 --- a/pystencils_tests/test_datahandling.py +++ b/pystencils_tests/test_datahandling.py @@ -7,7 +7,6 @@ import numpy as np import pystencils as ps from pystencils import create_data_handling, create_kernel from pystencils.datahandling.pycuda import PyCudaArrayHandler -from pystencils.datahandling.pyopencl import PyOpenClArrayHandler from pystencils.enums import Target try: @@ -117,7 +116,7 @@ def synchronization(dh, test_gpu=False): def kernel_execution_jacobi(dh, target): - test_gpu = target == Target.GPU or target == Target.OPENCL + test_gpu = target == Target.GPU dh.add_array('f', gpu=test_gpu) dh.add_array('tmp', gpu=test_gpu) @@ -223,15 +222,11 @@ def test_kernel(): pass -@pytest.mark.parametrize('target', (Target.CPU, Target.GPU, Target.OPENCL)) +@pytest.mark.parametrize('target', (Target.CPU, Target.GPU)) def test_kernel_param(target): for domain_shape in [(4, 5), (3, 4, 5)]: if target == Target.GPU: pytest.importorskip('pycuda') - if target == Target.OPENCL: - pytest.importorskip('pyopencl') - from pystencils.opencl.opencljit import init_globally - init_globally() dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target) kernel_execution_jacobi(dh, target) @@ -362,20 +357,10 @@ def test_load_data(): assert np.all(dh.cpu_arrays['dst2']) == 0 -@pytest.mark.parametrize('target', (Target.GPU, Target.OPENCL)) -def test_array_handler(target): +def test_array_handler(): size = (2, 2) - if target == Target.GPU: - pytest.importorskip('pycuda') - array_handler = PyCudaArrayHandler() - if target == Target.OPENCL: - pytest.importorskip('pyopencl') - import pyopencl as cl - from pystencils.opencl.opencljit import init_globally - init_globally() - ctx = cl.create_some_context(0) - queue = cl.CommandQueue(ctx) - array_handler = PyOpenClArrayHandler(queue) + pytest.importorskip('pycuda') + array_handler = PyCudaArrayHandler() zero_array = array_handler.zeros(size) cpu_array = np.empty(size) diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py deleted file mode 100644 index e5995742e788572405cf60becf5d1e32b5faa674..0000000000000000000000000000000000000000 --- a/pystencils_tests/test_opencl.py +++ /dev/null @@ -1,271 +0,0 @@ -import numpy as np -import pytest -import sympy as sp - -import pystencils -from pystencils.backends.cuda_backend import CudaBackend -from pystencils.backends.opencl_backend import OpenClBackend -from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function - -try: - import pyopencl as cl - HAS_OPENCL = True - import pystencils.opencl.autoinit - -except Exception: - HAS_OPENCL = False - - -def test_print_opencl(): - z, y, x = pystencils.fields("z, y, x: [2d]") - - assignments = pystencils.AssignmentCollection({ - z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) - }) - - print(assignments) - - ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU) - - print(ast) - - pystencils.show_code(ast, custom_backend=CudaBackend()) - - opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend()) - print(opencl_code) - - assert "__global double * RESTRICT const _data_x" in str(opencl_code) - assert "__global double * RESTRICT" in str(opencl_code) - assert "get_local_id(0)" in str(opencl_code) - - -@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl") -def test_opencl_jit_fixed_size(): - pytest.importorskip('pycuda') - - z, y, x = pystencils.fields("z, y, x: [20,30]") - - assignments = pystencils.AssignmentCollection({ - z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) - }) - - print(assignments) - - ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU) # TODO maybe Target Opencl - - print(ast) - - code = pystencils.show_code(ast, custom_backend=CudaBackend()) - print(code) - opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend()) - print(opencl_code) - - cuda_kernel = ast.compile() - assert cuda_kernel is not None - - import pycuda.gpuarray as gpuarray - - x_cpu = np.random.rand(20, 30) - y_cpu = np.random.rand(20, 30) - z_cpu = np.random.rand(20, 30) - - x = gpuarray.to_gpu(x_cpu) - y = gpuarray.to_gpu(y_cpu) - z = gpuarray.to_gpu(z_cpu) - cuda_kernel(x=x, y=y, z=z) - - result_cuda = z.get() - - import pyopencl.array as array - ctx = cl.create_some_context(0) - queue = cl.CommandQueue(ctx) - - x = array.to_device(queue, x_cpu) - y = array.to_device(queue, y_cpu) - z = array.to_device(queue, z_cpu) - - opencl_kernel = make_python_function(ast, queue, ctx) - assert opencl_kernel is not None - opencl_kernel(x=x, y=y, z=z) - - result_opencl = z.get(queue) - - assert np.allclose(result_cuda, result_opencl) - - -@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl") -def test_opencl_jit(): - pytest.importorskip('pycuda') - - z, y, x = pystencils.fields("z, y, x: [2d]") - - assignments = pystencils.AssignmentCollection({ - z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) - }) - - print(assignments) - - ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU) - - print(ast) - - pystencils.show_code(ast, custom_backend=CudaBackend()) - - pystencils.show_code(ast, custom_backend=OpenClBackend()) - - cuda_kernel = ast.compile() - assert cuda_kernel is not None - - import pycuda.gpuarray as gpuarray - - x_cpu = np.random.rand(20, 30) - y_cpu = np.random.rand(20, 30) - z_cpu = np.random.rand(20, 30) - - x = gpuarray.to_gpu(x_cpu) - y = gpuarray.to_gpu(y_cpu) - z = gpuarray.to_gpu(z_cpu) - cuda_kernel(x=x, y=y, z=z) - - result_cuda = z.get() - - import pyopencl.array as array - ctx = cl.create_some_context(0) - queue = cl.CommandQueue(ctx) - - x = array.to_device(queue, x_cpu) - y = array.to_device(queue, y_cpu) - z = array.to_device(queue, z_cpu) - - opencl_kernel = make_python_function(ast, queue, ctx) - assert opencl_kernel is not None - opencl_kernel(x=x, y=y, z=z) - - result_opencl = z.get(queue) - - assert np.allclose(result_cuda, result_opencl) - - -@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl") -def test_opencl_jit_with_parameter(): - pytest.importorskip('pycuda') - - z, y, x = pystencils.fields("z, y, x: [2d]") - - a = sp.Symbol('a') - assignments = pystencils.AssignmentCollection({ - z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a - }) - - print(assignments) - - ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU) - - print(ast) - - code = pystencils.show_code(ast, custom_backend=CudaBackend()) - print(code) - opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend()) - print(opencl_code) - - cuda_kernel = ast.compile() - assert cuda_kernel is not None - - import pycuda.gpuarray as gpuarray - - x_cpu = np.random.rand(20, 30) - y_cpu = np.random.rand(20, 30) - z_cpu = np.random.rand(20, 30) - - x = gpuarray.to_gpu(x_cpu) - y = gpuarray.to_gpu(y_cpu) - z = gpuarray.to_gpu(z_cpu) - cuda_kernel(x=x, y=y, z=z, a=5.) - - result_cuda = z.get() - - import pyopencl.array as array - ctx = cl.create_some_context(0) - queue = cl.CommandQueue(ctx) - - x = array.to_device(queue, x_cpu) - y = array.to_device(queue, y_cpu) - z = array.to_device(queue, z_cpu) - - opencl_kernel = make_python_function(ast, queue, ctx) - assert opencl_kernel is not None - opencl_kernel(x=x, y=y, z=z, a=5.) - - result_opencl = z.get(queue) - - assert np.allclose(result_cuda, result_opencl) - - -@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl") -def test_without_cuda(): - z, y, x = pystencils.fields("z, y, x: [20,30]") - - assignments = pystencils.AssignmentCollection({ - z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) - }) - - print(assignments) - - ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU) - - print(ast) - - opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend()) - print(opencl_code) - - x_cpu = np.random.rand(20, 30) - y_cpu = np.random.rand(20, 30) - z_cpu = np.random.rand(20, 30) - - import pyopencl.array as array - ctx = cl.create_some_context(0) - queue = cl.CommandQueue(ctx) - - x = array.to_device(queue, x_cpu) - y = array.to_device(queue, y_cpu) - z = array.to_device(queue, z_cpu) - - opencl_kernel = make_python_function(ast, queue, ctx) - assert opencl_kernel is not None - opencl_kernel(x=x, y=y, z=z) - - -@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl") -def test_kernel_creation(): - global pystencils - z, y, x = pystencils.fields("z, y, x: [20,30]") - - assignments = pystencils.AssignmentCollection({ - z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) - }) - - print(assignments) - - import pystencils.opencl.autoinit - ast = pystencils.create_kernel(assignments, target=pystencils.Target.OPENCL) - - print(ast.backend) - - code = pystencils.get_code_str(ast) - print(code) - assert 'get_local_size' in code - - opencl_kernel = ast.compile() - - x_cpu = np.random.rand(20, 30) - y_cpu = np.random.rand(20, 30) - z_cpu = np.random.rand(20, 30) - - import pyopencl.array as array - assert get_global_cl_queue() - x = array.to_device(get_global_cl_queue(), x_cpu) - y = array.to_device(get_global_cl_queue(), y_cpu) - z = array.to_device(get_global_cl_queue(), z_cpu) - - assert opencl_kernel is not None - opencl_kernel(x=x, y=y, z=z) diff --git a/pystencils_tests/test_random.py b/pystencils_tests/test_random.py index 5e53ce1391fd130fd1f898f55c23bd1c118d4c1d..d1f509e6518d78df2345aa6a83b43ccb1f69d3a6 100644 --- a/pystencils_tests/test_random.py +++ b/pystencils_tests/test_random.py @@ -23,16 +23,12 @@ if get_compiler_config()['os'] == 'windows': @pytest.mark.parametrize('target,rng', ( -(Target.CPU, 'philox'), (Target.CPU, 'aesni'), (Target.GPU, 'philox'), (Target.OPENCL, 'philox'))) +(Target.CPU, 'philox'), (Target.CPU, 'aesni'), (Target.GPU, 'philox'))) @pytest.mark.parametrize('precision', ('float', 'double')) @pytest.mark.parametrize('dtype', ('float', 'double')) def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None): if target == Target.GPU: pytest.importorskip('pycuda') - if target == Target.OPENCL: - pytest.importorskip('pyopencl') - from pystencils.opencl.opencljit import init_globally - init_globally() if instruction_sets and {'neon', 'sve', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni': pytest.xfail('AES not yet implemented for this architecture') if rng == 'aesni' and len(keys) == 2: diff --git a/pystencils_tests/test_staggered_kernel.py b/pystencils_tests/test_staggered_kernel.py index 1ad634da299e6eb0ac016c27946edf84ab5b016c..db72a4ad8455ebc3c1e6214f74deb0f54cd6ba7f 100644 --- a/pystencils_tests/test_staggered_kernel.py +++ b/pystencils_tests/test_staggered_kernel.py @@ -72,12 +72,6 @@ class TestStaggeredDiffusion: def test_diffusion_4(self): self._run(4) - def test_diffusion_opencl(self): - import pytest - pytest.importorskip('pyopencl') - import pystencils.opencl.autoinit - self._run(4, Target.OPENCL) - def test_diffusion_openmp(self): self._run(4, openmp=True) diff --git a/setup.py b/setup.py index 6de1c8ddc2a05bdc42bb1d4e35e33b1e5eccf577..4643a38837063a6001b0f8ff6144270c76402848 100644 --- a/setup.py +++ b/setup.py @@ -115,7 +115,6 @@ setuptools.setup(name='pystencils', }, extras_require={ 'gpu': ['pycuda'], - 'opencl': ['pyopencl'], 'alltrafos': ['islpy', 'py-cpuinfo'], 'bench_db': ['blitzdb', 'pymongo', 'pandas'], 'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk', 'rich', 'graphviz'],